forked from omelyanchikd/ace
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathqlearning_firm.py
80 lines (70 loc) · 3.15 KB
/
qlearning_firm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
from firm import Firm
from firm_action import FirmAction
from firm_labormarket_action import FirmLaborMarketAction
from firm_goodmarket_action import FirmGoodMarketAction
import math
import random
import numpy
def argmax(two_dimensional_list, dimension):
    """Return the index of the largest value in one row of a 2-D sequence.

    Args:
        two_dimensional_list: Indexable container of rows (e.g. a list of
            lists).
        dimension: Index of the row to scan.

    Returns:
        The position of the maximum value inside
        ``two_dimensional_list[dimension]``. When the maximum occurs more
        than once, the first (lowest) position is returned.

    Raises:
        IndexError: If ``dimension`` is out of range or the selected row is
            empty.
    """
    row = two_dimensional_list[dimension]
    best_index = 0
    best_value = row[0]
    for index, value in enumerate(row):
        # Strict comparison keeps the earliest index when values tie.
        if value > best_value:
            best_value = value
            best_index = index
    return best_index
class QlearningFirm(Firm):
    """Firm that adjusts its price and production plan with tabular Q-learning.

    The Q-table ``self.q`` has one row per state (six states, assigned in
    ``update_state``) and one column per action. Every action is a
    ``(price_change, plan_change)`` pair: the price is multiplied by
    ``1 + price_change`` and ``plan_change`` is added to the plan.

    ``efficiency_coefficient``, ``price``, ``stock``, ``sold``, ``profit``,
    ``workers`` and ``fire_worker`` are not defined in this class; they are
    presumably supplied by ``Firm`` -- verify against the base class.
    """

    STATE_COUNT = 6          # states 0..5, see update_state
    INITIAL_Q_VALUE = 100    # starting value of every Q-table entry

    def __init__(self, id):
        """Set up the initial plan, salary, action set and Q-table.

        Args:
            id: Identifier forwarded unchanged to ``Firm.__init__``.
        """
        super().__init__(id)
        self.plan = 50 * self.efficiency_coefficient
        self.salary = 200
        self.offer_count = 0
        self.prev_workers = 50

        # All combinations of a price change (+1 %, 0, -1 %) with a plan
        # change of (+1, 0, -1) times efficiency_coefficient, in that order.
        plan_step = self.efficiency_coefficient
        self.actions = [
            (price_change, plan_change)
            for price_change in (0.01, 0, -0.01)
            for plan_change in (plan_step, 0, -plan_step)
        ]
        self.action = (0, 0)
        self.state = 0
        self.alpha = 0.5  # weight of the new estimate in update()
        self.gamma = 0.5  # weight of the next state's best Q-value in update()
        self.q = [
            [self.INITIAL_Q_VALUE] * len(self.actions)
            for _ in range(self.STATE_COUNT)
        ]

    def decide(self, stats):
        """Return a ``FirmAction`` built from all-zero values and an empty list.

        ``stats`` is not used.
        """
        return FirmAction(0, 0, 0, 0, 0, 0, [])

    def decide_salary(self, stats):
        """Learn from the last period, pick the next action and build the labor offer.

        Steps: re-evaluate the state, update the Q-table for the action taken
        previously, choose the highest-valued action for the new state, apply
        it to price and plan, fire surplus workers, and derive the salary from
        the new price.

        Args:
            stats: Not used by this method.

        Returns:
            ``FirmLaborMarketAction(self.offer_count, self.salary, [])``.
        """
        # Remember the state in which the last action was chosen, so the
        # reward is credited to that (state, action) pair rather than to the
        # state that resulted from it.
        previous_state = self.state
        self.update_state()
        # NOTE(review): prev_workers is recorded before the firing loop below.
        # If fire_worker removes the worker from self.workers immediately, the
        # next update_state() will see fewer workers than
        # prev_workers + offer_count and classify it as a hiring shortfall --
        # confirm whether that is intended.
        self.prev_workers = len(self.workers)
        self.update(previous_state)

        self.action = self.actions[argmax(self.q, self.state)]
        price_change, plan_change = self.action

        # Price never drops below zero.
        self.price = max(0, self.price * (1 + price_change))

        # Subtract the stock from the adjusted plan, round down to a whole
        # multiple of efficiency_coefficient, and never plan a negative amount.
        self.plan += plan_change
        self.plan = (
            (self.plan - self.stock)
            // self.efficiency_coefficient
            * self.efficiency_coefficient
        )
        self.plan = max(0, self.plan)

        self.offer_count = (
            math.floor(self.plan / self.efficiency_coefficient)
            - len(self.workers)
        )
        # A negative offer count means surplus workers: fire randomly chosen
        # ones until the count reaches zero.
        while self.offer_count < 0:
            self.fire_worker(random.choice(list(self.workers)))
            self.offer_count += 1

        self.salary = 0.95 * self.price * self.efficiency_coefficient
        return FirmLaborMarketAction(self.offer_count, self.salary, [])

    def decide_price(self, stats):
        """Return ``FirmGoodMarketAction(self.stock, self.price, 0)``.

        ``stats`` is not used.
        """
        return FirmGoodMarketAction(self.stock, self.price, 0)

    def update_state(self):
        """Classify the outcome of the last period into ``self.state`` (0..5).

        States:
            5 -- the firm has no workers.
            4 -- nothing was sold.
            0 -- sales reached the plan and the worker count equals
                 ``prev_workers + offer_count``.
            1 -- sales fell short of the plan and the worker count equals
                 ``prev_workers + offer_count``.
            2 -- sales reached the plan and the worker count is below
                 ``prev_workers + offer_count``.
            3 -- every other case.
        """
        worker_count = len(self.workers)
        expected_workers = self.prev_workers + self.offer_count

        if worker_count == 0:
            self.state = 5
        elif self.sold == 0:
            self.state = 4
        elif self.sold >= self.plan and worker_count == expected_workers:
            self.state = 0
        elif self.sold < self.plan and worker_count == expected_workers:
            self.state = 1
        # ">=" mirrors the sales test of state 0, so that overselling with a
        # worker shortfall is not lumped together with underselling (state 3).
        elif self.sold >= self.plan and worker_count < expected_workers:
            self.state = 2
        else:
            self.state = 3

    def update(self, previous_state=None):
        """Apply one Q-learning update for the most recently taken action.

        Computes ``Q(s, a) += alpha * (profit + gamma * max(Q(s')) - Q(s, a))``
        where ``a`` is ``self.action``, ``s`` is ``previous_state`` and ``s'``
        is the current ``self.state``.

        Args:
            previous_state: State in which ``self.action`` was chosen. When
                omitted, ``self.state`` is used for both ``s`` and ``s'``.
        """
        if previous_state is None:
            previous_state = self.state
        action_index = self.actions.index(self.action)
        old_value = self.q[previous_state][action_index]
        best_next_value = max(self.q[self.state])
        self.q[previous_state][action_index] = old_value + self.alpha * (
            self.profit + self.gamma * best_next_value - old_value
        )