-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathaccess_control.py
97 lines (68 loc) · 1.91 KB
/
access_control.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import numpy as np
import matplotlib.pyplot as plt
from tqdm import trange
import sys
from os.path import dirname, join, realpath
dir_path = dirname(dirname(realpath(__file__)))
sys.path.insert(1, join(dir_path, 'utils'))
from tile_coding import IHT, tiles
class AccessControl:
    '''
    Access-control environment (skeleton)

    Only the configuration is stored for now; the transition logic in
    take_action has not been written yet.
    '''

    def __init__(self, n_servers):
        '''
        Params
        ------
        n_servers: number of servers, stored as given
        '''
        # Rewards are the first four powers of two: [1, 2, 4, 8]
        # NOTE(review): presumably one reward per customer priority - confirm
        self.rewards = [2 ** k for k in range(4)]
        # NOTE(review): presumably -1 = reject and 1 = accept - confirm
        self.action = [-1, 1]
        self.n_servers = n_servers

    def take_action(self, state, action):
        '''
        Placeholder for the environment transition

        Not implemented yet: both arguments are ignored and None is returned.
        '''
        return None
class ValueFunction:
    '''
    Linear action-value function on top of tile-coded features

    The value of a (state, action) pair is the sum of the weights of the
    tiles that are active for that pair.
    '''

    def __init__(self, n_tilings, max_size=2048):
        '''
        Params
        ------
        n_tilings: int
            number of tilings requested from the tile coder
        max_size: int
            size shared by the index hash table and the weight vector;
            defaults to 2048, the value that used to be hard-coded
        '''
        self.n_tilings = n_tilings
        self.max_size = max_size
        # The weight vector and the hash table are sized from the same number
        # so that every index handed out by the table is a valid weight index.
        # NOTE(review): assumes IHT(size) only yields indices < size - confirm
        self.w = np.zeros(max_size)
        self.iht = IHT(max_size)

    def get_active_tiles(self, state, action):
        '''
        Get active tiles

        Params
        ------
        state: wrapped in a one-element list and passed as the third
            argument of tiles
        action: wrapped in a one-element list and passed as the fourth
            argument of tiles

        Return
        ------
        active_tiles: whatever tiles returns; it is used as a collection of
            indices into the weight vector
        '''
        # NOTE(review): assumes tiles(iht, n_tilings, floats, ints) - confirm
        # against utils/tile_coding
        active_tiles = tiles(self.iht, self.n_tilings, [state], [action])
        return active_tiles

    def get_value(self, state, action):
        '''
        Get value

        Params
        ------
        state: state to evaluate
        action: action to evaluate

        Return
        ------
        value: sum of the weights of the active tiles
        '''
        active_tiles = self.get_active_tiles(state, action)
        return np.sum(self.w[active_tiles])

    def learn(self, state, action, target, alpha):
        '''
        Update weight vector

        Params
        ------
        state: state whose estimate is corrected
        action: action whose estimate is corrected
        target: float
            value the current estimate is moved towards
        alpha: float
            step size applied to the error for each active tile
        '''
        active_tiles = self.get_active_tiles(state, action)
        estimate = np.sum(self.w[active_tiles])
        # The correction is the same for every active tile, so compute it once
        delta = alpha * (target - estimate)
        # Explicit loop on purpose: an index that shows up twice is updated
        # twice, which a fancy-indexed "+=" would not do
        for tile in active_tiles:
            self.w[tile] += delta
def epsilon_greedy(epsilon, value_function, env, state):
    '''
    Choose an action with an epsilon-greedy policy

    Params
    ------
    epsilon: float
        probability of picking an action uniformly at random
    value_function: ValueFunction
        only get_value(state, action) is used
    env: AccessControl
        only env.action, the list of candidate actions, is used
    state: current state, passed through to get_value unchanged

    Return
    ------
    action: one element of env.action
    '''
    actions = env.action

    # Explore: rand() lies in [0, 1), so epsilon = 0 never explores and
    # epsilon = 1 always does
    if np.random.rand() < epsilon:
        return actions[np.random.randint(len(actions))]

    # Exploit: keep every action that reaches the best estimate and pick one
    # of them at random, so ties (e.g. all-zero initial weights) do not
    # always resolve to the first action
    values = [value_function.get_value(state, a) for a in actions]
    best_value = max(values)
    best_actions = [a for a, v in zip(actions, values) if v == best_value]
    return best_actions[np.random.randint(len(best_actions))]
def differential_semi_gradient_sarsa(value_function, env, alpha, beta, gamma, epsilon):
    '''
    Differential Semi-gradient Sarsa algorithm

    Placeholder: the algorithm is not implemented yet, so every argument is
    ignored and None is returned.

    Params
    ------
    value_function: ValueFunction
    env: AccessControl
    alpha: float
        step size param
    beta: float
        NOTE(review): presumably the step size of the average-reward
        estimate - confirm once the body is written
    gamma: float
        discount factor
    epsilon: float
        epsilon greedy param
    '''
    return None
if __name__ == '__main__':
    # Experiment hyperparameters, bound in a single statement. They are only
    # defined when the file is run as a script, and no code visible in this
    # file consumes them yet.
    alpha, beta, gamma, epsilon = 0.01, 0.01, 1, 0.1