# random_rollouts.py
import argparse
import os
import time
from multiprocessing import cpu_count, Pool
import gzip
import gym
from scipy.misc import imresize
import numpy as np
from lib.utils import log, mkdir
from lib.constants import DOOM_GAMES
# ViZDoomWrapper is treated as optional: if importing it fails for any reason
# the failure is ignored and the name is simply left undefined.
try:
    from lib.env_wrappers import ViZDoomWrapper
except Exception:
    pass

# Tag passed to log() and used as the last component of the output directory.
ID = "random_rollouts"
def generate_action(low, high, prev_action, balance_no_actions=False, force_actions=False):
    """Pick the next action for a random policy that tends to repeat itself.

    A roll in [0, 10) is drawn on every call. If the roll is non-zero and a
    previous action exists, that previous action is returned unchanged (so an
    existing action is repeated 9 times out of 10 on average).

    Otherwise a fresh action is sampled: component ``i`` is an integer drawn
    from ``low[i]..high[i]`` inclusive, and it is kept only when it is not a
    multiple of ``len(low)``; otherwise the component stays 0.

    Args:
        low: Per-component lower bounds (indexable, with ``len``).
        high: Per-component upper bounds (inclusive), indexed like ``low``.
        prev_action: The action returned by the previous call, or None.
        balance_no_actions: When true, a candidate that is all 0s or all 1s
            is thrown away and resampled half of the time.
        force_actions: When true, an all-zero candidate is always resampled.

    Returns:
        ``prev_action`` itself when it is repeated, otherwise a float32
        numpy array with one entry per component.

    NOTE(review): if no component can ever end up non-zero (for instance
    ``len(low) == 1``, where ``value % 1`` is always 0) and ``force_actions``
    is true, the resampling loop never terminates.
    """
    # The repeat roll is consumed even when there is nothing to repeat.
    repeat_roll = np.random.randint(10)
    if prev_action is not None and repeat_roll:
        return prev_action

    action_len = len(low)
    while True:
        # Every retry starts again from an all-zero candidate.
        candidate = [0] * action_len
        for index in range(action_len):
            sampled = np.random.randint(low[index], high[index] + 1)
            if sampled % action_len != 0:
                candidate[index] = sampled

        # In many games "all 0s" and "all 1s" both mean "do nothing". Rejecting
        # them half of the time keeps the no-op from being over-represented
        # relative to the other action combinations.
        if balance_no_actions:
            is_noop = all(a == 0 for a in candidate) or all(a == 1 for a in candidate)
            if is_noop and np.random.randint(2) == 0:
                continue

        if force_actions and not any(a != 0 for a in candidate):
            continue

        return np.array(candidate).astype(np.float32)
def worker(worker_arg_tuple):
    """Collect random-policy rollouts for one batch of rollout numbers.

    For every rollout number in the batch the environment is reset and stepped
    with actions from ``generate_action`` until it reports ``done``. The
    results are written to ``<output_dir>/<rollout_num>/``:

    * ``frames.npy.gz`` - gzip-compressed ``np.save`` of the resized frames
      (the initial observation plus one frame per step),
    * ``misc.npz``      - compressed arrays ``action`` and ``reward``,
    * ``count``         - text file holding the number of stored frames.

    Args:
        worker_arg_tuple: ``(rollouts_per_core, args, output_dir)`` where
            ``rollouts_per_core`` is an iterable of rollout numbers, ``args``
            is the parsed command-line namespace (``game``, ``frame_resize``
            and ``num_rollouts`` are read) and ``output_dir`` is the base
            directory for this run.
    """
    rollouts_per_core, args, output_dir = worker_arg_tuple

    # Re-seed numpy's global RNG with fresh entropy; presumably so that pool
    # workers do not all replay the same random sequence.
    np.random.seed()

    # Loop invariants, computed once instead of on every environment step.
    is_doom = args.game in DOOM_GAMES
    frame_shape = (args.frame_resize, args.frame_resize)

    if is_doom:
        env = ViZDoomWrapper(args.game)
    else:
        env = gym.make(args.game)

    # try/finally guarantees the environment is released even when a rollout
    # raises part-way through (previously close() was skipped on error).
    try:
        for rollout_num in rollouts_per_core:
            actions_array = []
            frames_array = []
            rewards_array = []

            observation = env.reset()
            frames_array.append(imresize(observation, frame_shape))

            start_time = time.time()
            prev_action = None
            while True:
                # action = env.action_space.sample()
                action = generate_action(env.action_space.low,
                                         env.action_space.high,
                                         prev_action,
                                         balance_no_actions=is_doom,
                                         force_actions=not is_doom)
                prev_action = action
                observation, reward, done, _ = env.step(action)

                actions_array.append(action)
                frames_array.append(imresize(observation, frame_shape))
                rewards_array.append(reward)

                if done:
                    # One action is stored per step, so the list length is the
                    # number of timesteps taken in this rollout.
                    log(ID,
                        "\t> Rollout {}/{} finished after {} timesteps in {:.2f}s".format(
                            rollout_num, args.num_rollouts, len(actions_array),
                            (time.time() - start_time)))
                    break

            actions_array = np.asarray(actions_array)
            frames_array = np.asarray(frames_array)
            rewards_array = np.asarray(rewards_array).astype(np.float32)

            rollout_dir = os.path.join(output_dir, str(rollout_num))
            mkdir(rollout_dir)

            with gzip.GzipFile(os.path.join(rollout_dir, "frames.npy.gz"), "w") as frames_file:
                np.save(frames_file, frames_array)
            np.savez_compressed(os.path.join(rollout_dir, "misc.npz"),
                                action=actions_array,
                                reward=rewards_array)
            with open(os.path.join(rollout_dir, "count"), "w") as count_file:
                print("{}".format(frames_array.shape[0]), file=count_file)
    finally:
        env.close()
def main():
    """Parse the command line and collect random rollouts in parallel.

    Rollout numbers ``offset + 1 .. offset + num_rollouts`` are split into up
    to ``cores`` batches, and each non-empty batch is handed to ``worker`` in
    a process pool. Output goes to
    ``<data_dir>/<game>/<experiment_name>/random_rollouts``.
    """
    parser = argparse.ArgumentParser(description='World Models ' + ID)
    parser.add_argument('--data_dir', '-d', default="/data/wm", help='The base data/output directory')
    parser.add_argument('--game', default='CarRacing-v0',
                        help='Game to use')  # https://gym.openai.com/envs/CarRacing-v0/
    parser.add_argument('--experiment_name', default='experiment_1', help='To isolate its files from others')
    parser.add_argument('--num_rollouts', '-n', default=100, type=int, help='Number of rollouts to collect')
    parser.add_argument('--offset', '-o', default=0, type=int,
                        help='Offset rollout count, in case running on distributed cluster')
    parser.add_argument('--frame_resize', '-r', default=64, type=int, help='h x w resize of each observation frame')
    parser.add_argument('--cores', default=0, type=int, help='Number of CPU cores to use. 0=all cores')
    args = parser.parse_args()

    # A negative core count would otherwise surface later as an opaque
    # np.array_split error; reject it up front with a usage message.
    if args.cores < 0:
        parser.error("--cores must be >= 0 (0 = all cores)")

    log(ID, "args =\n " + str(vars(args)).replace(",", ",\n "))

    output_dir = os.path.join(args.data_dir, args.game, args.experiment_name, ID)
    mkdir(output_dir)

    log(ID, "Starting")

    cores = args.cores if args.cores != 0 else cpu_count()

    start = 1 + args.offset
    end = args.num_rollouts + 1 + args.offset
    # array_split yields empty batches when there are fewer rollouts than
    # cores; those are dropped so no worker is started for nothing.
    worker_arg_tuples = [
        (batch, args, output_dir)
        for batch in np.array_split(range(start, end), cores)
        if len(batch) != 0
    ]

    if worker_arg_tuples:
        # One process per non-empty batch (never more than `cores`).
        pool = Pool(len(worker_arg_tuples))
        try:
            pool.map(worker, worker_arg_tuples)
        finally:
            # Always shut the pool down, even if a worker raised.
            pool.close()
            pool.join()

    log(ID, "Done")
# Run the rollout collection only when executed as a script, not on import.
if __name__ == "__main__":
    main()