# File size: 2,242 Bytes
# e085e3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from tqdm import trange

def fill_memory(agent, env, num_episodes=500):
    """Play full episodes and hand every transition to ``agent.cache``.

    No learning happens here: ``agent.learn`` is never called, so this only
    feeds the agent's memory (presumably a replay buffer -- confirm against
    the agent implementation) before training starts.

    Args:
        agent: Object exposing ``act(state)`` and
            ``cache(state, next_state, action, reward, done)``.
        env: Environment exposing ``reset()`` and ``step(action)``; ``step``
            must return a 4-tuple ``(next_state, reward, done, info)``.
        num_episodes: Number of episodes to play. Defaults to 500.
    """
    # NOTE(review): train() passes an extra step index as a sixth argument to
    # agent.cache; confirm that argument is optional on the agent side.
    print("Filling up memory....")
    # Honour the caller-supplied episode count; the loop used to be
    # hard-coded to 500, which silently ignored ``num_episodes``.
    for _ in trange(num_episodes):
        state = env.reset()
        done = False
        while not done:
            action = agent.act(state)
            next_state, reward, done, _info = env.step(action)
            agent.cache(state, next_state, action, reward, done)
            state = next_state


# def train(agent, env, logger):
#     episodes = 5000
#     for e in range(episodes):

#         state = env.reset()
#         # Play the game!
#         while True:
        
#             # Run agent on the state
#             action = agent.act(state)
            
#             # Agent performs action
#             next_state, reward, done, info = env.step(action)
            
#             # Remember
#             agent.cache(state, next_state, action, reward, done)

#             # Learn
#             q, loss = agent.learn()

#             # Logging
#             logger.log_step(reward, loss, q)

#             # Update state
#             state = next_state
            
#             # Check if end of game
#             if done:
#                 break
        
#         logger.log_episode(e)

#         if e % 20 == 0:
#             logger.record(episode=e, epsilon=agent.exploration_rate, step=agent.curr_step)


def train(agent, env, logger, num_episodes=5000, max_steps=1000, *, render=True):
    """Run the interaction/learning loop for a number of episodes.

    For every step the agent picks an action, the environment is advanced,
    the transition is handed to ``agent.cache`` and ``agent.learn`` is called;
    the step's reward, loss and q value are forwarded to the logger.

    Args:
        agent: Object exposing ``act(state)``,
            ``cache(state, next_state, action, reward, done, step_index)``,
            ``learn()`` returning a ``(q, loss)`` pair, and the attributes
            ``exploration_rate`` and ``curr_step``.
        env: Environment exposing ``reset()``, ``render()`` and
            ``step(action)``; ``step`` must return a 4-tuple
            ``(next_state, reward, done, info)``.
        logger: Object exposing ``log_step(reward, loss, q)``,
            ``log_episode(episode)`` and
            ``record(episode=..., epsilon=..., step=...)``.
        num_episodes: Number of episodes to run. Defaults to 5000.
        max_steps: Upper bound on steps per episode; the episode is cut off
            at this many steps even if ``done`` was never reported.
            Defaults to 1000.
        render: When true (the default), ``env.render()`` is called once per
            step, before the action is applied.
    """
    for e in range(num_episodes):
        state = env.reset()

        # Play one episode, capped at ``max_steps`` steps.
        for i in range(max_steps):
            # Run agent on the state
            action = agent.act(state)
            if render:
                env.render()

            # Agent performs action
            next_state, reward, done, _info = env.step(action)

            # Remember; the in-episode step index is passed along as well.
            agent.cache(state, next_state, action, reward, done, i)

            # Learn
            q, loss = agent.learn()

            # Logging
            logger.log_step(reward, loss, q)

            # Update state
            state = next_state

            # Check if end of game
            if done:
                break

        logger.log_episode(e)

        # Emit a progress record on every 20th episode (including episode 0).
        if e % 20 == 0:
            logger.record(episode=e, epsilon=agent.exploration_rate, step=agent.curr_step)