 import argparse
-import gym
+import gymnasium as gym
 import numpy as np
 import os
 from itertools import count
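Context for the import swap: Gymnasium is the maintained fork of OpenAI Gym, and aliasing it as gym keeps every other reference in the file working unchanged; only the seeding, reset(), and step() calls need the updates shown in the hunks below. A minimal sketch of the new seeding idiom (assuming gymnasium >= 0.26, where this API became the default; seed=0 is an arbitrary example value, not taken from this commit):

import gymnasium as gym

env = gym.make('CartPole-v1')    # same environment id as before
obs, info = env.reset(seed=0)    # env.seed() is gone; seed via reset()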
@@ -85,7 +85,7 @@ class Observer:
     def __init__(self):
         self.id = rpc.get_worker_info().id
         self.env = gym.make('CartPole-v1')
-        self.env.seed(args.seed)
+        self.env.reset(seed=args.seed)
 
     def run_episode(self, agent_rref, n_steps):
         r"""
@@ -95,18 +95,18 @@ def run_episode(self, agent_rref, n_steps):
             agent_rref (RRef): an RRef referencing the agent object.
             n_steps (int): number of steps in this episode
         """
-        state, ep_reward = self.env.reset(), 0
+        state, ep_reward = self.env.reset()[0], 0
         for step in range(n_steps):
             # send the state to the agent to get an action
             action = _remote_method(Agent.select_action, agent_rref, self.id, state)
 
             # apply the action to the environment, and get the reward
-            state, reward, done, _ = self.env.step(action)
+            state, reward, terminated, truncated, _ = self.env.step(action)
 
             # report the reward to the agent for training purpose
             _remote_method(Agent.report_reward, agent_rref, self.id, reward)
 
-            if done:
+            if terminated or truncated:
                 break
 
 class Agent:
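Taken together, the loop changes map the old Gym API onto Gymnasium's: reset() now returns an (observation, info) pair (hence the [0] index above), and step() returns five values, splitting the old done flag into terminated (the environment reached a terminal state) and truncated (a step or time limit ended the episode early). A standalone sketch of the migrated episode loop, with a random policy standing in for this example's RPC call to Agent.select_action:

import gymnasium as gym

env = gym.make('CartPole-v1')
state, info = env.reset(seed=0)             # reset() returns (obs, info)
ep_reward = 0
for step in range(500):
    action = env.action_space.sample()      # stand-in for the learned policy
    # step() returns a 5-tuple: obs, reward, terminated, truncated, info
    state, reward, terminated, truncated, info = env.step(action)
    ep_reward += reward
    if terminated or truncated:             # either condition ends the episode
        break
env.close()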