rail-berkeley · hartikainen · Mar 3, 2019 · Mar 3, 2019 · Mar 3, 2019 · Mar 3, 2019
diff --git a/README.md b/README.md
@@ -74,13 +74,13 @@ docker-compose \
 softlearning run_example_local examples.development \
     --universe=gym \
     --domain=HalfCheetah \
-    --task=v2 \
+    --task=v3 \
     --exp-name=my-sac-experiment-1 \
     --checkpoint-frequency=1000  # Save the checkpoint to resume training later
 ```
 
 2. To simulate the resulting policy:
-First, find the path that the checkpoint is saved to. By default (i.e. without specifying the `log-dir` argument to the previous script), the data is saved under `~/ray_results/<universe>/<domain>/<task>/<datatimestamp>-<exp-name>/<trial-id>/<checkpoint-id>`. For example: `~/ray_results/gym/HalfCheetah/v2/2018-12-12T16-48-37-my-sac-experiment-1-0/mujoco-runner_0_seed=7585_2018-12-12_16-48-37xuadh9vd/checkpoint_1000/`. The next command assumes that this path is found from `${SAC_CHECKPOINT_DIR}` environment variable.
+First, find the path that the checkpoint is saved to. By default (i.e. without specifying the `log-dir` argument to the previous script), the data is saved under `~/ray_results/<universe>/<domain>/<task>/<datetimestamp>-<exp-name>/<trial-id>/<checkpoint-id>`. For example: `~/ray_results/gym/HalfCheetah/v3/2018-12-12T16-48-37-my-sac-experiment-1-0/mujoco-runner_0_seed=7585_2018-12-12_16-48-37xuadh9vd/checkpoint_1000/`. The next command assumes that this path is stored in the `${SAC_CHECKPOINT_DIR}` environment variable.
 
 ```
 python -m examples.development.simulate_policy \
@@ -146,7 +146,7 @@ In order to resume training from previous checkpoint, run the original example m
 softlearning run_example_local examples.development \
     --universe=gym \
     --domain=HalfCheetah \
-    --task=v2 \
+    --task=v3 \
     --exp-name=my-sac-experiment-1 \
     --checkpoint-frequency=1000 \
     --restore=${SAC_CHECKPOINT_PATH}

diff --git a/examples/development/variants.py b/examples/development/variants.py
@@ -136,21 +136,21 @@
     'Walker2d': {  # 6 DoF
     },
     'Ant': {  # 8 DoF
-        'Parameterizable-v0': {
+        'Parameterizable-v3': {
             'healthy_reward': 0.0,
             'healthy_z_range': (-np.inf, np.inf),
             'exclude_current_positions_from_observation': False,
         }
     },
     'Humanoid': {  # 17 DoF
-        'Parameterizable-v0': {
+        'Parameterizable-v3': {
             'healthy_reward': 0.0,
             'healthy_z_range': (-np.inf, np.inf),
             'exclude_current_positions_from_observation': False,
         }
     },
     'Pusher2d': {  # 3 DoF
-        'Default-v0': {
+        'Default-v3': {
             'arm_object_distance_cost_coeff': 0.0,
             'goal_object_distance_cost_coeff': 1.0,
             'goal': (0, -1),

diff --git a/examples/instrument.py b/examples/instrument.py
@@ -276,7 +276,8 @@ def run_example_cluster(example_module_name, example_argv):
         experiments,
         with_server=example_args.with_server,
         server_port=4321,
-        scheduler=None)
+        scheduler=None,
+        queue_trials=True)
 
 
 def launch_example_cluster(example_module_name,

diff --git a/requirements.txt b/requirements.txt
@@ -32,7 +32,7 @@ google-auth==1.6.1
 google-auth-httplib2==0.0.3
 grpcio==1.16.1
 gtimer==1.0.0b5
-gym==0.11.0
+gym==0.12.0
 h5py==2.8.0
 httplib2==0.12.0
 idna==2.7

diff --git a/softlearning/environments/gym/__init__.py b/softlearning/environments/gym/__init__.py
@@ -13,34 +13,28 @@
 
 MUJOCO_ENVIRONMENT_SPECS = (
     {
-        'id': 'Swimmer-Parameterizable-v0',
-        'entry_point': (f'{MUJOCO_ENVIRONMENTS_PATH}'
-                        '.swimmer:SwimmerEnv'),
+        'id': 'Swimmer-Parameterizable-v3',
+        'entry_point': (f'gym.envs.mujoco.swimmer_v3:SwimmerEnv'),
     },
     {
-        'id': 'Hopper-Parameterizable-v0',
-        'entry_point': (f'{MUJOCO_ENVIRONMENTS_PATH}'
-                        '.hopper:HopperEnv'),
+        'id': 'Hopper-Parameterizable-v3',
+        'entry_point': (f'gym.envs.mujoco.hopper_v3:HopperEnv'),
     },
     {
-        'id': 'Walker2d-Parameterizable-v0',
-        'entry_point': (f'{MUJOCO_ENVIRONMENTS_PATH}'
-                        '.walker2d:Walker2dEnv'),
+        'id': 'Walker2d-Parameterizable-v3',
+        'entry_point': (f'gym.envs.mujoco.walker2d_v3:Walker2dEnv'),
     },
     {
-        'id': 'HalfCheetah-Parameterizable-v0',
-        'entry_point': (f'{MUJOCO_ENVIRONMENTS_PATH}'
-                        '.half_cheetah:HalfCheetahEnv'),
+        'id': 'HalfCheetah-Parameterizable-v3',
+        'entry_point': (f'gym.envs.mujoco.half_cheetah_v3:HalfCheetahEnv'),
     },
     {
-        'id': 'Ant-Parameterizable-v0',
-        'entry_point': (f'{MUJOCO_ENVIRONMENTS_PATH}'
-                        '.ant:AntEnv'),
+        'id': 'Ant-Parameterizable-v3',
+        'entry_point': (f'gym.envs.mujoco.ant_v3:AntEnv'),
     },
     {
-        'id': 'Humanoid-Parameterizable-v0',
-        'entry_point': (f'{MUJOCO_ENVIRONMENTS_PATH}'
-                        '.humanoid:HumanoidEnv'),
+        'id': 'Humanoid-Parameterizable-v3',
+        'entry_point': (f'gym.envs.mujoco.humanoid_v3:HumanoidEnv'),
     },
     {
         'id': 'Pusher2d-Default-v0',

diff --git a/softlearning/environments/gym/mujoco/ant.py b/softlearning/environments/gym/mujoco/ant.py
diff --git a/softlearning/environments/gym/mujoco/half_cheetah.py b/softlearning/environments/gym/mujoco/half_cheetah.py