Commit 8d98627: update README && ngrok instruction
Parent: 24c192e

8 files changed, +70 -124 lines

README.md (+29 -19)

@@ -34,21 +34,23 @@ This code contains implementation for teleoperation and imitation learning of Op
 ## Installation
 
 ```bash
-conda create -n tv python=3.8
-conda activate tv
-pip install -r requirements.txt
-cd act/detr && pip install -e .
+conda create -n tv python=3.8
+conda activate tv
+pip install -r requirements.txt
+cd act/detr && pip install -e .
 ```
 
-If you want to try the teleoperation example with an active cam using a ZED camera (teleop_active_cam.py):
+Install the ZED SDK: https://www.stereolabs.com/developers/release/
 
-Install the ZED SDK: https://www.stereolabs.com/developers/release/
+Install the ZED Python API:
+```
+cd /usr/local/zed/ && python get_python_api.py
+```
 
 If you want to try the teleoperation example in a simulated environment (teleop_hand.py):
 
 Install Isaac Gym: https://developer.nvidia.com/isaac-gym/
 
-
 ## Teleoperation Guide
 
 ### Local streaming
@@ -57,34 +59,34 @@ Apple does not allow WebXR on non-https connections. To test the application loc
 2. Check the local IP address:
 
 ```
-ifconfig | grep inet
+ifconfig | grep inet
 ```
 Suppose the local IP address of the Ubuntu machine is `192.168.8.102`.
 
 3. Create a certificate:
 
 ```
-mkcert -install && mkcert -cert-file cert.pem -key-file key.pem 192.168.8.102 localhost 127.0.0.1
+mkcert -install && mkcert -cert-file cert.pem -key-file key.pem 192.168.8.102 localhost 127.0.0.1
 ```
 
 4. Open the firewall on the server:
 ```
-sudo iptables -A INPUT -p tcp --dport 8012 -j ACCEPT
-sudo iptables-save
-sudo iptables -L
+sudo iptables -A INPUT -p tcp --dport 8012 -j ACCEPT
+sudo iptables-save
+sudo iptables -L
 ```
 or with `ufw`:
 ```
-sudo ufw allow 8012
+sudo ufw allow 8012
 ```
 5. Point the Vuer app at the certificate and key:
-```python
-self.app = Vuer(host='0.0.0.0', cert="./cert.pem", key="./key.pem")
+```
+self.app = Vuer(host='0.0.0.0', cert="./cert.pem", key="./key.pem")
 ```
 
 6. Install the CA certificate on the Vision Pro:
 ```
-mkcert -CAROOT
+mkcert -CAROOT
 ```
 Copy the rootCA.pem via AirDrop to the Vision Pro and install it.
 
@@ -102,14 +104,19 @@ For Meta Quest3, installation of the certificate is not trivial. We need to use
 1. Install ngrok: https://ngrok.com/download
 2. Run ngrok:
 ```
-ngrok http 8012
+ngrok http 8012
 ```
 3. Copy the https address, open the browser on the Meta Quest 3, and go to that address.
 
+P.S. When using ngrok for network streaming, remember to call `OpenTeleVision` with:
+```
+self.tv = OpenTeleVision(self.resolution_cropped, self.shm.name, image_queue, toggle_streaming, ngrok=True)
+```
+
 ### Simulation Teleoperation Example
 1. After setting up streaming with either local or network streaming following the above instructions, you can try teleoperating two robot hands in Isaac Gym:
 ```
-cd teleop && python teleop_hand.py
+cd teleop && python teleop_hand.py
 ```
 2. Go to your Vuer site on the Vision Pro, click `Enter VR` and `Allow` to enter the immersive environment.
 
@@ -136,7 +143,10 @@ cd teleop && python teleop_hand.py
 --save_jit --resume_ckpt 25000
 ```
 
-7. You can visualize the trained policy with inputs from the dataset using ``scripts/deploy_sim.py``.
+7. You can visualize the trained policy with inputs from the dataset using ``scripts/deploy_sim.py``; example usage:
+```
+python deploy_sim.py --taskid 00 --exptid 01 --resume_ckpt 25000
+```
 
 ## Citation
 ```

act/utils.py (+7 -8)

@@ -9,15 +9,15 @@
 from pathlib import Path
 
 class EpisodicDataset(torch.utils.data.Dataset):
-    def __init__(self, episode_ids, dataset_dir, camera_names, norm_stats, episode_len, control_mode, history_stack=0):
+    def __init__(self, episode_ids, dataset_dir, camera_names, norm_stats, episode_len, history_stack=0):
         super(EpisodicDataset).__init__()
         self.episode_ids = episode_ids
         self.dataset_dir = dataset_dir
         self.camera_names = camera_names
         self.norm_stats = norm_stats
         self.is_sim = None
         self.max_pad_len = 200
-        action_str = 'qpos_action' if control_mode == 'qpos' else 'ee_action'
+        action_str = 'qpos_action'
 
         self.history_stack = history_stack
 
@@ -122,8 +122,8 @@ def __getitem__(self, ts_index):
         return image_data, qpos_data, action_data, is_pad
 
 
-def get_norm_stats(dataset_dir, num_episodes, control_mode):
-    action_str = 'qpos_action' if control_mode == 'qpos' else 'ee_action'
+def get_norm_stats(dataset_dir, num_episodes):
+    action_str = 'qpos_action'
     all_qpos_data = []
     all_action_data = []
     all_episode_len = []
@@ -171,7 +171,6 @@ def BatchSampler(batch_size, episode_len_l, sample_weights=None):
         yield batch
 
 def load_data(dataset_dir, camera_names, batch_size_train, batch_size_val):
-    control_mode = "qpos"
     print(f'\nData from: {dataset_dir}\n')
 
     all_eps = find_all_processed_episodes(dataset_dir)
@@ -184,16 +183,16 @@ def load_data(dataset_dir, camera_names, batch_size_train, batch_size_val):
     val_indices = shuffled_indices[int(train_ratio * num_episodes):]
     print(f"Train episodes: {len(train_indices)}, Val episodes: {len(val_indices)}")
     # obtain normalization stats for qpos and action
-    norm_stats, all_episode_len = get_norm_stats(dataset_dir, num_episodes, control_mode)
+    norm_stats, all_episode_len = get_norm_stats(dataset_dir, num_episodes)
 
     train_episode_len_l = [all_episode_len[i] for i in train_indices]
     val_episode_len_l = [all_episode_len[i] for i in val_indices]
     batch_sampler_train = BatchSampler(batch_size_train, train_episode_len_l)
     batch_sampler_val = BatchSampler(batch_size_val, val_episode_len_l, None)
 
     # construct dataset and dataloader
-    train_dataset = EpisodicDataset(train_indices, dataset_dir, camera_names, norm_stats, train_episode_len_l, control_mode)
-    val_dataset = EpisodicDataset(val_indices, dataset_dir, camera_names, norm_stats, val_episode_len_l, control_mode)
+    train_dataset = EpisodicDataset(train_indices, dataset_dir, camera_names, norm_stats, train_episode_len_l)
+    val_dataset = EpisodicDataset(val_indices, dataset_dir, camera_names, norm_stats, val_episode_len_l)
 train_dataloader = DataLoader(train_dataset, batch_sampler=batch_sampler_train, pin_memory=True, num_workers=24, prefetch_factor=2)
 val_dataloader = DataLoader(val_dataset, batch_sampler=batch_sampler_val, pin_memory=True, num_workers=16, prefetch_factor=2)
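
With `control_mode` removed, the data pipeline always reads the `qpos_action` dataset. A minimal sketch of the updated call chain, assuming `dataset_dir`, `num_episodes`, `camera_names`, `train_indices`, and `train_episode_len_l` are set up exactly as in `load_data()` above:

```python
# Sketch of the simplified calls after this commit; all variables are assumed
# to be prepared as in load_data() in this diff.
norm_stats, all_episode_len = get_norm_stats(dataset_dir, num_episodes)  # no control_mode
train_dataset = EpisodicDataset(train_indices, dataset_dir, camera_names,
                                norm_stats, train_episode_len_l)         # no control_mode
```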

requirements.txt (+1 -3)

@@ -2,7 +2,6 @@ aiohttp==3.9.5
 aiohttp_cors==0.7.0
 aiortc==1.8.0
 av==11.0.0
-core==1.0.1
 dex_retargeting==0.1.1
 dynamixel_sdk==3.7.31
 einops==0.8.0
@@ -17,9 +16,8 @@ pandas==2.0.3
 params_proto==2.12.1
 pytransform3d==3.5.0
 PyYAML==6.0.1
-pyzed==4.1
 scikit_learn==1.3.2
-scipy==1.14.0
+scipy==1.10.1
 seaborn==0.13.2
 setuptools==69.5.1
 torch==2.3.0

scripts/deploy_sim.py (+2 -8)

@@ -86,13 +86,7 @@ def merge_act(actions_for_curr_step, k = 0.01):
 policy_path = Path(exp_path) / f"traced_jit_{args['resume_ckpt']}.pt"
 
 temporal_agg = True
-control_mode = "qpos"
-if control_mode == "ee":
-    action_dim = 26
-elif control_mode == "qpos":
-    action_dim = 28
-else:
-    raise ValueError("Invalid control mode")
+action_dim = 28
 
 chunk_size = 60
 device = "cuda"
@@ -150,7 +144,7 @@ def merge_act(actions_for_curr_step, k = 0.01):
         if history_stack > 0:
             last_action_queue.append(act)
         act = act * norm_stats["action_std"] + norm_stats["action_mean"]
-        player.step(act, left_imgs[t], right_imgs[t], control_mode)
+        player.step(act, left_imgs[t], right_imgs[t])
 except KeyboardInterrupt:
     player.end()
     exit()

scripts/plot_action.py (+1 -1)

@@ -18,7 +18,7 @@
 episode_path = Path(root) / exp_name / "processed" / episode_name
 
 data = h5py.File(str(episode_path), 'r')
-actions = np.array(data['action'])
+actions = np.array(data['qpos_action'])
 data.close()
 timestamps = actions.shape[0]
 action_dim = actions.shape[1]

scripts/post_process.py (-25)

@@ -111,30 +111,6 @@ def process_episode(file_name, ep):
     timesteps = len(closest_indices)
     qpos_actions = actions[closest_indices]
     cmds = cmds[closest_indices]
-
-    # compose new actions
-    # left wrist relative pose(6) + left hand qpos(6) + right wrist relative pose(6) + right hand qpos(6) + head yp(2)
-    ee_actions = np.zeros((timesteps, 6+6+6+6+2))
-    for t in range(timesteps):
-
-        # compute left wrist absolute pose
-        left_ee = cmds[t, 16:32].reshape((4, 4))
-        ee_actions[t, 0:3] = left_ee[0:3, 3]
-        ee_actions[t, 3:6] = rotations.intrinsic_euler_zyx_from_active_matrix(left_ee[0:3, 0:3])
-
-        # left hand qpos
-        ee_actions[t, 6:12] = qpos_actions[t, 7:13]
-
-        # compute right wrist absolute pose
-        right_ee = cmds[t, 32:48].reshape((4, 4))
-        ee_actions[t, 12:15] = right_ee[0:3, 3]
-        ee_actions[t, 15:18] = rotations.intrinsic_euler_zyx_from_active_matrix(right_ee[0:3, 0:3])
-
-        # right hand qpos
-        ee_actions[t, 18:24] = qpos_actions[t, 20:26]
-
-        # head yp
-        ee_actions[t, 24:26] = qpos_actions[t, 26:28]
 
     # save_video(left_imgs, file_name + ".mp4")
     path = os.path.dirname(file_name)
@@ -145,7 +121,6 @@ def process_episode(file_name, ep):
     start = time.time()
     hf.create_dataset('observation.image.left', data=left_imgs)
     hf.create_dataset('observation.image.right', data=right_imgs)
-    hf.create_dataset('ee_action', data=ee_actions.astype(np.float32))
     hf.create_dataset('cmds', data=cmds.astype(np.float32))
     hf.create_dataset('observation.state', data=states[closest_indices].astype(np.float32))
     hf.create_dataset('qpos_action', data=qpos_actions.astype(np.float32))
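
After this commit a processed episode file no longer contains `ee_action`; the keys written by `process_episode` above are the complete set. A minimal reader sketch (the episode path is hypothetical):

```python
import h5py
import numpy as np

# Keys match the hf.create_dataset() calls above; the path is a placeholder.
with h5py.File("processed/episode_0.hdf5", "r") as hf:
    left_imgs = np.array(hf["observation.image.left"])
    right_imgs = np.array(hf["observation.image.right"])
    cmds = np.array(hf["cmds"])
    states = np.array(hf["observation.state"])
    actions = np.array(hf["qpos_action"])  # 'ee_action' is no longer written
```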

scripts/replay_demo.py (+24 -58)

@@ -6,7 +6,6 @@
 import matplotlib.pyplot as plt
 
 from pytransform3d import rotations
-from core.RobotController import RobotController
 
 from pathlib import Path
 import h5py
@@ -20,7 +19,6 @@
 class Player:
     def __init__(self, dt=1/60):
         self.dt = dt
-        self.controller = None
         self.head_mat = None
         self.left_wrist_mat = None
         self.right_wrist_mat = None
@@ -91,14 +89,11 @@ def __init__(self, dt=1/60):
         cam_target = gymapi.Vec3(0, 0, 1)
         self.gym.viewer_camera_look_at(self.viewer, None, cam_pos, cam_target)
 
-        self.controller = RobotController()
-        self.controller.load_config('h1_inspire.yml')
-
         plt.figure(figsize=(12, 6))
         plt.ion()
 
-    def step(self, action, left_img, right_img, control_mode='qpos'):
-        qpos = self.convert_h1_qpos(action, control_mode)
+    def step(self, action, left_img, right_img):
+        qpos = self.convert_h1_qpos(action)
         states = np.zeros(qpos.shape, dtype=gymapi.DofState.dtype)
         states['pos'] = qpos
         self.gym.set_actor_dof_states(self.env, self.robot_handle, states, gymapi.STATE_POS)
@@ -124,63 +119,34 @@ def end(self):
         self.gym.destroy_sim(self.sim)
         plt.close()
 
-    def convert_h1_qpos(self, action, control_mode):
+    def convert_h1_qpos(self, action):
         '''
         left_arm_indices = [13, 14, 15, 16, 17, 18, 19]
         right_arm_indices = [32, 33, 34, 35, 36, 37, 38]
         left_hand_indices = [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
         right_hand_indices = [39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50]
         '''
-        if control_mode == 'ee':
-            left_wrist_mat = np.eye(4)
-            left_wrist_mat[0:3, 3] = action[0:3]
-            left_wrist_mat[0:3, 0:3] = rotations.active_matrix_from_intrinsic_euler_zyx(action[3:6])
-
-            right_wrist_mat = np.eye(4)
-            right_wrist_mat[0:3, 3] = action[12:15]
-            right_wrist_mat[0:3, 0:3] = rotations.active_matrix_from_intrinsic_euler_zyx(action[15:18])
-
-            self.controller.update(np.eye(4), left_wrist_mat, right_wrist_mat, np.zeros((25,3)), np.zeros((25,3)))
-            qpos = self.controller.qpos
-
-            # left hand actions
-            qpos[20:22] = action[6]
-            qpos[22:24] = action[7]
-            qpos[24:26] = action[8]
-            qpos[26:28] = action[9]
-            qpos[28] = action[10]
-            qpos[29:32] = action[11] * np.array([1, 1.6, 2.4])
-
-            # right hand actions
-            qpos[39:41] = action[18]
-            qpos[41:43] = action[19]
-            qpos[43:45] = action[20]
-            qpos[45:47] = action[21]
-            qpos[47] = action[22]
-            qpos[48:51] = action[23] * np.array([1, 1.6, 2.4])
-        elif control_mode == 'qpos':
-            qpos = np.zeros(51)
-            qpos[13:20] = action[0:7]
-
-            # left hand actions
-            qpos[20:22] = action[7]
-            qpos[22:24] = action[8]
-            qpos[24:26] = action[9]
-            qpos[26:28] = action[10]
-            qpos[28] = action[11]
-            qpos[29:32] = action[12] * np.array([1, 1.6, 2.4])
-
-            qpos[32:39] = action[13:20]
-
-            # right hand actions
-            qpos[39:41] = action[20]
-            qpos[41:43] = action[21]
-            qpos[43:45] = action[22]
-            qpos[45:47] = action[23]
-            qpos[47] = action[24]
-            qpos[48:51] = action[25] * np.array([1, 1.6, 2.4])
-        else:
-            raise NotImplementedError('Invalid control mode')
+        qpos = np.zeros(51)
+        qpos[13:20] = action[0:7]
+
+        # left hand actions
+        qpos[20:22] = action[7]
+        qpos[22:24] = action[8]
+        qpos[24:26] = action[9]
+        qpos[26:28] = action[10]
+        qpos[28] = action[11]
+        qpos[29:32] = action[12] * np.array([1, 1.6, 2.4])
+
+        qpos[32:39] = action[13:20]
+
+        # right hand actions
+        qpos[39:41] = action[20]
+        qpos[41:43] = action[21]
+        qpos[43:45] = action[22]
+        qpos[45:47] = action[23]
+        qpos[47] = action[24]
+        qpos[48:51] = action[25] * np.array([1, 1.6, 2.4])
 
         return qpos
 
 if __name__ == '__main__':
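
For orientation, the 28-dimensional `qpos_action` layout implied by this diff (arm and hand slices from `convert_h1_qpos` above, the head slice from the removed `post_process.py` code, and the total width from `action_dim = 28` in `deploy_sim.py`) appears to be:

```python
# Inferred qpos_action layout (28-dim); indices read off this diff, not a documented spec.
LEFT_ARM   = slice(0, 7)    # -> qpos[13:20]
LEFT_HAND  = slice(7, 13)   # action[12] fans out to qpos[29:32] * [1, 1.6, 2.4]
RIGHT_ARM  = slice(13, 20)  # -> qpos[32:39]
RIGHT_HAND = slice(20, 26)  # action[25] fans out to qpos[48:51] * [1, 1.6, 2.4]
HEAD_YP    = slice(26, 28)  # head yaw/pitch; recorded by post_process.py, unused in replay
```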

teleop/TeleVision.py (+6 -2)

@@ -8,12 +8,16 @@
 from webrtc.zed_server import *
 
 class OpenTeleVision:
-    def __init__(self, img_shape, shm_name, queue, toggle_streaming, stream_mode="image", cert_file="./cert.pem", key_file="./key.pem"):
+    def __init__(self, img_shape, shm_name, queue, toggle_streaming, stream_mode="image", cert_file="./cert.pem", key_file="./key.pem", ngrok=False):
         # self.app=Vuer()
         self.img_shape = (img_shape[0], 2*img_shape[1], 3)
         self.img_height, self.img_width = img_shape[:2]
 
-        self.app = Vuer(host='0.0.0.0', cert=cert_file, key=key_file, queries=dict(grid=False))
+        if ngrok:
+            self.app = Vuer(host='0.0.0.0', queries=dict(grid=False), queue_len=3)
+        else:
+            self.app = Vuer(host='0.0.0.0', cert=cert_file, key=key_file, queries=dict(grid=False), queue_len=3)
+
         self.app.add_handler("HAND_MOVE")(self.on_hand_move)
         self.app.add_handler("CAMERA_MOVE")(self.on_cam_move)
         if stream_mode == "image":
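
The `ngrok` branch omits `cert_file`/`key_file`, consistent with the README note in this commit: ngrok terminates HTTPS at its edge and forwards plain HTTP to the local port, so the local Vuer server should not load the mkcert certificate. A caller sketch, where `resolution_cropped`, `shm`, `image_queue`, and `toggle_streaming` are the caller-side objects assumed from the README example above:

```python
# With `ngrok http 8012` running, construct the viewer without local TLS.
# The argument names follow the README snippet in this commit.
tv = OpenTeleVision(resolution_cropped, shm.name, image_queue,
                    toggle_streaming, ngrok=True)
```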
