
Commit da1a4a6: change readme (1 parent: 970b17d)

4 files changed (+60, -8 lines)

README.md
Lines changed: 25 additions & 2 deletions

@@ -1,10 +1,33 @@
+# VLMbench: A Benchmark for Vision-and-Language Manipulation
+
+![task image missing](readme_files/tasks.svg)
+
+**VLMbench** is a robotics manipulation benchmark that contains various language instructions for categorized robotic manipulation tasks. In this work, we aim to fill the gap in the last mile of embodied agents: object manipulation by following human guidance, e.g., “move the red mug next to the box while keeping it upright.” Meanwhile, we introduce an Automatic Manipulation Solver (**AMSolver**), in which modular rule-based task templates are created to automatically generate robot demonstrations with language instructions, covering diverse object shapes and appearances, action types, and motion constraints. [Click here for the website and paper.](https://sites.google.com/corp/view/rlbench)
+
 The implementations of AMSolver, VLM, and 6D-CLIPort.

-In order to use the code, you should instill [PyRep](https://github.com/stepjam/PyRep) first. Then, lets install AMSolver:
+## AMSolver Install
+Users can use AMSolver to run the current tasks in VLMbench or to build new tasks. To run AMSolver, you should install [PyRep](https://github.com/stepjam/PyRep) first, then install AMSolver:

 ```bash
 pip install -r requirements.txt
 pip install .
 ```

-In the vlm folder, we have predefined some task categories and instance tasks for vlmbench dataset. If you want to customize your own task, the scripts in the tools folder can be helpful.
+In the vlm folder, we have predefined some task categories and instance tasks for VLMbench. If you want to customize your own task, the scripts in the tools folder can be helpful.
+
+## VLMbench Baselines
+
+The precollected dataset can be found here: [Dataset](https://drive.google.com/drive/folders/17dEJrIIdlDsDF6T2rn04y7Yy8mUpKfCK?usp=sharing)
+
+The pretrained models can be found here: [Model](https://drive.google.com/drive/folders/1yFbWhP2iHQvY04q8LNmrpT6_5ctTcZDk?usp=sharing)
+
+To train new 6D-CLIPort models:
+```bash
+python vlm/scripts/train_baselines.py
+```
+
+To test pretrained 6D-CLIPort models:
+```bash
+python vlm/scripts/cliport_test.py
+```

readme_files/tasks.svg
Lines changed: 1 addition & 0 deletions

tools/assets/task_template.txt
Lines changed: 28 additions & 0 deletions

@@ -0,0 +1,28 @@
+from typing import List
+import numpy as np
+import os
+from rlbench.backend.task import Task
+from pyrep.objects.shape import Shape
+from pyrep.objects.proximity_sensor import ProximitySensor
+from rlbench.const import colors
+from rlbench.backend.conditions import DetectedCondition
+from rlbench.backend.spawn_boundary import SpawnBoundary
+from amsolver.backend.unit_tasks import T0_ObtainControl, T1_MoveObjectGoal, T2_MoveObjectConstraints, TargetSpace, VLM_Object
+
+class %s(Task):
+
+    def init_task(self) -> None:
+        # TODO: This is called once when a task is initialised.
+        pass
+
+    def init_episode(self, index: int) -> List[str]:
+        # TODO: This is called at the start of each episode.
+        return ['']
+
+    def variation_count(self) -> int:
+        # TODO: The number of variations for this task.
+        return 1
+
+    def step(self) -> None:
+        # Called during each sim step. Remove this if not using.
+        pass

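The `%s` placeholder in the class name above is filled in by the task-creation scripts in the tools folder. As a rough sketch of what a finished task might look like once the template is filled in: the class name `PickHypotheticalCube`, the object names, and the success condition below are illustrative assumptions based on the standard RLBench `Task` API, not code from this commit.

```python
# Hypothetical task derived from the template above. Object names ('cube',
# 'success_sensor') assume matching objects exist in the task's scene file.
from typing import List

from rlbench.backend.task import Task
from rlbench.backend.conditions import DetectedCondition
from pyrep.objects.shape import Shape
from pyrep.objects.proximity_sensor import ProximitySensor


class PickHypotheticalCube(Task):

    def init_task(self) -> None:
        # Called once: look up scene objects and register the success condition.
        self.cube = Shape('cube')
        self.sensor = ProximitySensor('success_sensor')
        self.register_success_conditions(
            [DetectedCondition(self.cube, self.sensor)])

    def init_episode(self, index: int) -> List[str]:
        # Called at the start of each episode: return the language instructions.
        return ['pick up the cube and hold it above the sensor']

    def variation_count(self) -> int:
        # A single variation for this illustrative task.
        return 1
```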
vlm/scripts/train_baselines.py
Lines changed: 6 additions & 6 deletions

@@ -428,17 +428,17 @@ def val(data_loader, model, args, epoch):
 parser = argparse.ArgumentParser(description='')
 #dataset
-parser.add_argument('--data_dir', type=str, default=',,/vlmbench')
+parser.add_argument('--data_dir', type=str, default='../vlmbench')
 parser.add_argument('--setd', type=str, default='train')
-parser.add_argument('--img_size',nargs='+', type=int, default=[256,256])
-parser.add_argument('--batch_size', type=int, default=64, metavar='N',
-                    help='input batch size for training (default: 2)')
-parser.add_argument('--workers', type=int, default=128)
+parser.add_argument('--img_size',nargs='+', type=int, default=[224, 224])
+parser.add_argument('--batch_size', type=int, default=8, metavar='N',
+                    help='input batch size for training (default: 8)')
+parser.add_argument('--workers', type=int, default=32)
 parser.add_argument('--preprocess', action='store_true',
                     help="whether preprocess the data. Next time can directly use. Add if you don't want it.")
 parser.add_argument('--unused_camera_list', nargs='+', default=['left_shoulder', 'right_shoulder', 'overhead','wrist'])
 parser.add_argument('--use_fail_cases', action='store_true', help="add if use the fail cases")
-parser.add_argument('--sample_numbers', type=int, default=40, help="downsample from total demonstrations")
+parser.add_argument('--sample_numbers', type=int, default=0, help="downsample from total demonstrations")
 parser.add_argument('--pin_memory', action='store_true', help="do not use if the RAM is small")
 parser.add_argument('--train_tasks', nargs='+', type=str, default = None)

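For reference, a training run that sets these options explicitly might be launched as in the sketch below. This is not part of the commit: it only uses flags visible in the diff above, and the values simply restate the new defaults.

```python
# Sketch: launching vlm/scripts/train_baselines.py with the flags shown in
# the diff above. Values mirror the new defaults introduced by this commit.
import subprocess

cmd = [
    "python", "vlm/scripts/train_baselines.py",
    "--data_dir", "../vlmbench",   # dataset root (new default in this commit)
    "--setd", "train",             # which split to load
    "--img_size", "224", "224",    # new default image size
    "--batch_size", "8",
    "--workers", "32",
    "--sample_numbers", "0",       # new default (was 40)
    "--unused_camera_list", "left_shoulder", "right_shoulder", "overhead", "wrist",
]
subprocess.run(cmd, check=True)
```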