Update README with multi-GPU container usage (#63)

llehtahw · web-flow · commit 60a6d8deca24 · 2023-08-13T00:24:48.000+08:00
diff --git a/README.md b/README.md
@@ -51,23 +51,38 @@ pip install -r requirements.txt
 
 ### Container
 
-A more convenient approach is to use the official Docker container:
+You can use the official Docker container to run the model more easily. To do this, follow these steps:
 
-~~~shell
-docker pull ghcr.io/modeltc/lightllm:main
-docker run -it --gpus all -p 8080:8080                  \
-           -v your_local_path:/data/                    \
-           ghcr.io/modeltc/lightllm:main /bin/bash
-~~~
+- Pull the container from the GitHub Container Registry:
 
-Or build the container locally:
+    ```shell
+    docker pull ghcr.io/modeltc/lightllm:main
+    ```
 
-~~~shell
-docker build -t <image_name> .
-docker run -it --gpus all -p 8080:8080                  \
-           -v your_local_path:/data/                    \
-           <image_name> /bin/bash
-~~~
+- Run the container with GPU support and port mapping:
+
+    ```shell
+    docker run -it --gpus all -p 8080:8080                  \
+            -v your_local_path:/data/                       \
+            ghcr.io/modeltc/lightllm:main /bin/bash
+    ```
+
+- Alternatively, you can build the container yourself:
+
+    ```shell
+    docker build -t <image_name> .
+    docker run -it --gpus all -p 8080:8080                  \
+            -v your_local_path:/data/                       \
+            <image_name> /bin/bash
+    ```
+
+- You can also use a helper script to launch both the container and the server:
+
+    ```shell
+    python tools/quick_launch_docker.py --help
+    ```
+
+- Note: If you use multiple GPUs, you may need to increase the container's shared memory size by adding `--shm-size <size>` (for example, `--shm-size 16g`) to the `docker run` command.
 
 ### Installation
 
diff --git a/tools/quick_launch_docker.py b/tools/quick_launch_docker.py
@@ -19,17 +19,31 @@
     action="store_true",
     help="default not to keep the container",
 )
+group_container.add_argument(
+    "--shm-size",
+    type=str,
+    required=False,
+    help="default to half of the RAM size",
+)
 
 group_server = args.add_argument_group("server")
-group_server.add_argument("-m", "--model", type=str, required=True)
+group_server.add_argument(
+    "-m", "--model", type=str, required=True, help="path to model dir"
+)
 group_server.add_argument("-p", "--port", type=int, default=8080)
-group_server.add_argument("-n", "--num-nodes", type=int, default=1)
+group_server.add_argument(
+    "-n", "--num-proc", type=int, default=1, help="number of process/gpus"
+)
 group_server.add_argument("-mt", "--max-total-tokens", type=int, default=4096)
 args = args.parse_args()
 
 model_path = os.path.abspath(args.model)
+shm_size = (
+    args.shm_size
+    if args.shm_size
+    else (os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES") // 2)
+)
 
-print(args)
 launch_args = [
     "docker",
     "run",
@@ -40,6 +54,8 @@
     f"{args.port}:{args.port}",
     "-v",
     f"{model_path}:{model_path}",
+    "--shm-size",
+    str(shm_size),
 ]
 if args.name:
     launch_args.extend(["--name", args.name])
@@ -61,9 +77,10 @@
         "--port",
         args.port,
         "--tp",
-        args.num_nodes,
+        args.num_proc,
     ]
 )
+
 launch_args = list(map(str, launch_args))
 print(f'launching: {" ".join(launch_args)}')
 os.execvp(launch_args[0], launch_args)