Commit 60a6d8d

Modify readme about container multiple GPUs usage (#63)
1 parent 6d49241 commit 60a6d8d

2 files changed: 50 additions & 18 deletions

File tree:
- README.md
- tools/quick_launch_docker.py

README.md

Lines changed: 29 additions & 14 deletions
@@ -51,23 +51,38 @@ pip install -r requirements.txt
 
 ### Container
 
-A more convenient approach is to use the official Docker container:
+You can use the official Docker container to run the model more easily. To do this, follow these steps:
 
-~~~shell
-docker pull ghcr.io/modeltc/lightllm:main
-docker run -it --gpus all -p 8080:8080 \
-        -v your_local_path:/data/ \
-        ghcr.io/modeltc/lightllm:main /bin/bash
-~~~
+- Pull the container from the GitHub Container Registry:
 
-Or build the container locally:
+```shell
+docker pull ghcr.io/modeltc/lightllm:main
+```
 
-~~~shell
-docker build -t <image_name> .
-docker run -it --gpus all -p 8080:8080 \
-        -v your_local_path:/data/ \
-        <image_name> /bin/bash
-~~~
+- Run the container with GPU support and port mapping:
+
+```shell
+docker run -it --gpus all -p 8080:8080 \
+        -v your_local_path:/data/ \
+        ghcr.io/modeltc/lightllm:main /bin/bash
+```
+
+- Alternatively, you can build the container yourself:
+
+```shell
+docker build -t <image_name> .
+docker run -it --gpus all -p 8080:8080 \
+        -v your_local_path:/data/ \
+        <image_name> /bin/bash
+```
+
+- You can also use a helper script to launch both the container and the server:
+
+```shell
+python tools/quick_launch_docker.py --help
+```
+
+- Note: If you use multiple GPUs, you may need to increase the shared memory size by adding `--shm-size` to the `docker run` command.
 
 ### Installation
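For reference, the multi-GPU note added above corresponds to a `docker run` invocation along the following lines; this is only a sketch, and the `16g` value is an illustrative placeholder that should be sized to your hardware:

```shell
# same run command as in the README, with an explicit shared-memory size for multi-GPU use
docker run -it --gpus all -p 8080:8080 --shm-size 16g \
        -v your_local_path:/data/ \
        ghcr.io/modeltc/lightllm:main /bin/bash
```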

tools/quick_launch_docker.py

Lines changed: 21 additions & 4 deletions
@@ -19,17 +19,31 @@
     action="store_true",
     help="default not to keep the container",
 )
+group_container.add_argument(
+    "--shm-size",
+    type=str,
+    required=False,
+    help="default to half of the RAM size",
+)
 
 group_server = args.add_argument_group("server")
-group_server.add_argument("-m", "--model", type=str, required=True)
+group_server.add_argument(
+    "-m", "--model", type=str, required=True, help="path to model dir"
+)
 group_server.add_argument("-p", "--port", type=int, default=8080)
-group_server.add_argument("-n", "--num-nodes", type=int, default=1)
+group_server.add_argument(
+    "-n", "--num-proc", type=int, default=1, help="number of process/gpus"
+)
 group_server.add_argument("-mt", "--max-total-tokens", type=int, default=4096)
 args = args.parse_args()
 
 model_path = os.path.abspath(args.model)
+shm_size = (
+    args.shm_size
+    if args.shm_size
+    else (os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES") // 2)
+)
 
-print(args)
 launch_args = [
     "docker",
     "run",
@@ -40,6 +54,8 @@
     f"{args.port}:{args.port}",
     "-v",
     f"{model_path}:{model_path}",
+    "--shm-size",
+    str(shm_size),
 ]
 if args.name:
     launch_args.extend(["--name", args.name])
@@ -61,9 +77,10 @@
         "--port",
         args.port,
         "--tp",
-        args.num_nodes,
+        args.num_proc,
     ]
 )
+
 launch_args = list(map(str, launch_args))
 print(f'launching: {" ".join(launch_args)}')
 os.execvp(launch_args[0], launch_args)
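Putting the new options together, a multi-GPU launch through the helper script might look like the sketch below; the model path and the `-n 2` / `16g` values are placeholders, and `--shm-size` can be omitted to fall back to the script's default of half the host's RAM:

```shell
# -m: path to the model directory (placeholder shown here)
# -n: number of processes/GPUs, forwarded to the server as --tp
# --shm-size: optional; omit it to use the half-of-RAM default computed by the script
python tools/quick_launch_docker.py \
        -m /data/your_model_dir \
        -n 2 \
        -p 8080 \
        --shm-size 16g
```

The script then prints the assembled `docker run` command and replaces itself with it via `os.execvp`.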
