Skip to content

Commit b09e0c5

Browse files
committed
Add multi-gpu support
1 parent 549cdeb commit b09e0c5

File tree

4 files changed

+12
-17
lines changed

4 files changed

+12
-17
lines changed

cli_demo.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,12 @@
33
import signal
44
from transformers import AutoTokenizer, AutoModel
55
import readline
6-
from utils import load_model_on_gpus
76

87
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
98
model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).cuda()
10-
# 多显卡支持,使用下面三行代替上面两行,将num_gpus改为你实际的显卡数量
11-
# model_path = "THUDM/chatglm2-6b"
12-
# tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
13-
# model = load_model_on_gpus(model_path, num_gpus=2)
9+
# 多显卡支持,使用下面两行代替上面一行,将num_gpus改为你实际的显卡数量
10+
# from utils import load_model_on_gpus
11+
# model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
1412
model = model.eval()
1513

1614
os_name = platform.system()

openai_api.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -158,10 +158,9 @@ async def predict(query: str, history: List[List[str]], model_id: str):
158158
if __name__ == "__main__":
159159
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
160160
model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).cuda()
161-
# 多显卡支持,使用下面三行代替上面两行,将num_gpus改为你实际的显卡数量
162-
# model_path = "THUDM/chatglm2-6b"
163-
# tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
164-
# model = load_model_on_gpus(model_path, num_gpus=2)
161+
# 多显卡支持,使用下面两行代替上面一行,将num_gpus改为你实际的显卡数量
162+
# from utils import load_model_on_gpus
163+
# model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
165164
model.eval()
166165

167166
uvicorn.run(app, host='0.0.0.0', port=8000, workers=1)

web_demo.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,9 @@
55

66
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
77
model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).cuda()
8-
# 多显卡支持,使用下面三行代替上面两行,将num_gpus改为你实际的显卡数量
9-
# model_path = "THUDM/chatglm2-6b"
10-
# tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
11-
# model = load_model_on_gpus(model_path, num_gpus=2)
8+
# 多显卡支持,使用下面两行代替上面一行,将num_gpus改为你实际的显卡数量
9+
# from utils import load_model_on_gpus
10+
# model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
1211
model = model.eval()
1312

1413
"""Override Chatbot.postprocess"""

web_demo2.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,9 @@
1414
def get_model():
1515
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
1616
model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).cuda()
17-
# 多显卡支持,使用下面三行代替上面两行,将num_gpus改为你实际的显卡数量
18-
# model_path = "THUDM/chatglm2-6b"
19-
# tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
20-
# model = load_model_on_gpus(model_path, num_gpus=2)
17+
# 多显卡支持,使用下面两行代替上面一行,将num_gpus改为你实际的显卡数量
18+
# from utils import load_model_on_gpus
19+
# model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
2120
model = model.eval()
2221
return tokenizer, model
2322

0 commit comments

Comments (0)