Skip to content

Commit 5916e3f

Browse files
committed
Add multi-GPU usage comments to the Python files
1 parent 3fb5291 commit 5916e3f

File tree

3 files changed

+12
-0
lines changed

3 files changed

+12
-0
lines changed

api.py

+4
Original file line numberDiff line numberDiff line change
@@ -52,5 +52,9 @@ async def create_item(request: Request):
5252
if __name__ == '__main__':
5353
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
5454
model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).cuda()
55+
# 多显卡支持,使用下面三行代替上面两行,将num_gpus改为你实际的显卡数量
56+
# model_path = "THUDM/chatglm2-6b"
57+
# tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
58+
# model = load_model_on_gpus(model_path, num_gpus=2)
5559
model.eval()
5660
uvicorn.run(app, host='0.0.0.0', port=8000, workers=1)

openai_api.py

+4
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,10 @@ async def predict(query: str, history: List[List[str]], model_id: str):
158158
if __name__ == "__main__":
159159
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
160160
model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).cuda()
161+
# 多显卡支持,使用下面三行代替上面两行,将num_gpus改为你实际的显卡数量
162+
# model_path = "THUDM/chatglm2-6b"
163+
# tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
164+
# model = load_model_on_gpus(model_path, num_gpus=2)
161165
model.eval()
162166

163167
uvicorn.run(app, host='0.0.0.0', port=8000, workers=1)

web_demo2.py

+4
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414
def get_model():
1515
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
1616
model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).cuda()
17+
# 多显卡支持,使用下面三行代替上面两行,将num_gpus改为你实际的显卡数量
18+
# model_path = "THUDM/chatglm2-6b"
19+
# tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
20+
# model = load_model_on_gpus(model_path, num_gpus=2)
1721
model = model.eval()
1822
return tokenizer, model
1923

0 commit comments

Comments (0)