Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
543 commits
Select commit Hold shift + click to select a range
425205b
[Doc] fix the port conflict issue in the usage example (#4379)
EmmonsCurse Oct 13, 2025
a751d97
[Optimization] Fuse get_max_len and get_kv_max_len (#4369)
Sunny-bot1 Oct 13, 2025
6f53b67
[CI] fix diff_error temporarily (#4390)
EmmonsCurse Oct 14, 2025
73c8e08
【Hackathon 9th No.67】add speculate_verify (#4326)
co63oc Oct 14, 2025
14eb8b4
add x1 a3b quantization (#4397)
tianlef Oct 14, 2025
7f85f00
fix offline inference doc (#4412)
ApplEOFDiscord Oct 14, 2025
c1a2e78
add install docs (#4414)
yangjianfengo1 Oct 14, 2025
4b647d1
[CI] Fix partial instability issues (#4418)
EmmonsCurse Oct 15, 2025
c4f866c
update benchmark tools (#4416)
ZhangYulongg Oct 15, 2025
6d0cc0d
[Optimization] Optimize split_q_block kernel (#4367)
Sunny-bot1 Oct 15, 2025
d6f775e
[XPU] fix ep (#4393)
zhupengyang Oct 15, 2025
28d1b6c
[BugFix] fix multinode bugs (#4377)
ltd0924 Oct 15, 2025
abb6262
[fix] Fixed the issue of excessive/redundant spans being returned for…
qwes5s5 Oct 15, 2025
ffe7af8
[fix] fix requests & block metrics (#4404)
liyonghua0910 Oct 15, 2025
582aebd
[MTP]support mtp chunk_prefill_v1 (#4366)
freeliuzc Oct 15, 2025
4efd073
fix block_wise_fp8_v1_loader_moe_shape (#4384)
ckl117 Oct 15, 2025
6938df9
【Fix CI Bug】Fix ci bug (#4413)
chang-wenbin Oct 15, 2025
e98c1c2
Disable gcu ci (#4427)
tianshuo78520a Oct 15, 2025
bcaa98f
V1 loader default (#4251)
bukejiyu Oct 15, 2025
d8841b7
[BugFix] fix workers=1 (#4364)
ltd0924 Oct 15, 2025
bdc0207
[XPU] fix VL multi-batch accuracy issue (#4394)
cqulilujia Oct 15, 2025
fbdb056
[BUGFIX] clear request #4286 (#4402)
ltd0924 Oct 15, 2025
744287e
fix param (#4419)
freeliuzc Oct 15, 2025
b4b579a
Feature:Add support for Pooling Model Embedding and provide an OpenAI…
sunlei1024 Oct 15, 2025
7f94f06
Update benchmark_serving.py (#4438)
ZhangYulongg Oct 15, 2025
5bde20b
[BugFix] fix config bugs (#4370)
ltd0924 Oct 16, 2025
8e392f0
[XPU] support prefix cache (#4423)
ddchenhao66 Oct 16, 2025
670aaa3
[Bug fix] Fix pd for x1 thinking (#4433)
rainyfly Oct 16, 2025
fd5dd1a
[Bugfix]fix ep clear buffer perf (#4389)
gzy19990617 Oct 16, 2025
a5063b9
[XPU] moe support VL 0-dim input (#4408)
cqulilujia Oct 16, 2025
9dc3968
[benchmark] Fix benchmark duration calculation logic (#4446)
ZhangYulongg Oct 16, 2025
98f8c37
Add filtering for failed requests in benchmark outputs (#4448)
ZhangYulongg Oct 16, 2025
5abf597
perf: optimize ZMQ communication with async queue and single-threaded…
sunlei1024 Oct 16, 2025
f72be7a
[BUG] fix ep bug (#4275)
kevincheng2 Oct 16, 2025
6adfbe0
【Hackathon 9th No.86】autogen `MultiQueryDecoderAttention` template_in…
ccsuzzh Oct 16, 2025
8f77adc
Add data dictionary for API response processing (#4454)
ZhangYulongg Oct 16, 2025
4251ac5
【Fix】 remove text_after_process & raw_prediction (#4421)
LiqinruiG Oct 16, 2025
3bbe99e
[Intel HPU] Enable dist sampler on intel hpu platform (#4445)
JianyuLi01 Oct 16, 2025
26ff2f8
[XPU] refine fused moe (#4219)
zhupengyang Oct 16, 2025
b87e2c6
[CUDAGraph]Add support for custom all-reduce operators under SOT mode…
DrRyanHuang Oct 16, 2025
0355235
[FDConfig]Remove total_block_num/dtype/block_size/enc_dec_block_num i…
YuanRisheng Oct 16, 2025
dbca63f
[bugfix] kill cache_transfer_manager process (#4401)
xiaolei373 Oct 16, 2025
db82e9a
[BugFix]Fix wfp8afp8 triton moe group_topk renormalized=True (#4449)
ckl117 Oct 16, 2025
d1637db
modify_comment (#4460)
xiaolei373 Oct 17, 2025
a37c941
[FDConfig]Remove reasoning_parser/guided_decoding_backend/disable_any…
YuanRisheng Oct 17, 2025
49cea8f
[SOT][Cudagraph] Remove BreakGraph of #3302 && update CustomOp (#3694)
DrRyanHuang Oct 17, 2025
930f7b7
[Optimization] Put get_block_shape_and_split_kv_block in cuda graph f…
Sunny-bot1 Oct 17, 2025
5885953
[Others] add PR Template (#4452)
zeroRains Oct 17, 2025
0413c32
[Optimize] Set preempted schedule log as info level (#4453)
rainyfly Oct 17, 2025
6160145
[SOT] Change warnings to errors and remove fallback operations (#4378)
DrRyanHuang Oct 17, 2025
b134e6a
[BugFix]Dev fix custom ar unstable result (#4437)
ckl117 Oct 17, 2025
15b6b8d
[CINN] Remove the restriction of automatically falling back to SOT af…
DrRyanHuang Oct 17, 2025
c234b99
[Feature] support pooling model dummy_run (#4345)
lizexu123 Oct 17, 2025
14785eb
[XPU] abstract a hardware-agnostic operator wrapper for prefix cache …
ddchenhao66 Oct 17, 2025
0151087
[CI] Fix partial instability issues (#4461)
EmmonsCurse Oct 17, 2025
720697e
add environment variables (#4466)
xiaolei373 Oct 17, 2025
a3e0a15
fix seqlen sync (#4442)
aquagull Oct 17, 2025
ba5c2b7
[Docx] add language (en/cn) switch links (#4470)
yangjianfengo1 Oct 17, 2025
4b66151
[Iluvatar GPU] Adapt VL model (#4313)
wuyujiji Oct 17, 2025
63ef593
check paddle version for v1 loader (#4473)
ckl117 Oct 17, 2025
a64c040
[XPU]Fix w4a8 precision bug && rollback moe algo (#4463)
iosmers Oct 17, 2025
329d074
[Docx] fix the broken link (#4479)
yangjianfengo1 Oct 17, 2025
8ccfd97
LLM.chat add "tools" param (#4415)
kxz2002 Oct 17, 2025
b5b993e
【feature】support n parameter (#4273)
kxz2002 Oct 17, 2025
817210e
[ATTN]delete code and add ffn and moe layer level test (#4440)
zhoutianzi666 Oct 19, 2025
c4fc007
[CI] Handle unit test issues (#4483)
EmmonsCurse Oct 20, 2025
528c557
[Graph Optimization][Speculative Decoding] Fix the bug of CUDAGraph +…
gongshaotian Oct 20, 2025
1e59905
Optimization of ‘tools’ in request fields (#4380)
AuferGachet Oct 20, 2025
80a16c4
[fix] adjust mctlass moe api (#4474)
handsomecoderyang Oct 20, 2025
1b9f351
Support GPT-OSS-BF16 (#4240)
Limerances Oct 20, 2025
47595a2
[Feature] support mtp logprob (#4464)
Deleter-D Oct 20, 2025
de2eaf4
add qwen-2.5-7B-PRM/ernie-rm (#4319)
bukejiyu Oct 20, 2025
b8d2354
[fix] remove cache tensor creation for cache_transfer_manager (#4420)
liyonghua0910 Oct 20, 2025
10e85da
update benchmark scripts (#4497)
ZhangYulongg Oct 20, 2025
97ee3c4
[XPU]Fix w4a8 garbled code issue (#4493)
yyssys Oct 20, 2025
bf03b6f
fix vl bug (#4485)
iosmers Oct 20, 2025
36af88f
[BugFix][CI] Clean up SOT code cache using `tearDown` in CINN unitest…
DrRyanHuang Oct 20, 2025
cef3164
Optimizing the performance of think length limit using custom operato…
yuanlehome Oct 20, 2025
a498736
[APIServer] support define gunicorn timeout (#4496)
ltd0924 Oct 20, 2025
70a29ec
[CI] update ernie-4_5-vl baseline (#4495)
EmmonsCurse Oct 21, 2025
d85ef53
【BugFix】fix ep buffer clear (#4450)
gzy19990617 Oct 21, 2025
9956434
[XPU] bind block_attn kernel with pybind (#4499)
cqulilujia Oct 21, 2025
775edcc
[Executor] Default use CUDAGraph (#3594)
gongshaotian Oct 21, 2025
ee91522
[Speculative Decoding] Add draft_logprobs Support for Speculative Dec…
sunlei1024 Oct 21, 2025
2b53c4d
【CI】Add test cases for n parameter and streaming validation (#4503)
DDDivano Oct 21, 2025
fb76cdf
[Fearture] Support mm model close prefix cache (#4459)
ltd0924 Oct 21, 2025
153f15d
[Doc]add deepseek wint4 ce (#4517)
tianlef Oct 21, 2025
7cbe6b2
[FDConfig] Turn on the CUDAGraph + Speculative Decoding switch (#4511)
gongshaotian Oct 21, 2025
1753913
Add comprehensive unit tests for limit_thinking_content_length operat…
Copilot Oct 21, 2025
2bd3fb6
[XPU]add xpu ci ep case (#4432)
plusNew001 Oct 21, 2025
809c1ac
feat: add post-processing step for pool_output (#4462)
sunlei1024 Oct 21, 2025
d70aacf
[FDConfig] Turn on the CUDAGraph + MultiModel switch (#4512)
gongshaotian Oct 21, 2025
dc7faca
[Iluvatar GPU] fix ci error caused by rebuild_padding param and cuda …
wuyujiji Oct 21, 2025
3b58310
enhance set_stop_value_multi_ends and standardize the registration of…
yuanlehome Oct 21, 2025
f69c9cd
[CI] Remove redundant .coveragerc file (#4521)
EmmonsCurse Oct 21, 2025
cd9195d
[XPU]Modify the xpu memory display unit of log (#4534)
yyssys Oct 22, 2025
b6cd3ae
[Feature] support fd return decode response (#4407)
zhuangzhuang12 Oct 22, 2025
dce9888
[Feature] Support AsyncLLM (#4458)
xyxinyang Oct 22, 2025
8e02a50
[CI] stable test_rollout_model.py (#4536)
yuanlehome Oct 22, 2025
d7bcedf
small change in test_fusedmoe.py (#4538)
zhoutianzi666 Oct 22, 2025
3a6883a
c++ code format (#4527)
zhupengyang Oct 22, 2025
918e4e9
[XPU] Change XPU stable third-party version (#4524)
plusNew001 Oct 22, 2025
8a02ab4
[FDConfig]Turn on the CUDAGraph + RL switch (#4508)
gongshaotian Oct 23, 2025
ac4f5ca
delete useless code (#4544)
YuanRisheng Oct 23, 2025
bbf06b9
[BugFix]Fix finish reason (#4543)
luukunn Oct 23, 2025
2676a91
[Doc]fix deepseek ce (#4560)
tianlef Oct 23, 2025
5443b2c
[XPU] xpu support think length limit (#4539)
ddchenhao66 Oct 23, 2025
a240425
[CI] Optimize coverage upload reporting (#4547)
EmmonsCurse Oct 23, 2025
4ffe41a
WINT4/WINT8 dense gemm default use Machete (#4451)
Sunny-bot1 Oct 23, 2025
3a43dbf
[XPU] merge apply_tp, ops support token_num = 0 (#4507)
zhupengyang Oct 23, 2025
5a8c604
[BugFix] Fix decode_type which has been deleted in req and optimize t…
RichardWooSJTU Oct 23, 2025
9dc5c3e
[Graph Optimization] Support CUDAGraph Padding + MTP (#4545)
gongshaotian Oct 23, 2025
e36343d
[FDConfig]Turn on the CUDAGraph + PD Disaggregation switch (#4530)
gongshaotian Oct 23, 2025
8718fa3
support static C8 (#4568)
Sunny-bot1 Oct 23, 2025
f7069b8
[Metax] adapt DeepSeek (#4498)
xiaozude Oct 24, 2025
8edc5cc
[BugFix] fix create_cache_tensor for ep (#4542)
liyonghua0910 Oct 24, 2025
b60ce49
[EP] fix adapter bugs (#4572)
ltd0924 Oct 24, 2025
5fbc653
fix v1 hang bug (#4573)
iosmers Oct 24, 2025
83d45af
fix import image_ops error on some platforms (#4559)
zoooo0820 Oct 24, 2025
e02a812
[CLI]Update parameters in bench latecy cli tool and fix collect-env c…
qwes5s5 Oct 24, 2025
f42ed6d
[Graph Optimization] Add dy_runnable and introduce cudagraph_switch_t…
DrRyanHuang Oct 24, 2025
822dea8
[XPU]Moe uses a new operator (#4585)
yyssys Oct 24, 2025
e4e3ced
[Feature] Support Paddle-OCR (#4396)
ming1753 Oct 24, 2025
327fa4c
[DataProcessor] add reasoning_tokens into usage info (#4520)
kxz2002 Oct 25, 2025
dc1a9c7
perf: Optimize task queue communication from engine to worker (#4531)
sunlei1024 Oct 25, 2025
83b7208
Clean up ports after processing results (#4587)
ZhangYulongg Oct 27, 2025
ebae69b
[CI] Add /re-run command in PR comments to restart failed CI workflow…
EmmonsCurse Oct 27, 2025
cdc40cd
[Others] api server exits when worker process is dead (#3271)
liyonghua0910 Oct 27, 2025
5c6105f
[XPU] bind some OPs for VL model with pybind (#4522)
cqulilujia Oct 27, 2025
acd3317
[V1 loader] Qwen25 VL support v1 loader and torch style safetensors l…
CSWYF3634076 Oct 27, 2025
5c63a08
[Feature] Support logprobs_mode (#4567)
ckl117 Oct 27, 2025
a4fb3d4
[CI] Fix path error of /re-run (#4606)
EmmonsCurse Oct 27, 2025
8aab4e3
[Feature] mm support prefix cache (#4134)
kevincheng2 Oct 27, 2025
25a983b
1.fix the bug of draft model with ep 2.fix sampler bug (#4589)
gongshaotian Oct 27, 2025
c91c504
[XPU] update kunlun doc about supported models (#4586)
ddchenhao66 Oct 27, 2025
70aa742
benchmark工具适配SGLang框架 (#4607)
ophilia-lee Oct 27, 2025
64d1aa9
[Unitest]Add unitest of Attention Layer (#4494)
K11OntheBoat Oct 27, 2025
3729e91
remove dev sync in prefill (#4598)
zhoutianzi666 Oct 27, 2025
6dcf5a3
fix: resolve decode bug in offline stream output (#4603)
xyxinyang Oct 27, 2025
7681375
[BugFix] PaddleOCR-VL fix FD_DEBUG type and support v1 loader (#4605)
ming1753 Oct 28, 2025
6426414
[Feature] EngineWorkerQueue anonymous port (#4597)
ST-XX Oct 28, 2025
0b196d8
[docs] add cli uasge to docs (#4569)
xiaolei373 Oct 28, 2025
31180a6
fix_run_batch_unittest (#4613)
xiaolei373 Oct 28, 2025
b2c6c41
[CI] Relocate server test cases from ci_use directory to e2e (#4608)
EmmonsCurse Oct 28, 2025
86d5006
[Graph Optimization][Speculative Decoding] Update yaml and fix typo (…
gongshaotian Oct 28, 2025
b401483
Extend sleep time to 10 seconds in switch_service (#4618)
ZhangYulongg Oct 28, 2025
c63361f
[Speculative Decoding][MTP]Support mtp in epdptp mode (#4614)
freeliuzc Oct 28, 2025
4d2f478
[BugFix] fix TPDP mix parallel infer (#4583)
lizhenyun01 Oct 28, 2025
07956a8
[Graph Optimization] Fix IR graph dependency error exposed after enab…
DrRyanHuang Oct 28, 2025
cd6d1f6
[XPU]add xpu ci w4a8 case (#4501)
yyssys Oct 28, 2025
567f610
[CI][BugFix] fix port conflicts in concurrent ci test and add more un…
xyxinyang Oct 28, 2025
e1ac90d
[CI] Revert test_rollout_model directory change (#4626)
EmmonsCurse Oct 28, 2025
2a9ed72
feat: add support for API usage with multimodal models (#4548)
sunlei1024 Oct 28, 2025
0a0c74e
[XPU] Support PaddleOCR-VL model for XPU (#4529)
cqulilujia Oct 28, 2025
fff5fb5
[Graph Optimization] Refactor default capture list (#4617)
gongshaotian Oct 28, 2025
561b9f3
[BugFix] fix paddleocr prefix cache bug (#4625)
ming1753 Oct 28, 2025
20756cd
fix import jit.marker.unified (#4622)
yuanlehome Oct 28, 2025
24b9505
add einops dependency (#4633)
zhang-prog Oct 28, 2025
a012e36
[Feature] support logits processors (#4515)
liyonghua0910 Oct 28, 2025
14e7d88
[feature] support reward api (#4518)
xiaolei373 Oct 28, 2025
0dde936
[BugFix] fix total_block_num init error in worker_process (#4553)
RichardWooSJTU Oct 29, 2025
fc5cd1a
[BugFix] Fix graph opt test case (#4634)
gongshaotian Oct 29, 2025
14f8cdd
[Feature] add mm token usage (#4570)
ApplEOFDiscord Oct 29, 2025
c92eeed
[XPU] Update the return value of TextImageGatherScatter (#4636)
ddchenhao66 Oct 29, 2025
d68345c
[Docs] Add PaddleOCR-VL-0.9B best practices (#4658)
ming1753 Oct 29, 2025
8f40dfa
[XPU] fix pos_emb_type bug (#4638)
cqulilujia Oct 29, 2025
19df1ae
[Docs] add Qwen25vl yaml (#4662)
xjkmfa Oct 29, 2025
c30bfb2
[Feature] add a new reasoning parser (#4571)
kxz2002 Oct 29, 2025
fd50152
Increase pytest timeout for XPU test (#4665)
plusNew001 Oct 29, 2025
dab04ab
add noaux_tc to unitest fused_moe (#4656)
zhoutianzi666 Oct 29, 2025
50be19a
[EP] fix several bugs in data parallel (#4657)
ltd0924 Oct 30, 2025
e25c067
[OP] Add InferShape&InferDtype for `per_token_quant_padding` (#4667)
DrRyanHuang Oct 30, 2025
1712e13
【Hackathon 9th No.86】autogen `MoeFastHardamardImplWrapper` template_i…
ccsuzzh Oct 30, 2025
fddda50
Add ut for speculative sampler (#4650)
Deleter-D Oct 30, 2025
cfdd160
update doc (#4675)
ApplEOFDiscord Oct 30, 2025
cd3b7cc
[Graph Optimization] Add the CUDAGraph usage switch for Draft Model (…
gongshaotian Oct 30, 2025
d7d0112
[CI] Add test for paddleocr_vl (#4627)
Limerances Oct 30, 2025
f1de348
Update common_engine.py
Jiang-Jia-Jun Oct 30, 2025
8b9c946
add real gate_correction_bias weight to mock un-balanced dispatch (#…
zhoutianzi666 Oct 30, 2025
ca52cad
Update multi-node_deployment.md
Jiang-Jia-Jun Oct 30, 2025
ec7746b
Update multi-node_deployment.md
Jiang-Jia-Jun Oct 30, 2025
0089287
[noauxtc_kernel] remove useless code (#4643)
zhoutianzi666 Oct 30, 2025
7dc9d98
[BugFix] fix offline llm chat "enable_thinking" is always "False" (#4…
kxz2002 Oct 30, 2025
35286ce
fix total_block_num init error in worker_process (#4687)
xyxinyang Oct 30, 2025
b73a781
fix --logprobs-mode raw_logits (#4681)
ckl117 Oct 30, 2025
b87384a
[XPU] xpu currently disable prefix cache for VL model (#4695)
ddchenhao66 Oct 31, 2025
ea866e4
[XPU] [CI] Add Vl case (#4649)
plusNew001 Oct 31, 2025
82bd7e5
[BugFix] Fix finish reason in _create_chat_completion_choice (#4582)
kxz2002 Oct 31, 2025
a2870ed
[Feature] Unify the registration name recognition for tool_parser and…
kxz2002 Oct 31, 2025
b61a272
[BugFix] fix unittest of get_save_output_v1 (#4701)
Wanglongzhi2001 Oct 31, 2025
937bcfc
[XPU] [CI] Lock xvllm version (#4715)
plusNew001 Oct 31, 2025
28de91b
[Graph Optimization] SOT+CUDAGraph support ERNIE4.5T VL 28B / 424B (…
DrRyanHuang Oct 31, 2025
1f3ce65
[Feature] support mtp distribution equivalence verification (#4699)
Deleter-D Oct 31, 2025
9b18f0b
cache scale load (#4624)
Sunny-bot1 Oct 31, 2025
10de7a3
add flops and bandwidth to test_ffn.py (#4704)
zhoutianzi666 Oct 31, 2025
412097c
benchmark工具支持受限解码场景指定response_format (#4718)
ophilia-lee Oct 31, 2025
dde7ba3
[CI]add_tokenizer_cli_unitest (#4620)
xiaolei373 Oct 31, 2025
64e875b
[Scheduler] update v1 prefill batch (#4611)
kevincheng2 Oct 31, 2025
0f75b62
[BugFix] Fix profile run in pd-disaggregated deployment (#4584)
liyonghua0910 Oct 31, 2025
096d87d
fix bug (#4679)
kevincheng2 Oct 31, 2025
c801d31
add checker (#4711)
kevincheng2 Oct 31, 2025
316f784
fix wint2 config (#4721)
chang-wenbin Oct 31, 2025
88a94c8
[FDConfig] [PD Disaggregation] [Graph Optimization] Close Cudagraph f…
littledgg Oct 31, 2025
3cbca75
[XPU] xpu support neox style ROPE (#4719)
ddchenhao66 Oct 31, 2025
2774602
Skip building native architecture when specifying arch list (#4727)
ming1753 Oct 31, 2025
10358bf
fix noaux (#4731)
zhoutianzi666 Oct 31, 2025
b301bd6
[BugFix] fix thinking bug (#4710)
yuanlehome Oct 31, 2025
acef624
[CI] Fix rollout_model test logic (#4730)
EmmonsCurse Oct 31, 2025
4ac6de9
[Feature] support pooling model runner (#4590)
lizexu123 Oct 31, 2025
6e01be2
format code (#4720)
zhoutianzi666 Nov 1, 2025
9eff788
[CI] fix some ci yaml (#4747)
EmmonsCurse Nov 2, 2025
b8bf571
[Docs]Update XPU document version to 2.3.0 (#4741)
yyssys Nov 3, 2025
f44f4ba
support mtp in splitewise and scheduler_v1 mode (#4743)
freeliuzc Nov 3, 2025
1139879
[Speculative Decoding][MTP]Support attn mask offset (#4641)
freeliuzc Nov 3, 2025
44ce91a
[Docs]Add parameter to the start service command (#4753)
yyssys Nov 3, 2025
b1dd508
[Docs]Add parameter (#4755)
yyssys Nov 3, 2025
c657f8d
[Docs] fix PaddleOCR-VL docs bug (#4702)
ming1753 Nov 3, 2025
f83d0cf
[Feature] Support eplb for fd (#4599)
rainyfly Nov 3, 2025
377f3bf
[XPU] add v1 support for bf16 (#4744)
iosmers Nov 3, 2025
7b35488
【DataProcessor】add options thinking_mode (#4735)
luukunn Nov 3, 2025
25498ef
[Optimize] Support and robust for tpN for PD (#4595)
rainyfly Nov 3, 2025
9ec29f6
[Docs]fix error (#4768)
yyssys Nov 3, 2025
69c2f3c
[CI]test common model (#4697)
bukejiyu Nov 3, 2025
c95d074
[Metax] adapt cutlass moe for ernie-vl (#4685)
neilzhuu Nov 3, 2025
8690cf8
fix Cfp8 for RL load (#4144)
rsmallblue Nov 3, 2025
35a6969
[Docs] PaddleOCR-VL add RTX3060 server param (#4765)
ming1753 Nov 3, 2025
5233825
test scheduler (#4739)
kevincheng2 Nov 3, 2025
9887025
Update run_w4a8.py (#4783)
plusNew001 Nov 3, 2025
855a2a6
fix attn_params (#4787)
freeliuzc Nov 4, 2025
41bfa10
[CI]delete test_common_model (#4794)
bukejiyu Nov 4, 2025
4a49487
Update mkdocs.yml
Jiang-Jia-Jun Nov 4, 2025
bffa08b
[XPU] fix thinking bug where output only contains reasoning_content (…
ddchenhao66 Nov 4, 2025
007ee71
[XPU] add deploy doc for PaddleOCR-VL in XPU (#4784)
cqulilujia Nov 4, 2025
8a40374
[BugFix] Fix ernie4_5_vl_processor.py and qwen_vl_processor.py can no…
kxz2002 Nov 4, 2025
af7e0f2
supports internode_ll_two_stage (#4162)
carryyu Nov 4, 2025
3e9dda3
supports pd partn (#4615)
carryyu Nov 4, 2025
9547fa2
[Docs] Add new support models (#4801)
ming1753 Nov 4, 2025
722110a
[CI] Refactor CE wheel upload for multiple target paths (#4790)
EmmonsCurse Nov 4, 2025
7325264
updata mkdocs.yml (#4804)
yangjianfengo1 Nov 4, 2025
1b61d62
[fix] fix v0 pd, let worker step_shm_value create=False (#4780)
liyonghua0910 Nov 4, 2025
2c281e6
Update Unit Test for PaddleOCR-VL (#4802)
Limerances Nov 4, 2025
7472230
[Metax] adapt cutlass moe and fix mla attention (#4602)
xiaozude Nov 5, 2025
1c3ca48
[Feature][Executor] GPU Model Runner Supports prompt_logprobs and max…
ckl117 Nov 5, 2025
937eb3c
[get_padding_offset.] clean get_padding_offset.cu (#4777)
zhoutianzi666 Nov 5, 2025
2fd254e
support ep+tp at op layer (#4688)
zhupengyang Nov 5, 2025
9676cc8
fix parser register name (#4795)
kxz2002 Nov 5, 2025
876e4a8
remove input_ids from ForwardMeta (#4793)
zhoutianzi666 Nov 5, 2025
cc8f531
[Feature] Add timestamp for profiler (#4726)
rainyfly Nov 5, 2025
ea1dd0e
[XPU]Support V1 loader in weight_only Model (#4808)
iosmers Nov 5, 2025
131d76d
[Bug Fix] process transparent image (#4807)
ApplEOFDiscord Nov 5, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 1 addition & 1 deletion .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
---
Language: Cpp
BasedOnStyle: Google
IndentWidth: 4
IndentWidth: 2
TabWidth: 2
ContinuationIndentWidth: 4
AccessModifierOffset: -1 # The private/protected/public has no indent in class
Expand Down
30 changes: 30 additions & 0 deletions .github/actions/rerun-workflow/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Composite action that runs rerun.sh, handing every input to the script as an
# environment variable of the same name.
name: 'Rerun Workflow'
description: 'Re-run GitHub Actions workflow for a given Pull Request'
# All five inputs are mandatory.
inputs:
GITHUB_TOKEN:
description: 'GitHub token with repo scope'
required: true
OWNER:
description: 'Repository owner'
required: true
REPO:
description: 'Repository name'
required: true
PR_ID:
description: 'Pull Request ID'
required: true
# rerun.sh compares this value against the literal "all-failed"; any other
# value is matched against job names.
JOB_NAME:
description: 'Job name to rerun'
required: true

runs:
using: 'composite'
steps:
# The script path is relative to the step's working directory.
# NOTE(review): presumably requires the repository to be checked out first - confirm.
- run: bash ./.github/actions/rerun-workflow/rerun.sh
shell: bash
# Inputs are exposed to the script through its environment.
env:
GITHUB_TOKEN: ${{ inputs.GITHUB_TOKEN }}
OWNER: ${{ inputs.OWNER }}
REPO: ${{ inputs.REPO }}
PR_ID: ${{ inputs.PR_ID }}
JOB_NAME: ${{ inputs.JOB_NAME }}
77 changes: 77 additions & 0 deletions .github/actions/rerun-workflow/rerun.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -euo pipefail

# Re-run GitHub Actions jobs for the head commit of a pull request.
#
# Required environment variables:
#   GITHUB_TOKEN  token sent in the Authorization header of every API call
#   OWNER         repository owner
#   REPO          repository name
#   PR_ID         pull request number
#   JOB_NAME      exact job name to re-run, or "all-failed" to re-run the
#                 failed jobs of every workflow run found for the commit
#
# Exit status: non-zero when a required variable is missing, an API call or
# JSON parse fails, the head commit cannot be resolved, or no workflow run
# exists for that commit. A re-run request that the API rejects is only
# logged, so the remaining runs/jobs are still processed.

: "${GITHUB_TOKEN:?GITHUB_TOKEN must be set}"
: "${OWNER:?OWNER must be set}"
: "${REPO:?REPO must be set}"
: "${PR_ID:?PR_ID must be set}"
: "${JOB_NAME:?JOB_NAME must be set}"

readonly API_ROOT="https://api.github.com/repos/${OWNER}/${REPO}"

# GET "$1" (a path relative to the repository API root); prints the JSON body.
gh_get() {
  curl -sS -H "Authorization: token ${GITHUB_TOKEN}" "${API_ROOT}/$1"
}

# POST to "$1" (a path relative to the repository API root); prints only the
# HTTP status code, the response body is discarded.
gh_post_status() {
  curl -sS -X POST -o /dev/null -w "%{http_code}" \
    -H "Accept: application/vnd.github.v3+json" \
    -H "Authorization: Bearer ${GITHUB_TOKEN}" \
    "${API_ROOT}/$1"
}

# '// empty' turns a missing .head.sha (e.g. an API error object) into an
# empty string instead of the literal text "null".
COMMIT_SHA=$(gh_get "pulls/${PR_ID}" | jq -r '.head.sha // empty')
if [[ -z "${COMMIT_SHA}" ]]; then
  echo "Failed to resolve head commit for PR ${PR_ID} in ${OWNER}/${REPO}." >&2
  exit 1
fi

echo "Commit SHA: $COMMIT_SHA"

response=$(gh_get "actions/runs?head_sha=${COMMIT_SHA}&per_page=100")

echo "Response: $response"

# '[]?' yields nothing (rather than a jq error) when the key is absent, so the
# "no matching runs" branch below reports the problem.
run_ids=$(jq -r '.workflow_runs[]?.id' <<< "$response")

if [[ -z "$run_ids" ]]; then
  echo "No matching workflow runs found for commit $COMMIT_SHA."
  exit 1
fi

echo "Found run_ids for commit $COMMIT_SHA: $run_ids"

# Word splitting is intentional: jq prints one id per line.
for run_id in $run_ids; do
  if [[ "$JOB_NAME" == "all-failed" ]]; then
    echo "Rerunning all failed jobs for run_id: $run_id"

    status=$(gh_post_status "actions/runs/${run_id}/rerun-failed-jobs")
    if [[ "$status" == "201" ]]; then
      echo "Successfully requested rerun for all failed jobs in run_id: $run_id"
    else
      echo "Failed to request rerun for run_id: $run_id with status code $status"
    fi
    continue
  fi

  # Ask for the maximum page size; the API's default page is smaller and could
  # omit the job being looked for in runs with many jobs.
  jobs_response=$(gh_get "actions/runs/${run_id}/jobs?per_page=100")

  echo "Jobs Response for run_id $run_id: $jobs_response"

  # Every job whose name equals JOB_NAME is selected, whatever its conclusion.
  block_jobs=$(jq -r --arg job_name "$JOB_NAME" \
    '.jobs[]? | select(.name == $job_name) | .id' <<< "$jobs_response")

  if [[ -z "$block_jobs" ]]; then
    echo "No block jobs found for run_id $run_id with name $JOB_NAME."
    continue
  fi

  echo "Found block jobs for run_id $run_id: $block_jobs"

  for job_id in $block_jobs; do
    echo "Rerunning job_id: $job_id"
    status=$(gh_post_status "actions/jobs/${job_id}/rerun")
    if [[ "$status" == "201" ]]; then
      echo "Successfully requested rerun for job_id: $job_id"
    else
      echo "Failed to request rerun for job_id: $job_id with status code $status"
    fi
  done
done
30 changes: 30 additions & 0 deletions .github/pull_request_template.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<!-- TemplateReference: https://github.com/PaddlePaddle/FastDeploy/blob/develop/.github/pull_request_template.md -->

<!-- Thank you for your contribution! Please follow these guidelines to enhance your pull request. If anything is unclear, submit your PR and reach out to maintainers for assistance. -->

## Motivation

<!-- Describe the purpose and goals of this pull request. -->

## Modifications

<!-- Detail the changes made in this pull request. -->

## Usage or Command

<!-- If this PR adds a new feature, describe how to use it. -->
<!-- If this PR is a performance optimization or a bug fix, provide the command to run. -->

## Accuracy Tests

<!-- If this pull request affects model outputs (e.g., changes to the kernel or model forward code), provide accuracy test results. -->

## Checklist

- [ ] Add at least one tag to the PR title.
- Tag list: [`[FDConfig]`,`[APIServer]`,`[Engine]`, `[Scheduler]`, `[PD Disaggregation]`, `[Executor]`, `[Graph Optimization]`, `[Speculative Decoding]`, `[RL]`, `[Models]`, `[Quantization]`, `[Loader]`, `[OP]`, `[KVCache]`, `[DataProcessor]`, `[BugFix]`, `[Docs]`, `[CI]`, `[Optimization]`, `[Feature]`, `[Benchmark]`, `[Others]`, `[XPU]`, `[HPU]`, `[GCU]`, `[DCU]`, `[Iluvatar]`, `[Metax]`]
- You can add new tags based on the PR content, but the semantics must be clear.
- [ ] Format your code, run `pre-commit` before commit.
- [ ] Add unit tests. If no unit tests are added, explain why in this PR.
- [ ] Provide accuracy results.
- [ ] If the current PR is submitting to the `release` branch, make sure the PR has been submitted to the `develop` branch, then cherry-pick it to the `release` branch with the `[Cherry-Pick]` PR tag.
13 changes: 9 additions & 4 deletions .github/workflows/_accuracy_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:
FULL_REPO="${{ github.repository }}"
REPO_NAME="${FULL_REPO##*/}"
BASE_BRANCH="${{ github.base_ref }}"

docker pull ${docker_image}
# Clean the repository directory before starting
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
-e "REPO_NAME=${REPO_NAME}" \
Expand All @@ -55,7 +55,7 @@ jobs:
fi
'

wget -q ${fd_archive_url}
wget -q --no-proxy ${fd_archive_url}
tar -xf FastDeploy.tar.gz
rm -rf FastDeploy.tar.gz
cd FastDeploy
Expand All @@ -80,12 +80,14 @@ jobs:
FD_API_PORT=$((42088 + DEVICE_PORT * 100))
FD_ENGINE_QUEUE_PORT=$((42058 + DEVICE_PORT * 100))
FD_METRICS_PORT=$((42078 + DEVICE_PORT * 100))
FD_CACHE_QUEUE_PORT=$((42098 + DEVICE_PORT * 100))
echo "Test ENV Parameter:"
echo "========================================================="
echo "FLASK_PORT=${FLASK_PORT}"
echo "FD_API_PORT=${FD_API_PORT}"
echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}"
echo "FD_METRICS_PORT=${FD_METRICS_PORT}"
echo "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}"
echo "DEVICES=${DEVICES}"
echo "========================================================="

Expand All @@ -99,7 +101,7 @@ jobs:
exit 1
fi

PORTS=($FLASK_PORT $FD_API_PORT $FD_ENGINE_QUEUE_PORT $FD_METRICS_PORT)
PORTS=($FLASK_PORT $FD_API_PORT $FD_ENGINE_QUEUE_PORT $FD_METRICS_PORT $FD_CACHE_QUEUE_PORT)
LOG_FILE="./port_cleanup_$(date +%Y%m%d_%H%M%S).log"
echo "==== LOG_FILE is ${LOG_FILE} ===="

Expand Down Expand Up @@ -133,6 +135,7 @@ jobs:
-e "FD_API_PORT=${FD_API_PORT}" \
-e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
-e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
-e "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}" \
-e "FLASK_PORT=${FLASK_PORT}" \
-v "${MODEL_CACHE_DIR}:/MODELDATA" \
-v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
Expand All @@ -152,11 +155,13 @@ jobs:
./llm-deploy-linux-amd64 -python python3.10 \
-model_name ERNIE-4.5-0.3B-Paddle \
-model_path /MODELDATA \
--skip install
--skip install,model

git config --global --add safe.directory /workspace/FastDeploy
cd FastDeploy
pushd tests/ce/deploy
ps -ef | grep "${FD_CACHE_QUEUE_PORT}" | grep -v grep | awk "{print \$2}" | xargs -r kill -9
ps -ef | grep "${FD_ENGINE_QUEUE_PORT}" | grep -v grep | awk "{print \$2}" | xargs -r kill -9
python3.10 deploy.py > dd.log 2>&1 &
sleep 3
curl -X POST http://0.0.0.0:${FLASK_PORT}/start \
Expand Down
28 changes: 9 additions & 19 deletions .github/workflows/_base_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:
FULL_REPO="${{ github.repository }}"
REPO_NAME="${FULL_REPO##*/}"
BASE_BRANCH="${{ github.base_ref }}"

docker pull ${docker_image}
# Clean the repository directory before starting
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
-e "REPO_NAME=${REPO_NAME}" \
Expand All @@ -55,7 +55,7 @@ jobs:
fi
'

wget -q ${fd_archive_url}
wget -q --no-proxy ${fd_archive_url}
tar -xf FastDeploy.tar.gz
rm -rf FastDeploy.tar.gz
cd FastDeploy
Expand All @@ -80,12 +80,14 @@ jobs:
FD_API_PORT=$((42088 + DEVICE_PORT * 100))
FD_ENGINE_QUEUE_PORT=$((42058 + DEVICE_PORT * 100))
FD_METRICS_PORT=$((42078 + DEVICE_PORT * 100))
FD_CACHE_QUEUE_PORT=$((42098 + DEVICE_PORT * 100))
echo "Test ENV Parameter:"
echo "========================================================="
echo "FLASK_PORT=${FLASK_PORT}"
echo "FD_API_PORT=${FD_API_PORT}"
echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}"
echo "FD_METRICS_PORT=${FD_METRICS_PORT}"
echo "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}"
echo "DEVICES=${DEVICES}"
echo "========================================================="

Expand All @@ -99,7 +101,7 @@ jobs:
exit 1
fi

PORTS=($FLASK_PORT $FD_API_PORT $FD_ENGINE_QUEUE_PORT $FD_METRICS_PORT)
PORTS=($FLASK_PORT $FD_API_PORT $FD_ENGINE_QUEUE_PORT $FD_METRICS_PORT $FD_CACHE_QUEUE_PORT)
LOG_FILE="./port_cleanup_$(date +%Y%m%d_%H%M%S).log"
echo "==== LOG_FILE is ${LOG_FILE} ===="

Expand Down Expand Up @@ -134,7 +136,7 @@ jobs:
-e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
-e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
-e "FLASK_PORT=${FLASK_PORT}" \
-e "FD_FORCE_CHUNKED_PREFILL=1" \
-e "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}" \
-v "${MODEL_CACHE_DIR}:/MODELDATA" \
-v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
-v "${CACHE_DIR}/.cache:/root/.cache" \
Expand All @@ -153,11 +155,13 @@ jobs:
./llm-deploy-linux-amd64 -python python3.10 \
-model_name ERNIE-4.5-0.3B-Paddle \
-model_path /MODELDATA \
--skip install
--skip install,model

git config --global --add safe.directory /workspace/FastDeploy
cd FastDeploy
pushd tests/ce/deploy
ps -ef | grep "${FD_CACHE_QUEUE_PORT}" | grep -v grep | awk "{print \$2}" | xargs -r kill -9
ps -ef | grep "${FD_ENGINE_QUEUE_PORT}" | grep -v grep | awk "{print \$2}" | xargs -r kill -9
python3.10 deploy.py > dd.log 2>&1 &
sleep 3
curl -X POST http://0.0.0.0:${FLASK_PORT}/start \
Expand Down Expand Up @@ -202,20 +206,6 @@ jobs:
check_service 90
python -m pytest -sv test_max_waiting_time.py || TEST_EXIT_CODE=1

curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
-H "Content-Type: application/json" \
-d "{\"--model\": \"/MODELDATA/ernie-4_5-21b-a3b-bf16-paddle\", \"--config\": \"21b_mtp.yaml\", \"--enable-logprob\": \"False\"}"
check_service 180
export TEMPLATE=TOKEN_NORMAL
python -m pytest -sv test_seed_usage.py -k "not test_seed_stream" || TEST_EXIT_CODE=1

curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
-H "Content-Type: application/json" \
-d "{\"--model\": \"/MODELDATA/ernie-4_5-21b-a3b-bf16-paddle\", \"--config\": \"21b_sot.yaml\", \"--enable-logprob\": \"False\"}"
check_service 360
export TEMPLATE=TOKEN_NORMAL
python -m pytest -sv test_seed_usage.py -k "not test_seed_stream" || TEST_EXIT_CODE=1

popd
echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> /workspace/FastDeploy/exit_code.env
'
Expand Down
13 changes: 10 additions & 3 deletions .github/workflows/_build_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ on:
jobs:
fd-build:
runs-on: [self-hosted, GPU-Build]
timeout-minutes: 240
timeout-minutes: 360
outputs:
wheel_path: ${{ steps.set_output.outputs.wheel_path }}
steps:
Expand All @@ -82,7 +82,7 @@ jobs:
fi
'

wget -q ${fd_archive_url}
wget -q --no-proxy ${fd_archive_url}
tar -xf FastDeploy.tar.gz
rm -rf FastDeploy.tar.gz
cd FastDeploy
Expand All @@ -106,7 +106,12 @@ jobs:
CARD_ID=$(echo "${runner_name}" | awk -F'-' '{print $NF}')
gpu_id=$(echo "$CARD_ID" | fold -w1 | paste -sd,)

CACHE_DIR="${CACHE_DIR:-$(dirname "$(dirname "${{ github.workspace }}")")}"
IFS='/' read -ra parts <<< "${GITHUB_WORKSPACE}"
len=${#parts[@]}
CCACHE_DEFAULT_DIR="/$(IFS=/; echo "${parts[*]:1:$((len-5))}")"
echo "$CCACHE_DEFAULT_DIR"

CACHE_DIR="${CACHE_DIR:-$CCACHE_DEFAULT_DIR}"
echo "CACHE_DIR is set to ${CACHE_DIR}"
if [ ! -f "${CACHE_DIR}/gitconfig" ]; then
touch "${CACHE_DIR}/gitconfig"
Expand All @@ -127,13 +132,15 @@ jobs:
-e "PADDLEVERSION=${PADDLEVERSION}" \
-e "PADDLE_WHL_URL=${PADDLE_WHL_URL}" \
-e "BRANCH_REF=${BRANCH_REF}" \
-e "CCACHE_MAXSIZE=50G" \
--gpus "\"device=${gpu_id}\"" ${docker_image} /bin/bash -c '
if [[ -n "${FD_VERSION}" ]]; then
export FASTDEPLOY_VERSION=${FD_VERSION}
echo "Custom FastDeploy version: ${FASTDEPLOY_VERSION}"
fi

git config --global --add safe.directory /workspace/FastDeploy
chown -R $(whoami) /workspace/FastDeploy
cd FastDeploy
if [[ "${WITH_NIGHTLY_BUILD}" == "ON" ]];then
GIT_COMMIT_TIME=$(git --no-pager show -s --format=%ci HEAD)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
name: CI_GCU

on:
pull_request:
branches:
- develop
- 'release/*'
#pull_request:
#branches:
#- develop
#- 'release/*'
workflow_dispatch:

concurrency:
Expand Down
Loading