Skip to content

Commit 3b58310

Browse files
authored
enhance set_stop_value_multi_ends and standardize the registration of some operators (#4525)

* fix custom_ops
* paddleformers>=0.3.1
1 parent dc7faca commit 3b58310

18 files changed: +68 additions, -17 deletions

custom_ops/cpu_ops/stop_generation_multi_ends.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@
1818
#include <stdio.h>
1919
#include "paddle/extension.h"
2020

21+
#ifndef PD_BUILD_STATIC_OP
22+
#define PD_BUILD_STATIC_OP(name) PD_BUILD_OP(static_op_##name)
23+
#endif
24+
2125
bool is_in_end(const int64_t id, const int64_t *end_ids, int length) {
2226
bool flag = false;
2327
for (int i = 0; i < length; i++) {
@@ -49,6 +53,8 @@ void set_value_by_flags(bool *stop_flags,
4953
}
5054
if (!beam_search && is_in_end(topk_ids[bi], end_ids, end_length)) {
5155
stop_flags[bi] = true;
56+
topk_ids[bi] = end_ids[0];
57+
next_tokens[bi] = end_ids[0];
5258
}
5359
}
5460
}

custom_ops/gpu_ops/flash_mask_attn/flash_mask_attn.cu

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@
2020
#include "kernel_traits.h"
2121
#include "flash_mask_attn_kernel.hpp"
2222

23+
#ifndef PD_BUILD_STATIC_OP
24+
#define PD_BUILD_STATIC_OP(name) PD_BUILD_OP(static_op_##name)
25+
#endif
26+
2327
template <typename paddle_type>
2428
struct cuteType;
2529

@@ -142,7 +146,7 @@ std::vector<paddle::Tensor> FlashAttentionMask(
142146
}
143147

144148

145-
PD_BUILD_OP(flash_attention_mask)
149+
PD_BUILD_STATIC_OP(flash_attention_mask)
146150
.Inputs({
147151
"q_input",
148152
"k_input",

custom_ops/gpu_ops/get_img_boundaries.cc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414

1515
#include "paddle/extension.h"
1616

17+
#ifndef PD_BUILD_STATIC_OP
18+
#define PD_BUILD_STATIC_OP(name) PD_BUILD_OP(static_op_##name)
19+
#endif
20+
1721
std::vector<paddle::Tensor> GetImgBoundaries(const paddle::Tensor& task_input_ids,
1822
const paddle::Tensor& grid_thw,
1923
const int64_t image_patch_id) {
@@ -53,7 +57,7 @@ std::vector<paddle::Tensor> GetImgBoundaries(const paddle::Tensor& task_input_id
5357
return {out};
5458
}
5559

56-
PD_BUILD_OP(get_img_boundaries)
60+
PD_BUILD_STATIC_OP(get_img_boundaries)
5761
.Inputs({"task_input_ids", "grid_thw"})
5862
.Attrs({"image_patch_id: int64_t"})
5963
.Outputs({"img_boundaries"})

custom_ops/gpu_ops/get_mm_split_fuse.cc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@
1515
#include "paddle/extension.h"
1616
#include <map>
1717

18+
#ifndef PD_BUILD_STATIC_OP
19+
#define PD_BUILD_STATIC_OP(name) PD_BUILD_OP(static_op_##name)
20+
#endif
21+
1822
std::vector<paddle::Tensor> GetMmSplitFuse(const paddle::Tensor& task_input_ids,
1923
const paddle::Tensor& task_image_type_ids,
2024
const paddle::Tensor& task_input_ids_image_token_count,
@@ -133,7 +137,7 @@ std::vector<paddle::Tensor> GetMmSplitFuse(const paddle::Tensor& task_input_ids,
133137
return {image_chunk_selections_out, split_fuse_cur_seq_lens_out};
134138
}
135139

136-
PD_BUILD_OP(get_mm_split_fuse)
140+
PD_BUILD_STATIC_OP(get_mm_split_fuse)
137141
.Inputs({"task_input_ids", "task_image_type_ids", "task_input_ids_image_token_count", "grid_thw"})
138142
.Attrs({"image_token_id: int64_t", "img_total: int64_t", "batch_idx: int", "seq_lens_origin: int", "split_fuse_img_size: int", "split_fuse_text_size: int", "max_chunk_token_size: int"})
139143
.Outputs({"image_chunk_selections", "split_fuse_cur_seq_lens"})

custom_ops/gpu_ops/limit_thinking_content_length_v1.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ void LimitThinkingContentLengthV1(const paddle::Tensor &next_tokens,
8080
batch_size);
8181
}
8282

83-
PD_BUILD_OP(limit_thinking_content_length_v1)
83+
PD_BUILD_STATIC_OP(limit_thinking_content_length_v1)
8484
.Inputs({"next_tokens", "max_think_lens", "step_idx", "limit_think_status"})
8585
.Attrs({"think_end_id: int64_t"})
8686
.Outputs({"next_tokens_out"})

custom_ops/gpu_ops/limit_thinking_content_length_v2.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ void LimitThinkingContentLengthV2(const paddle::Tensor &next_tokens,
103103
batch_size);
104104
}
105105

106-
PD_BUILD_OP(limit_thinking_content_length_v2)
106+
PD_BUILD_STATIC_OP(limit_thinking_content_length_v2)
107107
.Inputs({"next_tokens", "max_think_lens", "step_idx", "limit_think_status"})
108108
.Attrs({"think_end_id: int64_t", "line_break_id: int64_t"})
109109
.Outputs({"next_tokens_out"})

custom_ops/gpu_ops/moba_attn/moba_attn.cu

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
#include "paddle/extension.h"
1616
#include "moba_attn.h"
1717

18+
#ifndef PD_BUILD_STATIC_OP
19+
#define PD_BUILD_STATIC_OP(name) PD_BUILD_OP(static_op_##name)
20+
#endif
1821

1922
std::vector<paddle::Tensor> MobaAttention(
2023
const paddle::Tensor& qkv,
@@ -272,7 +275,7 @@ std::vector<paddle::Tensor> MobaAttention(
272275
}
273276

274277

275-
PD_BUILD_OP(moba_attention)
278+
PD_BUILD_STATIC_OP(moba_attention)
276279
.Inputs({
277280
"qkv",
278281
"q_input",

custom_ops/gpu_ops/moba_attn/moba_decoder_attn/moba_qk_sort_decoder.cu

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
#include "moba_attn/moba_attn_utils.hpp"
1717
#include "moba_attn/moba_attn.h"
1818

19+
#ifndef PD_BUILD_STATIC_OP
20+
#define PD_BUILD_STATIC_OP(name) PD_BUILD_OP(static_op_##name)
21+
#endif
1922

2023
template <typename T, int knthreads, int moba_block_size, int kBlockMaxN, int searchtimes>
2124
__global__ void qk_gate_sort_decoder_kernel(
@@ -221,7 +224,7 @@ std::vector<paddle::Tensor> QkSortDecoder(
221224
}
222225
}
223226

224-
PD_BUILD_OP(moba_qk_sort_decoder)
227+
PD_BUILD_STATIC_OP(moba_qk_sort_decoder)
225228
.Inputs({
226229
"qk_gate_weight",
227230
"seq_len_encoder",

custom_ops/gpu_ops/moba_attn/moba_encoder_attn/moba_encoder_attn.cu

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@
2727
#include "softmax.hpp"
2828
#include "cutlass/arch/reg_reconfig.h"
2929

30+
#ifndef PD_BUILD_STATIC_OP
31+
#define PD_BUILD_STATIC_OP(name) PD_BUILD_OP(static_op_##name)
32+
#endif
33+
3034
template <int kHeadDim>
3135
auto get_gmem_layout(int token_num, int head_num) {
3236
return make_layout(
@@ -360,7 +364,7 @@ void MobaEncoderAttn(
360364
}
361365

362366

363-
PD_BUILD_OP(moba_encoder_attn)
367+
PD_BUILD_STATIC_OP(moba_encoder_attn)
364368
.Inputs({
365369
"q_input",
366370
"k_input",

custom_ops/gpu_ops/moba_attn/moba_encoder_attn/moba_encoder_write_cache.cu

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
#include "paddle/extension.h"
1616
#include "moba_attn/moba_attn.h"
1717

18+
#ifndef PD_BUILD_STATIC_OP
19+
#define PD_BUILD_STATIC_OP(name) PD_BUILD_OP(static_op_##name)
20+
#endif
1821

1922
template <typename T, int kBlockSize, int kHeadDim>
2023
__global__ void write_encoder_cachekv_c16(
@@ -135,7 +138,7 @@ void MobaEncoderAttnWriteCacheKv(
135138
}
136139
}
137140

138-
PD_BUILD_OP(moba_encoder_attn_write_cache_kv)
141+
PD_BUILD_STATIC_OP(moba_encoder_attn_write_cache_kv)
139142
.Inputs({
140143
"k_input",
141144
"v_input",

0 commit comments

Comments (0)