Skip to content

Commit 48db2f2

Browse files
committed
change name
1 parent bb712ab commit 48db2f2

20 files changed

+94
-116
lines changed

csrc/build_aclnn.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,5 @@ cd custom_ops/
55
bash build.sh custom_ops -cascend910_93
66

77
# install custom ops
8-
# ./output/CANN-custom_ops--linux.x86_64.run
9-
# export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize/op_api/lib/:${LD_LIBRARY_PATH}
8+
./build_out/custom_ops/run/CANN_ascend910_93_ubuntu_aarch64.run --install-path=/usr/local/Ascend/ascend-toolkit/latest/opp/
9+
source /usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize/bin/set_env.bash

csrc/custom_ops/kernels/fused_deep_moe/op_host/fused_deep_moe.cpp renamed to csrc/custom_ops/kernels/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
11
/*
22
* Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved.
3-
* Description: FusedDeepMoe operator definition file
3+
* Description: DispatchGmmCombineDecode operator definition file
44
* Author: WANG Qiankun
55
* Create: 2025-07-19
66
* Note:
7-
* History: 2025-07-19 create FusedDeepMoe operator definition file
7+
* History: 2025-07-19 create DispatchGmmCombineDecode operator definition file
88
*/
99
#include "register/op_def_registry.h"
1010

1111
namespace ops {
12-
class FusedDeepMoe : public OpDef
12+
class DispatchGmmCombineDecode : public OpDef
1313
{
1414
public:
15-
explicit FusedDeepMoe(const char *name) : OpDef(name)
15+
explicit DispatchGmmCombineDecode(const char *name) : OpDef(name)
1616
{
1717
this->Input("x")
1818
.ParamType(REQUIRED)
@@ -78,5 +78,5 @@ class FusedDeepMoe : public OpDef
7878
}
7979
};
8080

81-
OP_ADD(FusedDeepMoe);
81+
OP_ADD(DispatchGmmCombineDecode);
8282
} // namespace ops

csrc/custom_ops/kernels/fused_deep_moe/op_host/fused_deep_moe_infer.cpp renamed to csrc/custom_ops/kernels/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_infer.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
/*
22
* Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved.
3-
* Description: FusedDeepMoe tiling function implementation file
3+
* Description: DispatchGmmCombineDecode infer function implementation file
44
* Author: Guo Ren
55
* Create: 2025-07-22
66
* Note:
7-
* History: 2025-07-13 create FusedDeepMoe infer function file
7+
* History: 2025-07-13 create DispatchGmmCombineDecode infer function file
88
*/
99

1010
#include <cstdint>
@@ -89,5 +89,5 @@ static ge::graphStatus InferDataType(gert::InferDataTypeContext *context)
8989
return ge::GRAPH_SUCCESS;
9090
}
9191

92-
IMPL_OP(FusedDeepMoe).InferShape(InferShape).InferDataType(InferDataType);
92+
IMPL_OP(DispatchGmmCombineDecode).InferShape(InferShape).InferDataType(InferDataType);
9393
} // namespace ge

csrc/custom_ops/kernels/fused_deep_moe/op_host/fused_deep_moe_tiling.cpp renamed to csrc/custom_ops/kernels/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_tiling.cpp

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
/*
22
* Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved.
3-
* Description: FusedDeepMoe tiling function implementation file
3+
* Description: DispatchGmmCombineDecode tiling function implementation file
44
* Author: WANG Qiankun
55
* Create: 2025-07-19
66
* Note:
7-
* History: 2025-07-19 create FusedDeepMoe tiling function implementation file
7+
* History: 2025-07-19 create DispatchGmmCombineDecode tiling function implementation file
88
*/
99
#include <cstdio>
1010
#include <cstdint>
@@ -13,7 +13,7 @@
1313
#include "error_log.h"
1414
#include "graph/utils/type_utils.h"
1515
#include "register/op_def_registry.h"
16-
#include "../op_kernel/fused_deep_moe_tiling.h"
16+
#include "../op_kernel/dispatch_gmm_combine_decode_tiling.h"
1717
#include "tiling/platform/platform_ascendc.h"
1818
#include "tiling/hccl/hccl_tiling.h"
1919

@@ -67,7 +67,7 @@ static size_t CeilUp(size_t x, size_t y)
6767
}
6868

6969
static ge::graphStatus CheckTensorShape(gert::TilingContext *context, const char *nodeName,
70-
FusedDeepMoeTilingData &tilingData)
70+
DispatchGmmCombineDecodeTilingData &tilingData)
7171
{
7272
uint32_t epRankId = tilingData.disGmmDeqSwigluQuantGmmDeqComInfo.epRankId;
7373
uint32_t moeExpertNum = tilingData.disGmmDeqSwigluQuantGmmDeqComInfo.moeExpertNum;
@@ -127,7 +127,7 @@ static ge::graphStatus CheckTensorShape(gert::TilingContext *context, const char
127127
return ge::GRAPH_SUCCESS;
128128
}
129129

130-
static ge::graphStatus CheckData(const char *nodeName, FusedDeepMoeTilingData &tilingData)
130+
static ge::graphStatus CheckData(const char *nodeName, DispatchGmmCombineDecodeTilingData &tilingData)
131131
{
132132
uint32_t batchSize = tilingData.disGmmDeqSwigluQuantGmmDeqComInfo.bs;
133133
OP_TILING_CHECK(batchSize < MIN_BATCH_SIZE, OP_LOGE(nodeName, "batchSize(bs) must >= %d.", MIN_BATCH_SIZE),
@@ -162,7 +162,7 @@ static ge::graphStatus CheckData(const char *nodeName, FusedDeepMoeTilingData &t
162162
}
163163

164164
static ge::graphStatus GetAttrAndSetTilingData(gert::TilingContext *context, const char *nodeName,
165-
FusedDeepMoeTilingData &tilingData, std::string &groupEp)
165+
DispatchGmmCombineDecodeTilingData &tilingData, std::string &groupEp)
166166
{
167167
auto attrs = context->GetAttrs();
168168
OP_TILING_CHECK(attrs == nullptr, OP_LOGE(nodeName, "attrs is nullptr."), return ge::GRAPH_FAILED);
@@ -209,10 +209,10 @@ static ge::graphStatus GetAttrAndSetTilingData(gert::TilingContext *context, con
209209
return ge::GRAPH_SUCCESS;
210210
}
211211

212-
static void SetHcommCfg(const gert::TilingContext *context, FusedDeepMoeTilingData *tiling, const std::string groupEp)
212+
static void SetHcommCfg(const gert::TilingContext *context, DispatchGmmCombineDecodeTilingData *tiling, const std::string groupEp)
213213
{
214214
const char *nodeName = context->GetNodeName();
215-
OP_LOGD(nodeName, "FusedDeepMoe groupEp = %s", groupEp.c_str());
215+
OP_LOGD(nodeName, "DispatchGmmCombineDecode groupEp = %s", groupEp.c_str());
216216
uint32_t opType = OP_TYPE_ALL_TO_ALL;
217217
std::string algConfigAllToAllStr = "AlltoAll=level0:fullmesh;level1:pairwise";
218218
std::string algConfigAllGatherStr = "AllGather=level0:ring";
@@ -223,7 +223,7 @@ static void SetHcommCfg(const gert::TilingContext *context, FusedDeepMoeTilingDa
223223
}
224224

225225
static ge::graphStatus SetWorkSpace(gert::TilingContext *context, const char *nodeName,
226-
FusedDeepMoeTilingData &tilingData)
226+
DispatchGmmCombineDecodeTilingData &tilingData)
227227
{
228228
size_t *workSpaces = context->GetWorkspaceSizes(1);
229229
OP_TILING_CHECK(workSpaces == nullptr, OP_LOGE(nodeName, "workSpaces is nullptr."), return ge::GRAPH_FAILED);
@@ -263,10 +263,10 @@ static ge::graphStatus SetWorkSpace(gert::TilingContext *context, const char *no
263263
return ge::GRAPH_SUCCESS;
264264
}
265265

266-
static ge::graphStatus FusedDeepMoeTilingFuncImpl(gert::TilingContext *context)
266+
static ge::graphStatus DispatchGmmCombineDecodeTilingFuncImpl(gert::TilingContext *context)
267267
{
268268
const char *nodeName = context->GetNodeName();
269-
FusedDeepMoeTilingData *tilingData = context->GetTilingData<FusedDeepMoeTilingData>();
269+
DispatchGmmCombineDecodeTilingData *tilingData = context->GetTilingData<DispatchGmmCombineDecodeTilingData>();
270270
OP_TILING_CHECK(tilingData == nullptr, OP_LOGE(nodeName, "tilingData is nullptr."), return ge::GRAPH_FAILED);
271271
std::string groupEp = "";
272272

@@ -312,20 +312,20 @@ static ge::graphStatus FusedDeepMoeTilingFuncImpl(gert::TilingContext *context)
312312
return ge::GRAPH_SUCCESS;
313313
}
314314

315-
static ge::graphStatus FusedDeepMoeTilingFunc(gert::TilingContext *context)
315+
static ge::graphStatus DispatchGmmCombineDecodeTilingFunc(gert::TilingContext *context)
316316
{
317-
ge::graphStatus ret = FusedDeepMoeTilingFuncImpl(context);
317+
ge::graphStatus ret = DispatchGmmCombineDecodeTilingFuncImpl(context);
318318
return ret;
319319
}
320320

321-
struct FusedDeepMoeCompileInfo {};
322-
ge::graphStatus TilingParseForFusedDeepMoe(gert::TilingParseContext *context)
321+
struct DispatchGmmCombineDecodeCompileInfo {};
322+
ge::graphStatus TilingParseForDispatchGmmCombineDecode(gert::TilingParseContext *context)
323323
{
324324
(void)context;
325325
return ge::GRAPH_SUCCESS;
326326
}
327327

328-
IMPL_OP_OPTILING(FusedDeepMoe)
329-
.Tiling(FusedDeepMoeTilingFunc)
330-
.TilingParse<FusedDeepMoeCompileInfo>(TilingParseForFusedDeepMoe);
328+
IMPL_OP_OPTILING(DispatchGmmCombineDecode)
329+
.Tiling(DispatchGmmCombineDecodeTilingFunc)
330+
.TilingParse<DispatchGmmCombineDecodeCompileInfo>(TilingParseForDispatchGmmCombineDecode);
331331
} // namespace optiling

csrc/custom_ops/kernels/fused_deep_moe/op_kernel/fused_deep_moe.cpp renamed to csrc/custom_ops/kernels/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
/*
22
* Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved.
3-
* Description: FusedDeepMoe operator kernel function implementation file
3+
* Description: DispatchGmmCombineDecode operator kernel function implementation file
44
* Author: WANG Qiankun
55
* Create: 2025-07-19
66
* Note:
7-
* History: 2025-07-19 create FusedDeepMoe operator kernel function implementation file
7+
* History: 2025-07-19 create DispatchGmmCombineDecode operator kernel function implementation file
88
*/
9-
#include "fused_deep_moe.h"
9+
#include "dispatch_gmm_combine_decode.h"
1010
#include <kernel_operator.h>
1111
#include "lib/matmul_intf.h"
1212

13-
extern "C" __global__ __aicore__ void fused_deep_moe(
13+
extern "C" __global__ __aicore__ void dispatch_gmm_combine_decode(
1414
// input
1515
GM_ADDR x, GM_ADDR expert_ids, GM_ADDR gmm1_permuted_weight, GM_ADDR gmm1_permuted_weight_scale,
1616
GM_ADDR gmm2_weight, GM_ADDR gmm2_weight_scale, GM_ADDR expert_smooth_scales, GM_ADDR expert_scales,
@@ -21,11 +21,11 @@ extern "C" __global__ __aicore__ void fused_deep_moe(
2121
{
2222
icache_preload(8);
2323
// New output recvCount
24-
REGISTER_TILING_DEFAULT(FusedDeepMoeTilingData);
24+
REGISTER_TILING_DEFAULT(DispatchGmmCombineDecodeTilingData);
2525
KERNEL_TASK_TYPE_DEFAULT(KERNEL_TYPE_MIX_AIC_1_2); // 1C2V
2626
GET_TILING_DATA(tiling_data, tiling);
2727
if constexpr (TILING_KEY_IS(0) || TILING_KEY_IS(1)) {
28-
FusedDeepMoe<DTYPE_X, int32_t, false, TILING_KEY_VAR> op;
28+
DispatchGmmCombineDecode<DTYPE_X, int32_t, false, TILING_KEY_VAR> op;
2929
op.Init(x, expert_ids, gmm1_permuted_weight, gmm1_permuted_weight_scale, gmm2_weight, gmm2_weight_scale,
3030
expert_smooth_scales, expert_scales, output, outputRecvCount, workspace, nullptr, &tiling_data);
3131
op.Process();

csrc/custom_ops/kernels/fused_deep_moe/op_kernel/fused_deep_moe.h renamed to csrc/custom_ops/kernels/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode.h

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
/*
22
* Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved.
3-
* Description: FusedDeepMoe operator kernel function header file, for a3
3+
* Description: DispatchGmmCombineDecode operator kernel function header file, for a3
44
* Author: WANG Qiankun
55
* Create: 2025-07-19
66
* Note:
7-
* History: 2025-07-19 create FusedDeepMoe operator kernel function header file, for a3
7+
* History: 2025-07-19 create DispatchGmmCombineDecode operator kernel function header file, for a3
88
*/
9-
#ifndef FUSED_DEEP_MOE_H
10-
#define FUSED_DEEP_MOE_H
9+
#ifndef DISPATCH_GMM_COMBINE_DECODE_H
10+
#define DISPATCH_GMM_COMBINE_DECODE_H
1111

1212
#include "lib/matmul_intf.h"
1313
#include <kernel_operator.h>
@@ -29,8 +29,8 @@
2929

3030
#include "operator/cam_moe_distribute_combine/op_kernel/a3/cam_moe_distribute_dispatch.h"
3131

32-
#include "fused_deep_moe_tiling.h"
33-
#include "fused_deep_moe_base.h"
32+
#include "dispatch_gmm_combine_decode_tiling.h"
33+
#include "dispatch_gmm_combine_decode_base.h"
3434

3535
#define ENABLE_GMM2_COMBINE
3636

@@ -235,18 +235,18 @@ ACT_DEVICE void GmmDeq(GemmCoord problemShape, uint32_t groupCount, GM_ADDR gmGr
235235
}
236236

237237
template <TemplateMC2TypeClass>
238-
class FusedDeepMoe
238+
class DispatchGmmCombineDecode
239239
{
240240
public:
241-
__aicore__ inline FusedDeepMoe(){};
241+
__aicore__ inline DispatchGmmCombineDecode(){};
242242
__aicore__ inline void Init(
243243
// input
244244
GM_ADDR x, GM_ADDR expert_ids, GM_ADDR gmm1_permuted_weight, GM_ADDR gmm1_permuted_weight_scale,
245245
GM_ADDR gmm2_weight, GM_ADDR gmm2_weight_scale, GM_ADDR expert_smooth_scales, GM_ADDR expert_scales,
246246
// output
247247
GM_ADDR output, GM_ADDR outputRecvCount,
248248
// system
249-
GM_ADDR workspaceGM, AscendC::TPipe *pipe, const FusedDeepMoeTilingData *tilingData);
249+
GM_ADDR workspaceGM, AscendC::TPipe *pipe, const DispatchGmmCombineDecodeTilingData *tilingData);
250250
__aicore__ inline void Process();
251251

252252
private:
@@ -285,18 +285,18 @@ class FusedDeepMoe
285285

286286
AscendC::TPipe *tpipe_{nullptr};
287287
__gm__ HcclOpResParam *winContext_{nullptr};
288-
const FusedDeepMoeTilingData *tilingData_;
288+
const DispatchGmmCombineDecodeTilingData *tilingData_;
289289
};
290290

291291
template <TemplateMC2TypeClass>
292-
__aicore__ inline void FusedDeepMoe<TemplateMC2TypeFunc>::Init(
292+
__aicore__ inline void DispatchGmmCombineDecode<TemplateMC2TypeFunc>::Init(
293293
// input
294294
GM_ADDR x, GM_ADDR expert_ids, GM_ADDR gmm1_permuted_weight, GM_ADDR gmm1_permuted_weight_scale,
295295
GM_ADDR gmm2_weight, GM_ADDR gmm2_weight_scale, GM_ADDR expert_smooth_scales, GM_ADDR expert_scales,
296296
// output
297297
GM_ADDR output, GM_ADDR outputRecvCount,
298298
// system
299-
GM_ADDR workspaceGM, AscendC::TPipe *pipe, const FusedDeepMoeTilingData *tilingData)
299+
GM_ADDR workspaceGM, AscendC::TPipe *pipe, const DispatchGmmCombineDecodeTilingData *tilingData)
300300
{
301301
tpipe_ = pipe;
302302
blockDim_ = AscendC::GetBlockNum();
@@ -341,15 +341,15 @@ __aicore__ inline void FusedDeepMoe<TemplateMC2TypeFunc>::Init(
341341
}
342342

343343
template <TemplateMC2TypeClass>
344-
__aicore__ inline void FusedDeepMoe<TemplateMC2TypeFunc>::Process()
344+
__aicore__ inline void DispatchGmmCombineDecode<TemplateMC2TypeFunc>::Process()
345345
{
346346
#ifdef ENABLE_GMM2_COMBINE
347347
if (g_coreType == AscendC::AIV) {
348-
((FusedDeepMoeTilingData *)tilingData_)->disGmmDeqSwigluQuantGmmDeqComInfo.aicNum = get_block_num();
348+
((DispatchGmmCombineDecodeTilingData *)tilingData_)->disGmmDeqSwigluQuantGmmDeqComInfo.aicNum = get_block_num();
349349
if constexpr (EXEC_FLAG & EXEC_FLAG_DEEP_FUSE) {
350-
((FusedDeepMoeTilingData *)tilingData_)->disGmmDeqSwigluQuantGmmDeqComInfo.aivNum = get_block_num();
350+
((DispatchGmmCombineDecodeTilingData *)tilingData_)->disGmmDeqSwigluQuantGmmDeqComInfo.aivNum = get_block_num();
351351
} else {
352-
((FusedDeepMoeTilingData *)tilingData_)->disGmmDeqSwigluQuantGmmDeqComInfo.aivNum =
352+
((DispatchGmmCombineDecodeTilingData *)tilingData_)->disGmmDeqSwigluQuantGmmDeqComInfo.aivNum =
353353
get_block_num() * get_subblockdim();
354354
}
355355
}
@@ -444,4 +444,4 @@ __aicore__ inline void FusedDeepMoe<TemplateMC2TypeFunc>::Process()
444444
layoutOutput, gmWorkspace, &combiner);
445445
#endif
446446
}
447-
#endif // FUSED_DEEP_MOE_H
447+
#endif // DISPATCH_GMM_COMBINE_DECODE_H

csrc/custom_ops/kernels/fused_deep_moe/op_kernel/fused_deep_moe_base.h renamed to csrc/custom_ops/kernels/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode_base.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@
66
* Note:
77
* History: 2025-07-19 Create a definition file for a distribution group related structure
88
*/
9-
#ifndef FUSED_DEEP_MOE_BASE_H
10-
#define FUSED_DEEP_MOE_BASE_H
9+
#ifndef DISPATCH_GMM_COMBINE_DECODE_BASE_H
10+
#define DISPATCH_GMM_COMBINE_DECODE_BASE_H
1111

1212
#include "moe_distribute_base.h"
1313

1414
#define TemplateMC2TypeClass typename ExpandXType, typename ExpandIdxType, bool IsNeedReduceScatter, uint32_t EXEC_FLAG
1515
#define TemplateMC2TypeFunc ExpandXType, ExpandIdxType, IsNeedReduceScatter, EXEC_FLAG
1616

17-
#endif // FUSED_DEEP_MOE_BASE_H
17+
#endif // DISPATCH_GMM_COMBINE_DECODE_BASE_H

csrc/custom_ops/kernels/fused_deep_moe/op_kernel/fused_deep_moe_tiling.h renamed to csrc/custom_ops/kernels/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode_tiling.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
11
/*
22
* Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved.
3-
* Description: FusedDeepMoe tilingData definition file
3+
* Description: DispatchGmmCombineDecode tilingData definition file
44
* Author: WANG Qiankun
55
* Create: 2025-07-19
66
* Note:
7-
* History: 2025-07-19 create FusedDeepMoe tilingData definition file
7+
* History: 2025-07-19 create DispatchGmmCombineDecode tilingData definition file
88
*/
99

10-
#ifndef FUSED_DEEP_MOE_TILING_H
11-
#define FUSED_DEEP_MOE_TILING_H
10+
#ifndef DISPATCH_GMM_COMBINE_DECODE_TILING_H
11+
#define DISPATCH_GMM_COMBINE_DECODE_TILING_H
1212

1313
#include "kernel_tiling/kernel_tiling.h"
1414

15-
struct FusedDeepMoeInfo {
15+
struct DispatchGmmCombineDecodeInfo {
1616
uint32_t epRankSize; // epRankSize
1717
uint32_t epRankId; // epRankId
1818
uint32_t moeExpertNum; // moe expert number
@@ -31,10 +31,10 @@ struct FusedDeepMoeInfo {
3131
uint64_t gmm1HLen;
3232
};
3333

34-
struct FusedDeepMoeTilingData {
34+
struct DispatchGmmCombineDecodeTilingData {
3535
Mc2InitTiling mc2InitTiling;
3636
Mc2CcTiling mc2CcTiling;
37-
FusedDeepMoeInfo disGmmDeqSwigluQuantGmmDeqComInfo;
37+
DispatchGmmCombineDecodeInfo disGmmDeqSwigluQuantGmmDeqComInfo;
3838
};
3939

4040
constexpr uint32_t GM_ALIGN_BYTE = 512;
@@ -70,4 +70,4 @@ constexpr uint32_t WORKSPACE_STAGES = 4;
7070

7171
constexpr uint32_t EXEC_FLAG_DEEP_FUSE = (1U << 0);
7272

73-
#endif // FUSED_DEEP_MOE_TILING_H
73+
#endif // DISPATCH_GMM_COMBINE_DECODE_TILING_H

0 commit comments

Comments
 (0)