
Commit 2ed7973

Enabled UT for test/distributed/tensor
Signed-off-by: Cheng, Penghui <[email protected]>
1 parent 60dbd6e commit 2ed7973

File tree

1 file changed: +79 -0 lines changed

test/xpu/skip_list_dist_local.py (+79 lines)
@@ -136,12 +136,91 @@
     "../../../../test/distributed/pipelining/test_schedule.py": None,
     "../../../../test/distributed/pipelining/test_transformer.py": None,
     "../../../../test/distributed/pipelining/test_unflatten.py": None,
+    "../../../../test/distributed/tensor/parallel/test_micro_pipeline_tp.py": (
+        # NotImplementedError: The operator 'symm_mem::fused_matmul_reduce_scatter'
+        # is not currently implemented for the XPU device
+        # https://github.com/intel/torch-xpu-ops/issues/1547
+        "test_dtensor_seq_par_shard_dim_0",
+        "test_dtensor_seq_par_shard_dim_1",
+        "test_fuse_matmul_reduce_scatter_A_dims_2_scatter_dim_0",
+        "test_fuse_matmul_reduce_scatter_A_dims_2_scatter_dim_1",
+        "test_fuse_matmul_reduce_scatter_A_dims_3_scatter_dim_0",
+        "test_fuse_matmul_reduce_scatter_A_dims_3_scatter_dim_1",
+        "test_fuse_matmul_reduce_scatter_A_dims_3_scatter_dim_2",
+        # AssertionError: 'fused_all_gather_matmul' not found in '# AOT ID: ......'
+        # https://github.com/intel/torch-xpu-ops/issues/1548
+        "test_fuse_all_gather_matmul_A_dims_2_gather_dim_0_return_A_False",
+        "test_fuse_all_gather_matmul_A_dims_2_gather_dim_0_return_A_True",
+        "test_fuse_all_gather_matmul_A_dims_3_gather_dim_0_return_A_False",
+        "test_fuse_all_gather_matmul_A_dims_3_gather_dim_0_return_A_True",
+        "test_fuse_all_gather_matmul_A_dims_3_gather_dim_1_return_A_False",
+        "test_fuse_all_gather_matmul_A_dims_3_gather_dim_1_return_A_True",
+        # AssertionError: 'fused_all_gather_scaled_matmul' not found in 'graph():\n......'
+        # https://github.com/intel/torch-xpu-ops/issues/1549
+        "test_fuse_all_gather_scaled_matmul_A_dims_2_gather_dim_0_return_A_False",
+        "test_fuse_all_gather_scaled_matmul_A_dims_2_gather_dim_0_return_A_True",
+        "test_fuse_all_gather_scaled_matmul_A_dims_3_gather_dim_0_return_A_False",
+        "test_fuse_all_gather_scaled_matmul_A_dims_3_gather_dim_0_return_A_True",
+        "test_fuse_all_gather_scaled_matmul_A_dims_3_gather_dim_1_return_A_False",
+        "test_fuse_all_gather_scaled_matmul_A_dims_3_gather_dim_1_return_A_True",
+        # NotImplementedError: The operator 'aten::_scaled_mm.out' is not currently implemented for the XPU device.
+        # https://github.com/intel/torch-xpu-ops/issues/1550
+        "test_fuse_all_gather_scaled_matmul_A_dims_2_gather_dim_1_return_A_False",
+        "test_fuse_all_gather_scaled_matmul_A_dims_2_gather_dim_1_return_A_True",
+        "test_fuse_all_gather_scaled_matmul_A_dims_3_gather_dim_2_return_A_False",
+        "test_fuse_all_gather_scaled_matmul_A_dims_3_gather_dim_2_return_A_True",
+        # NotImplementedError: The operator 'symm_mem::fused_scaled_matmul_reduce_scatter'
+        # is not currently implemented for the XPU device.
+        # https://github.com/intel/torch-xpu-ops/issues/1551
+        "test_fuse_scaled_matmul_reduce_scatter_A_dims_2_scatter_dim_0",
+        "test_fuse_scaled_matmul_reduce_scatter_A_dims_2_scatter_dim_1",
+        "test_fuse_scaled_matmul_reduce_scatter_A_dims_3_scatter_dim_0",
+        "test_fuse_scaled_matmul_reduce_scatter_A_dims_3_scatter_dim_1",
+        "test_fuse_scaled_matmul_reduce_scatter_A_dims_3_scatter_dim_2",
+        "test_fuse_scaled_matmul_reduce_scatter_rowwise_scales_reshape_mm_reshape_scatter_dim_0",
+        "test_fuse_scaled_matmul_reduce_scatter_rowwise_scales_reshape_mm_reshape_scatter_dim_1",
+        "test_fuse_scaled_matmul_reduce_scatter_rowwise_scales_reshape_mm_reshape_scatter_dim_2",
+    ),
+    "../../../../test/distributed/tensor/parallel/test_tp_examples.py": (
+        # RuntimeError: aten.add.Tensor: got mixed torch.Tensor and DTensor, need to convert all torch.Tensor to DTensor before calling distributed operators!
+        # https://github.com/intel/torch-xpu-ops/issues/1555
+        "test/distributed/tensor/parallel/test_tp_examples.py::DistTensorParallelExampleTest::test_transformer_req_grad_seq_parallel_float32_thaw_all",
+        "test_transformer_req_grad_seq_parallel_float32_thaw_layers_0_attention_wv__layers_0_feed_forward_w1__layers_1_feed_forward_w2__layers_1_ffn_norm__output__tok_embeddings",
+        "test_transformer_req_grad_seq_parallel_float32_thaw_layers_1_ffn_norm__norm__output__tok_embeddings",
+        "test_transformer_req_grad_seq_parallel_float32_thaw_norm__output__tok_embeddings",
+        "test_transformer_req_grad_seq_parallel_float32_thaw_output__tok_embeddings",
+        "test_transformer_training_is_seq_parallel_False_float32",
+        "test_transformer_training_is_seq_parallel_True_float32",
+        # NotImplementedError: Operator aten._scaled_dot_product_fused_attention_overrideable.default does not have a sharding strategy registered.
+        # https://github.com/intel/torch-xpu-ops/issues/1556
+        "test_transformer_req_grad_seq_parallel_float32_thaw_norm__output",
+    ),
     "../../../../test/distributed/tensor/parallel/test_tp_random_state.py": None,
+    "../../../../test/distributed/tensor/parallel/test_parallelize_api.py": None,
+    "../../../../test/distributed/tensor/parallel/test_tp_style.py": None,
+    "../../../../test/distributed/tensor/test_api.py": None,
+    "../../../../test/distributed/tensor/test_attention.py": None,
+    "../../../../test/distributed/tensor/test_common_rules.py": None,
+    "../../../../test/distributed/tensor/test_dtensor.py": None,
+    "../../../../test/distributed/tensor/test_dtensor_compile.py": None,
+    "../../../../test/distributed/tensor/test_experimental_ops.py": None,
+    "../../../../test/distributed/tensor/test_init.py": None,
+    "../../../../test/distributed/tensor/test_math_ops.py": (
+        # RuntimeError: oneCCL: coll_param.cpp:455 validate: EXCEPTION: average operation is not supported for the scheduler path
+        # https://github.com/intel/torch-xpu-ops/issues/1508
+        "test_mean",
+        "test_nll_loss_and_cross_entropy",
+    ),
+    "../../../../test/distributed/tensor/test_random_ops.py": None,
+    "../../../../test/distributed/tensor/test_redistribute.py": None,
+    "../../../../test/distributed/tensor/test_tensor_ops.py": None,
+    "../../../../test/distributed/tensor/experimental/test_register_sharding.py": None,
 }
 
 skip_dict_python = {
     "distributed/test_c10d_ops_xccl.py": None,
     "distributed/test_c10d_xccl.py": None,
     "../../../../test/distributed/pipelining/test_schedule_multiproc.py": None,  # Hang error.
     "../../../../test/distributed/pipelining/test_stage.py": None,
+    "../../../../test/distributed/pipelining/test_transformer.py": None,
 }
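
For context, each skip-list entry pairs a test file path with either None (run the whole file) or a tuple of test names to skip, annotated with the observed error and a tracking issue. The sketch below is a minimal, hypothetical illustration of how a dictionary in this shape could be turned into pytest invocations; it is not the actual runner used by torch-xpu-ops, and the run_with_skips helper and the sample entries are illustrative only.

import subprocess
import sys

# Hypothetical skip dictionary in the same shape as skip_dict / skip_dict_python:
# a test file path maps to None (no skips) or a tuple of test names to deselect on XPU.
skip_dict = {
    "../../../../test/distributed/tensor/test_math_ops.py": (
        # oneCCL rejects the average op on the scheduler path (issue #1508).
        "test_mean",
        "test_nll_loss_and_cross_entropy",
    ),
    "../../../../test/distributed/tensor/test_random_ops.py": None,
}

def run_with_skips(skips):
    """Run each test file with pytest, deselecting any listed test names."""
    for test_file, skipped in skips.items():
        cmd = [sys.executable, "-m", "pytest", "-v", test_file]
        if skipped:
            # "-k 'not a and not b'" runs everything in the file except the listed tests.
            cmd += ["-k", " and ".join(f"not {name}" for name in skipped)]
        subprocess.run(cmd, check=False)

if __name__ == "__main__":
    run_with_skips(skip_dict)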
