     "../../../../test/distributed/pipelining/test_schedule.py": None,
     "../../../../test/distributed/pipelining/test_transformer.py": None,
     "../../../../test/distributed/pipelining/test_unflatten.py": None,
+    "../../../../test/distributed/tensor/parallel/test_micro_pipeline_tp.py": (
+        # NotImplementedError: The operator 'symm_mem::fused_matmul_reduce_scatter'
+        # is not currently implemented for the XPU device
+        # https://github.com/intel/torch-xpu-ops/issues/1547
+        "test_dtensor_seq_par_shard_dim_0",
+        "test_dtensor_seq_par_shard_dim_1",
+        "test_fuse_matmul_reduce_scatter_A_dims_2_scatter_dim_0",
+        "test_fuse_matmul_reduce_scatter_A_dims_2_scatter_dim_1",
+        "test_fuse_matmul_reduce_scatter_A_dims_3_scatter_dim_0",
+        "test_fuse_matmul_reduce_scatter_A_dims_3_scatter_dim_1",
+        "test_fuse_matmul_reduce_scatter_A_dims_3_scatter_dim_2",
+        # AssertionError: 'fused_all_gather_matmul' not found in '# AOT ID: ......'
+        # https://github.com/intel/torch-xpu-ops/issues/1548
+        "test_fuse_all_gather_matmul_A_dims_2_gather_dim_0_return_A_False",
+        "test_fuse_all_gather_matmul_A_dims_2_gather_dim_0_return_A_True",
+        "test_fuse_all_gather_matmul_A_dims_3_gather_dim_0_return_A_False",
+        "test_fuse_all_gather_matmul_A_dims_3_gather_dim_0_return_A_True",
+        "test_fuse_all_gather_matmul_A_dims_3_gather_dim_1_return_A_False",
+        "test_fuse_all_gather_matmul_A_dims_3_gather_dim_1_return_A_True",
+        # AssertionError: 'fused_all_gather_scaled_matmul' not found in 'graph():\n......'
+        # https://github.com/intel/torch-xpu-ops/issues/1549
+        "test_fuse_all_gather_scaled_matmul_A_dims_2_gather_dim_0_return_A_False",
+        "test_fuse_all_gather_scaled_matmul_A_dims_2_gather_dim_0_return_A_True",
+        "test_fuse_all_gather_scaled_matmul_A_dims_3_gather_dim_0_return_A_False",
+        "test_fuse_all_gather_scaled_matmul_A_dims_3_gather_dim_0_return_A_True",
+        "test_fuse_all_gather_scaled_matmul_A_dims_3_gather_dim_1_return_A_False",
+        "test_fuse_all_gather_scaled_matmul_A_dims_3_gather_dim_1_return_A_True",
+        # NotImplementedError: The operator 'aten::_scaled_mm.out' is not currently
+        # implemented for the XPU device.
+        # https://github.com/intel/torch-xpu-ops/issues/1550
+        "test_fuse_all_gather_scaled_matmul_A_dims_2_gather_dim_1_return_A_False",
+        "test_fuse_all_gather_scaled_matmul_A_dims_2_gather_dim_1_return_A_True",
+        "test_fuse_all_gather_scaled_matmul_A_dims_3_gather_dim_2_return_A_False",
+        "test_fuse_all_gather_scaled_matmul_A_dims_3_gather_dim_2_return_A_True",
+        # NotImplementedError: The operator 'symm_mem::fused_scaled_matmul_reduce_scatter'
+        # is not currently implemented for the XPU device.
+        # https://github.com/intel/torch-xpu-ops/issues/1551
+        "test_fuse_scaled_matmul_reduce_scatter_A_dims_2_scatter_dim_0",
+        "test_fuse_scaled_matmul_reduce_scatter_A_dims_2_scatter_dim_1",
+        "test_fuse_scaled_matmul_reduce_scatter_A_dims_3_scatter_dim_0",
+        "test_fuse_scaled_matmul_reduce_scatter_A_dims_3_scatter_dim_1",
+        "test_fuse_scaled_matmul_reduce_scatter_A_dims_3_scatter_dim_2",
+        "test_fuse_scaled_matmul_reduce_scatter_rowwise_scales_reshape_mm_reshape_scatter_dim_0",
+        "test_fuse_scaled_matmul_reduce_scatter_rowwise_scales_reshape_mm_reshape_scatter_dim_1",
+        "test_fuse_scaled_matmul_reduce_scatter_rowwise_scales_reshape_mm_reshape_scatter_dim_2",
+    ),
+    "../../../../test/distributed/tensor/parallel/test_tp_examples.py": (
+        # RuntimeError: aten.add.Tensor: got mixed torch.Tensor and DTensor, need to convert
+        # all torch.Tensor to DTensor before calling distributed operators!
+        # https://github.com/intel/torch-xpu-ops/issues/1555
+        "test_transformer_req_grad_seq_parallel_float32_thaw_all",
+        "test_transformer_req_grad_seq_parallel_float32_thaw_layers_0_attention_wv__layers_0_feed_forward_w1__layers_1_feed_forward_w2__layers_1_ffn_norm__output__tok_embeddings",
+        "test_transformer_req_grad_seq_parallel_float32_thaw_layers_1_ffn_norm__norm__output__tok_embeddings",
+        "test_transformer_req_grad_seq_parallel_float32_thaw_norm__output__tok_embeddings",
+        "test_transformer_req_grad_seq_parallel_float32_thaw_output__tok_embeddings",
+        "test_transformer_training_is_seq_parallel_False_float32",
+        "test_transformer_training_is_seq_parallel_True_float32",
+        # NotImplementedError: Operator aten._scaled_dot_product_fused_attention_overrideable.default
+        # does not have a sharding strategy registered.
+        # https://github.com/intel/torch-xpu-ops/issues/1556
+        "test_transformer_req_grad_seq_parallel_float32_thaw_norm__output",
+    ),
     "../../../../test/distributed/tensor/parallel/test_tp_random_state.py": None,
+    "../../../../test/distributed/tensor/parallel/test_parallelize_api.py": None,
+    "../../../../test/distributed/tensor/parallel/test_tp_style.py": None,
+    "../../../../test/distributed/tensor/test_api.py": None,
+    "../../../../test/distributed/tensor/test_attention.py": None,
+    "../../../../test/distributed/tensor/test_common_rules.py": None,
+    "../../../../test/distributed/tensor/test_dtensor.py": None,
+    "../../../../test/distributed/tensor/test_dtensor_compile.py": None,
+    "../../../../test/distributed/tensor/test_experimental_ops.py": None,
+    "../../../../test/distributed/tensor/test_init.py": None,
+    "../../../../test/distributed/tensor/test_math_ops.py": (
+        # RuntimeError: oneCCL: coll_param.cpp:455 validate: EXCEPTION: average operation
+        # is not supported for the scheduler path
+        # https://github.com/intel/torch-xpu-ops/issues/1508
+        "test_mean",
+        "test_nll_loss_and_cross_entropy",
+    ),
+    "../../../../test/distributed/tensor/test_random_ops.py": None,
+    "../../../../test/distributed/tensor/test_redistribute.py": None,
+    "../../../../test/distributed/tensor/test_tensor_ops.py": None,
+    "../../../../test/distributed/tensor/experimental/test_register_sharding.py": None,
 }

 skip_dict_python = {
     "distributed/test_c10d_ops_xccl.py": None,
     "distributed/test_c10d_xccl.py": None,
     "../../../../test/distributed/pipelining/test_schedule_multiproc.py": None,  # Hang error.
     "../../../../test/distributed/pipelining/test_stage.py": None,
+    "../../../../test/distributed/pipelining/test_transformer.py": None,
 }
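
For readers outside the repo, the shape of these skip lists is: each key is a test file path relative to the runner, and the value is either None (the whole file is skipped) or a tuple of test-case names to skip while the rest of the file still runs. Below is a minimal, self-contained sketch of how such a dict could be turned into pytest arguments; build_pytest_args and the `-k` deselect strategy are illustrative assumptions, not the actual runner used by torch-xpu-ops.

# Hypothetical helper (not the repo's runner): map one skip-dict entry to pytest args.
from typing import Optional, Sequence

def build_pytest_args(test_file: str, skipped: Optional[Sequence[str]]) -> list:
    if skipped is None:
        # A value of None means the whole file is skipped: schedule nothing for it.
        return []
    # Otherwise keep the file but filter out the listed cases via a -k expression.
    keep_expr = " and ".join("not " + name for name in skipped)
    return [test_file, "-k", keep_expr]

# Small stand-in dict mirroring the structure of skip_dict above.
example_skip_dict = {
    "../../../../test/distributed/tensor/test_math_ops.py": (
        "test_mean",
        "test_nll_loss_and_cross_entropy",
    ),
    "../../../../test/distributed/tensor/test_dtensor.py": None,
}

for path, skipped in example_skip_dict.items():
    print(build_pytest_args(path, skipped))
# Prints the -k filter for test_math_ops.py and an empty list for test_dtensor.py.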
|