These tests were disabled in #1624 due to a lack of Triton support at the time.
I tried running them again today with a fresh PyTorch build and got the error below (full log: https://gist.github.com/vkuzo/7f345ac91f3d4a520a866cf2c727ec12):
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] tmp91 = tmp89 | tmp90
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] tmp92 = tl.full([1], 0, tl.int32)
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] tmp93 = tl.where(tmp81, tmp92, tmp91)
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] tmp94 = tmp65 > tmp80
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] tmp95 = tmp83 == tmp80
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] tmp96 = tmp94 & tmp95
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] tmp97 = tl.where(tmp96, tmp79, tmp93)
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] tmp98 = tmp64.to(tl.int32)
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] tmp99 = tmp98 << tmp58
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] tmp100 = tmp97 | tmp99
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] tmp101 = tmp100.to(tl.float32, bitcast=True)
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] tmp103 = tmp101 * tmp102
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] tl.store(out_ptr4 + (x2), tmp103, xmask)
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532]
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] metadata: {'signature': {'in_ptr0': '*fp32', 'in_ptr1': '*fp32', 'out_ptr4': '*fp32', 'xnumel': 'i32'}, 'device': 0, 'constants': {'XBLOCK': 64}, 'configs': [AttrsDescriptor.from_dict({'arg_properties': {'tt.divisibility': (0, 1, 2, 3), 'tt.equal_to': ()}, 'cls': 'AttrsDescriptor'})], 'device_type': 'cuda', 'num_warps': 1, 'num_stages': 1, 'debug': True, 'cc': 100}
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] Traceback (most recent call last):
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] File "/home/vasiliy/.conda/envs/pytorch/lib/python3.11/site-packages/triton/backends/nvidia/compiler.py", line 356, in make_cubin
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] subprocess.run(ptxas_cmd, check=True, close_fds=False, stderr=flog)
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] File "/home/vasiliy/.conda/envs/pytorch/lib/python3.11/subprocess.py", line 569, in run
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] raise CalledProcessError(retcode, process.args,
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] subprocess.CalledProcessError: Command '['/home/vasiliy/.conda/envs/pytorch/lib/python3.11/site-packages/triton/backends/nvidia/bin/ptxas', '-lineinfo', '-v', '--gpu-name=sm_100', '/tmp/tmpodrv3kxq.ptx', '-o', '/tmp/tmpodrv3kxq.ptx.o']' returned non-zero exit status 255.
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532]
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] During handling of the above exception, another exception occurred:
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532]
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] Traceback (most recent call last):
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] File "/data/users/vasiliy/pytorch/torch/_inductor/runtime/triton_heuristics.py", line 530, in _precompile_config
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] binary = triton.compile(*compile_args, **compile_kwargs)
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] File "/home/vasiliy/.conda/envs/pytorch/lib/python3.11/site-packages/triton/compiler/compiler.py", line 279, in compile
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] next_module = compile_ir(module, metadata)
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] File "/home/vasiliy/.conda/envs/pytorch/lib/python3.11/site-packages/triton/backends/nvidia/compiler.py", line 389, in <lambda>
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] stages["cubin"] = lambda src, metadata: self.make_cubin(src, metadata, options, self.capability)
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] File "/home/vasiliy/.conda/envs/pytorch/lib/python3.11/site-packages/triton/backends/nvidia/compiler.py", line 374, in make_cubin
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] raise RuntimeError(f'{error}\n'
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] RuntimeError: Internal Triton PTX codegen error
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] `ptxas` stderr:
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] ptxas fatal : Value 'sm_100' is not defined for option 'gpu-name'
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532]
E0219 13:51:06.311000 2616135 torch/_inductor/runtime/triton_heuristics.py:532] Repro command: /home/vasiliy/.conda/envs/pytorch/lib/python3.11/site-packages/triton/backends/nvidia/bin/ptxas -lineinfo -v --gpu-name=sm_100 /tmp/tmpodrv3kxq.ptx -o /tmp/tmpodrv3kxq.ptx.o
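
From the ptxas fatal above, this looks like the ptxas bundled with Triton being too old to recognize the sm_100 (Blackwell) target, rather than a problem with the generated kernel itself. Below is a small diagnostic sketch (not part of the original report) that probes the bundled ptxas for sm_100 support. The PTXAS path is copied from the traceback above, and the probe assumes ptxas validates --gpu-name before it reads its input, which is consistent with the fatal message.

```python
"""Diagnostic sketch: does the ptxas bundled with Triton accept sm_100?

The PTXAS path mirrors the traceback above; adjust it for your environment.
"""
import subprocess

PTXAS = (
    "/home/vasiliy/.conda/envs/pytorch/lib/python3.11/"
    "site-packages/triton/backends/nvidia/bin/ptxas"
)


def ptxas_supports(arch: str, ptxas: str = PTXAS) -> bool:
    # Print which CUDA toolchain this ptxas comes from, for reference.
    version = subprocess.run([ptxas, "--version"], capture_output=True, text=True)
    print(version.stdout.strip())

    # Feed ptxas an empty input (/dev/null, so Linux only). The assumption is
    # that --gpu-name is validated before the input is read, consistent with
    # the "is not defined for option 'gpu-name'" fatal above, so an unsupported
    # arch shows up in stderr regardless of the PTX contents.
    probe = subprocess.run(
        [ptxas, f"--gpu-name={arch}", "/dev/null", "-o", "/dev/null"],
        capture_output=True,
        text=True,
    )
    return "is not defined for option 'gpu-name'" not in probe.stderr


if __name__ == "__main__":
    print("sm_100 supported:", ptxas_supports("sm_100"))
```

If this prints False, the likely fix is a Triton build that bundles a newer CUDA toolchain (as far as I know, sm_100 support first shipped with the ptxas in CUDA 12.8) rather than any change to these tests.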