-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsetup.py
79 lines (64 loc) · 2.7 KB
/
setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# Copyright (c) 2018-present, Cruise LLC
#
# This source code is licensed under the Apache License, Version 2.0,
# found in the LICENSE file in the root directory of this source tree.
# You may not use this file except in compliance with the License.
# Authored by Liyan Chen ([email protected])
import os
import sys
import torch
from pathlib import Path
from skbuild import setup
# Map CMake-style CUDA architecture codes (e.g. '86') to the dotted form
# expected by torch's TORCH_CUDA_ARCH_LIST (e.g. '8.6').
ARCH_NAME_MAP = {
    '75': '7.5', '80': '8.0', '86': '8.6', '87': '8.7', '89': '8.9', '90': '9.0', '90a': '9.0a',
    '95': '9.5', '100': '10.0', '100a': '10.0a', '101': '10.1', '101a': '10.1a', '120': '12.0',
    '120a': '12.0a'
}


def cmakearch2torcharch(arch_list):
    """Convert a semicolon-separated CMake CUDA arch list to torch format.

    Args:
        arch_list: CMAKE_CUDA_ARCHITECTURES-style string, e.g. '80;90'.

    Returns:
        Semicolon-separated dotted arch string, e.g. '8.0;9.0'.

    Raises:
        RuntimeError: if a code is not in ARCH_NAME_MAP. (The original
            raised a bare KeyError with no hint of which code was bad.)
    """
    dotted = []
    for code in arch_list.split(';'):
        try:
            dotted.append(ARCH_NAME_MAP[code])
        except KeyError:
            raise RuntimeError(
                f'Unknown CUDA architecture code {code!r} in {arch_list!r}; '
                f'supported codes: {sorted(ARCH_NAME_MAP)}'
            ) from None
    return ';'.join(dotted)
# Locate the installed torch package so CMake can find Torch's config files
# via CMAKE_PREFIX_PATH (consumed by the setup() call below).
torch_rt = os.path.dirname(torch.__file__)
print(torch_rt)

# Honor a user-specified C++ compiler; otherwise CMake uses the system default.
if 'CXX' in os.environ:
    cxx = os.environ['CXX']
    print(f'env `CXX` set as {cxx}')
else:
    print(f'env `CXX` unset, using system-default cc')

if 'F3D_CUDA_ARCH' in os.environ:
    cuda_arch = os.environ['F3D_CUDA_ARCH']
    # Pick the ThunderKittens arch macro from the requested CUDA arch list.
    # NOTE(review): substring matching means the first matching family wins
    # when several arches are listed (e.g. '87;90' selects KITTENS_4090) —
    # presumably intentional, but worth confirming.
    #
    # Ad-hoc macro for Orin. One can further specify Orin Max Shared Memory to be 192KB according to spec.
    # This flag will set Max Shared Memory to be 100KB for Orin, but still allow nvcc for Orin-specific optimizations.
    if '89' in cuda_arch or '87' in cuda_arch:
        tk_arch = 'KITTENS_4090'
    elif '80' in cuda_arch:
        tk_arch = 'KITTENS_A100'
    elif '90' in cuda_arch:
        tk_arch = 'KITTENS_HOPPER'
    # Ad-hoc macro for Thor. One can further specify Thor Max Shared Memory to be ???. (Spec sheets not available now)
    # This flag will set Max Shared Memory to be 227KB for Thor, but still allow nvcc for Thor-specific optimizations.
    elif '100' in cuda_arch:
        tk_arch = 'KITTENS_HOPPER'
    else:
        # BUG FIX: the original only printed a warning here and then fell
        # through to `tk_flag = f'-D{tk_arch}'`, crashing with a confusing
        # NameError (e.g. for cuda_arch='86'). Fail fast with a clear error.
        raise RuntimeError(f'ThunderKitten has NO support for `CUDA_ARCHITECTURES`={cuda_arch}')
    # Export the dotted arch list for torch's own build tooling.
    os.environ['TORCH_CUDA_ARCH_LIST'] = cmakearch2torcharch(cuda_arch)
    tk_flag = f'-D{tk_arch}'
    print(f'env `CUDA_ARCHITECTURES` set as {cuda_arch}, TK arch macro set as {tk_arch}')
else:
    raise RuntimeError(f'No `F3D_CUDA_ARCH` set in env')

# Optional explicit CUDA toolkit: forward its nvcc to CMake when CUDA_HOME is set.
nvcc_path_flag = []
if 'CUDA_HOME' in os.environ:
    cudadir = os.environ['CUDA_HOME']
    nvcc_path_flag.append(f'-DCMAKE_CUDA_COMPILER={cudadir}/bin/nvcc')
    print(f'env `CUDA_HOME` set as {cudadir}')
else:
    print(f'env `CUDA_HOME` unset, using system-default nvcc')
# Build and install the extension with scikit-build. `cmake_args` forwards:
#   - CMAKE_PREFIX_PATH: the torch install dir, so find_package(Torch) works;
#   - TK_FLAGS: the ThunderKittens arch macro (-DKITTENS_*) chosen above;
#   - optionally CMAKE_CUDA_COMPILER when CUDA_HOME was set.
setup(
name="flash3dxfmr",
author='Liyan Chen',
author_email='[email protected]',
version="0.1.rc2",
description="Flash3D Point Transformers",
license="Apache License 2.0",
packages=['flash3dxfmr', 'flash3dxfmr.lib'],
cmake_args=[f'-DCMAKE_PREFIX_PATH={torch_rt}', f'-DTK_FLAGS={tk_flag}'] + nvcc_path_flag,
cmake_minimum_required_version='3.25'
)