
Commit 5b5b21d

feat: CPU-only build (#51)
1 parent 3f19246 · commit 5b5b21d

10 files changed: +77 −60 lines changed

CHANGELOG.md (+1)

```diff
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Added

+- CPU-only build without `nvcc` requirement by [@XuehaiPan](https://github.com/XuehaiPan) in [#51](https://github.com/metaopt/TorchOpt/pull/51).
 - Use [`cibuildwheel`](https://github.com/pypa/cibuildwheel) to build wheels by [@XuehaiPan](https://github.com/XuehaiPan) in [#45](https://github.com/metaopt/TorchOpt/pull/45).
 - Use dynamic process number in CPU kernels by [@JieRen98](https://github.com/JieRen98) in [#42](https://github.com/metaopt/TorchOpt/pull/42).
```

CMakeLists.txt (+24 −13)

```diff
@@ -13,33 +13,43 @@
 # limitations under the License.
 # ==============================================================================

-cmake_minimum_required(VERSION 3.4)
-project(torchopt LANGUAGES CXX CUDA)
+cmake_minimum_required(VERSION 3.8)
+project(torchopt LANGUAGES CXX)

 if(NOT CMAKE_BUILD_TYPE)
   set(CMAKE_BUILD_TYPE Release)
 endif()

-find_package(CUDA REQUIRED)
-cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS All)
-list(APPEND CUDA_NVCC_FLAGS ${CUDA_ARCH_FLAGS})
-
 set(CMAKE_CXX_STANDARD 14)
-set(CMAKE_CUDA_STANDARD 14)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -pthread -fPIC -fopenmp")
 set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
-set(CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE} -O3")
+
+find_package(CUDA)
+
+if(CUDA_FOUND)
+  message(STATUS "Found CUDA, enabling CUDA support.")
+  enable_language(CUDA)
+
+  cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS All)
+  list(APPEND CUDA_NVCC_FLAGS ${CUDA_ARCH_FLAGS})
+  set(CMAKE_CUDA_STANDARD 14)
+  set(CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE} -O3")
+else()
+  message(STATUS "CUDA not found, build for CPU-only.")
+endif()

 function(system)
   set(options STRIP)
   set(oneValueArgs OUTPUT_VARIABLE ERROR_VARIABLE WORKING_DIRECTORY)
   set(multiValueArgs COMMAND)
-  cmake_parse_arguments(SYSTEM
-                        "${options}"
-                        "${oneValueArgs}"
-                        "${multiValueArgs}"
-                        "${ARGN}")
+  cmake_parse_arguments(
+    SYSTEM
+    "${options}"
+    "${oneValueArgs}"
+    "${multiValueArgs}"
+    "${ARGN}"
+  )
@@ -51,6 +61,7 @@ function(system)
     ERROR_VARIABLE STDERR
     WORKING_DIRECTORY "${SYSTEM_WORKING_DIRECTORY}"
   )
+
   if("${SYSTEM_STRIP}")
     string(STRIP "${STDOUT}" STDOUT)
     string(STRIP "${STDERR}" STDERR)
```
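The top-level change distills to a small, reusable pattern: declare only `CXX` in `project()`, probe for a CUDA toolchain, and enable the CUDA language on demand. A minimal standalone sketch of that pattern (illustrative project name, not the full file above):

```cmake
# Minimal sketch: optional CUDA in an otherwise C++-only project.
cmake_minimum_required(VERSION 3.8)  # 3.8 introduced first-class CUDA language support
project(example LANGUAGES CXX)       # CUDA is no longer declared unconditionally

find_package(CUDA)  # classic FindCUDA module; sets CUDA_FOUND without aborting if absent

if(CUDA_FOUND)
  enable_language(CUDA)              # activate the CUDA toolchain only when nvcc exists
  set(CMAKE_CUDA_STANDARD 14)
else()
  message(STATUS "CUDA not found, building CPU-only.")
endif()
```

Worth knowing as background: the `FindCUDA` module used for the probe was deprecated in later CMake releases in favor of `enable_language(CUDA)` with `check_language(CUDA)`/`FindCUDAToolkit`; here it serves only as a cheap existence check.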

README.md (−1)

```diff
@@ -252,7 +252,6 @@ pip3 install --no-build-isolation --editable .

 - [ ] Support general implicit differentiation with functional programing.
 - [ ] Support more optimizers such as AdamW, RMSProp
-- [ ] CPU-accelerated optimizer

 ## Changelog
```
2 files renamed without changes.

pyproject.toml (+13 −7)

```diff
@@ -92,12 +92,18 @@ environment-pass = ["CUDA_VERSION", "TEST_TORCH_SPECS"]
 container-engine = "docker"

 before-all = """
-    CUDA_VERSION="$(echo "${CUDA_VERSION:-"${DEFAULT_CUDA_VERSION}"}" | cut -d"." -f-2)"
-    CUDA_PKG_SUFFIX="$(echo "${CUDA_VERSION}" | tr "." "-")"
-    echo "CUDA_VERSION=${CUDA_VERSION}"
-    yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo
-    yum clean all
-    yum install -y nvidia-driver-latest-libs "cuda-minimal-build-${CUDA_PKG_SUFFIX}"
+    CUDA_VERSION="${CUDA_VERSION:-"${DEFAULT_CUDA_VERSION}"}"
+    if [[ "${CUDA_VERSION}" == "None" || "${CUDA_VERSION}" == "none" ]]; then
+        sed -i -E "s/__version__\\s*=\\s*.*$/\\0 + '+cpu'/" torchopt/version.py
+    else
+        CUDA_VERSION="$(echo "${CUDA_VERSION}" | cut -d"." -f-2)"
+        CUDA_PKG_SUFFIX="$(echo "${CUDA_VERSION}" | tr "." "-")"
+        echo "CUDA_VERSION=${CUDA_VERSION}"
+        yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo
+        yum clean all
+        yum install -y nvidia-driver-latest-libs "cuda-minimal-build-${CUDA_PKG_SUFFIX}"
+    fi
+    echo "cat torchopt/version.py"; cat torchopt/version.py
 """
 test-extras = ["test"]
 test-command = """
@@ -130,7 +136,7 @@ repair-wheel-command = """
     echo "ls ${TORCH_LIB_PATH}"; ls -lh "${TORCH_LIB_PATH}"
     python -m pip install --force-reinstall git+https://github.com/XuehaiPan/auditwheel.git@torchopt
     python -m auditwheel lddtree "{wheel}"
-    python -m auditwheel repair --wheel-dir="{dest_dir}" "{wheel}"
+    python -m auditwheel repair --no-copy-site-libs --wheel-dir="{dest_dir}" "{wheel}"
 """

 # Linter tools #################################################################
```
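The interesting branch is the new `None`/`none` case: rather than installing the CUDA toolkit into the build container, it stamps the wheel as a CPU build by rewriting `torchopt/version.py`. A sketch of what the `sed` line does once TOML unescapes the doubled backslashes (the version number is illustrative):

```sh
# As executed by the shell (TOML's "\\s"/"\\0" arrive as "\s"/"\0").
# GNU sed expands "\0" to the entire match, so the original assignment is
# kept and " + '+cpu'" is appended, yielding a PEP 440 local version label.
sed -i -E "s/__version__\s*=\s*.*$/\0 + '+cpu'/" torchopt/version.py

# Illustrative effect on torchopt/version.py:
#   before: __version__ = '0.5.0'
#   after:  __version__ = '0.5.0' + '+cpu'   # evaluates to '0.5.0+cpu'
```

The trailing `cat torchopt/version.py` in `before-all` then echoes the (possibly rewritten) file into the build log for verification.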

src/adam_op/CMakeLists.txt (+5 −28)

```diff
@@ -13,36 +13,13 @@
 # limitations under the License.
 # ==============================================================================

-# add_library(
-#     adam_op_CUDA SHARED
-#     adam_op_impl.cu
-# )
+set(adam_op_src adam_op.cpp adam_op_impl_cpu.cpp)

-# target_link_libraries(
-#     adam_op_CUDA
-#     ${TORCH_LIBRARIES}
-# )
+if(CUDA_FOUND)
+  list(APPEND adam_op_src adam_op_impl_cuda.cu)
+endif()

-# add_library(
-#     adam_op_CPU SHARED
-#     adam_op_impl.cpp
-# )
-
-# target_link_libraries(
-#     adam_op_CPU
-#     ${TORCH_LIBRARIES}
-# )
-
-# pybind11_add_module(adam_op adam_op.cpp)
-
-# target_link_libraries(
-#     adam_op PRIVATE
-#     adam_op_CPU
-#     adam_op_CUDA
-#     ${TORCH_LIBRARIES}
-# )
-
-pybind11_add_module(adam_op adam_op.cpp adam_op_impl.cpp adam_op_impl.cu)
+pybind11_add_module(adam_op "${adam_op_src}")

 target_link_libraries(
   adam_op PRIVATE
```
src/adam_op/adam_op.cpp (+32 −9)

```diff
@@ -18,29 +18,37 @@
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>

-#include "include/adam_op/adam_op_impl.cuh"
-#include "include/adam_op/adam_op_impl.h"
+#include "include/adam_op/adam_op_impl_cpu.h"
+#if defined(__CUDACC__)
+#include "include/adam_op/adam_op_impl_cuda.cuh"
+#endif

 namespace torchopt {
 TensorArray<3> adamForwardInplace(const torch::Tensor& updates,
                                   const torch::Tensor& mu,
                                   const torch::Tensor& nu, const float b1,
                                   const float b2, const float eps,
                                   const float eps_root, const int count) {
+#if defined(__CUDACC__)
   if (updates.device().is_cuda()) {
     return adamForwardInplaceCUDA(updates, mu, nu, b1, b2, eps, eps_root,
                                   count);
-  } else if (updates.device().is_cpu()) {
+  }
+#endif
+  if (updates.device().is_cpu()) {
     return adamForwardInplaceCPU(updates, mu, nu, b1, b2, eps, eps_root, count);
   } else {
     throw std::runtime_error("Not implemented");
   }
 }
 torch::Tensor adamForwardMu(const torch::Tensor& updates,
                             const torch::Tensor& mu, const float b1) {
+#if defined(__CUDACC__)
   if (updates.device().is_cuda()) {
     return adamForwardMuCUDA(updates, mu, b1);
-  } else if (updates.device().is_cpu()) {
+  }
+#endif
+  if (updates.device().is_cpu()) {
     return adamForwardMuCPU(updates, mu, b1);
   } else {
     throw std::runtime_error("Not implemented");
@@ -49,9 +57,12 @@ torch::Tensor adamForwardMu(const torch::Tensor& updates,

 torch::Tensor adamForwardNu(const torch::Tensor& updates,
                             const torch::Tensor& nu, const float b2) {
+#if defined(__CUDACC__)
   if (updates.device().is_cuda()) {
     return adamForwardNuCUDA(updates, nu, b2);
-  } else if (updates.device().is_cpu()) {
+  }
+#endif
+  if (updates.device().is_cpu()) {
     return adamForwardNuCPU(updates, nu, b2);
   } else {
     throw std::runtime_error("Not implemented");
@@ -62,9 +73,12 @@ torch::Tensor adamForwardUpdates(const torch::Tensor& new_mu,
                                  const torch::Tensor& new_nu, const float b1,
                                  const float b2, const float eps,
                                  const float eps_root, const int count) {
+#if defined(__CUDACC__)
   if (new_mu.device().is_cuda()) {
     return adamForwardUpdatesCUDA(new_mu, new_nu, b1, b2, eps, eps_root, count);
-  } else if (new_mu.device().is_cpu()) {
+  }
+#endif
+  if (new_mu.device().is_cpu()) {
     return adamForwardUpdatesCPU(new_mu, new_nu, b1, b2, eps, eps_root, count);
   } else {
     throw std::runtime_error("Not implemented");
@@ -74,9 +88,12 @@ torch::Tensor adamForwardUpdates(const torch::Tensor& new_mu,
 TensorArray<2> adamBackwardMu(const torch::Tensor& dmu,
                               const torch::Tensor& updates,
                               const torch::Tensor& mu, const float b1) {
+#if defined(__CUDACC__)
   if (dmu.device().is_cuda()) {
     return adamBackwardMuCUDA(dmu, updates, mu, b1);
-  } else if (dmu.device().is_cpu()) {
+  }
+#endif
+  if (dmu.device().is_cpu()) {
     return adamBackwardMuCPU(dmu, updates, mu, b1);
   } else {
     throw std::runtime_error("Not implemented");
@@ -86,9 +103,12 @@ TensorArray<2> adamBackwardMu(const torch::Tensor& dmu,
 TensorArray<2> adamBackwardNu(const torch::Tensor& dnu,
                               const torch::Tensor& updates,
                               const torch::Tensor& nu, const float b2) {
+#if defined(__CUDACC__)
   if (dnu.device().is_cuda()) {
     return adamBackwardNuCUDA(dnu, updates, nu, b2);
-  } else if (dnu.device().is_cpu()) {
+  }
+#endif
+  if (dnu.device().is_cpu()) {
     return adamBackwardNuCPU(dnu, updates, nu, b2);
   } else {
     throw std::runtime_error("Not implemented");
@@ -100,10 +120,13 @@ TensorArray<2> adamBackwardUpdates(const torch::Tensor& dupdates,
                                    const torch::Tensor& new_mu,
                                    const torch::Tensor& new_nu, const float b1,
                                    const float b2, const int count) {
+#if defined(__CUDACC__)
   if (dupdates.device().is_cuda()) {
     return adamBackwardUpdatesCUDA(dupdates, updates, new_mu, new_nu, b1, b2,
                                    count);
-  } else if (dupdates.device().is_cpu()) {
+  }
+#endif
+  if (dupdates.device().is_cpu()) {
     return adamBackwardUpdatesCPU(dupdates, updates, new_mu, new_nu, b1, b2,
                                   count);
   } else {
```
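Every operator in this file now follows the same dispatch shape: a CUDA branch that exists only when the translation unit is compiled with CUDA support, then a CPU branch, then a fallback error. A self-contained sketch of that shape (hypothetical `scale*` functions, not TorchOpt's API):

```cpp
#include <stdexcept>

#include <torch/extension.h>

#if defined(__CUDACC__)
// Declared here, defined in a .cu file; visible only under nvcc.
torch::Tensor scaleCUDA(const torch::Tensor& x, float alpha);
#endif

torch::Tensor scaleCPU(const torch::Tensor& x, float alpha) {
  return x * alpha;  // plain ATen CPU path
}

torch::Tensor scale(const torch::Tensor& x, float alpha) {
#if defined(__CUDACC__)
  if (x.device().is_cuda()) {
    return scaleCUDA(x, alpha);  // compiled out entirely in CPU-only builds
  }
#endif
  if (x.device().is_cpu()) {
    return scaleCPU(x, alpha);
  }
  throw std::runtime_error("Not implemented");
}
```

Note that `__CUDACC__` is defined only while nvcc compiles a translation unit, so the guard reflects how each file is compiled rather than whether the package as a whole was configured with CUDA.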

src/adam_op/adam_op_impl.cpp → src/adam_op/adam_op_impl_cpu.cpp (+1 −1)

```diff
@@ -13,7 +13,7 @@
 // limitations under the License.
 // ==============================================================================

-#include "include/adam_op/adam_op_impl.h"
+#include "include/adam_op/adam_op_impl_cpu.h"

 #include <omp.h>
 #include <torch/extension.h>
```

src/adam_op/adam_op_impl.cu → src/adam_op/adam_op_impl_cuda.cu (+1 −1)

```diff
@@ -17,7 +17,7 @@

 #include <vector>

-#include "include/adam_op/adam_op_impl.cuh"
+#include "include/adam_op/adam_op_impl_cuda.cuh"
 #include "include/utils.h"

 namespace torchopt {
```
