From 787667d6ea223a4f24abf6f2e78dd8253852c00d Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 26 Feb 2026 10:54:26 -0500 Subject: [PATCH 001/137] Update version --- CMakeLists.txt | 2 +- ports/int128/portfile.cmake | 4 ++-- ports/int128/vcpkg.json | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1ff815bb..a965c929 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 3.10...3.20) # Set version explicitly if not part of Boost superproject if(NOT BOOST_SUPERPROJECT_VERSION) - set(BOOST_INT128_VERSION 1.6.0) + set(BOOST_INT128_VERSION 1.6.1) else() set(BOOST_INT128_VERSION ${BOOST_SUPERPROJECT_VERSION}) endif() diff --git a/ports/int128/portfile.cmake b/ports/int128/portfile.cmake index ca3cd128..5c61b3b7 100644 --- a/ports/int128/portfile.cmake +++ b/ports/int128/portfile.cmake @@ -7,8 +7,8 @@ vcpkg_from_github( OUT_SOURCE_PATH SOURCE_PATH REPO cppalliance/int128 - REF v1.6.0 - SHA512 cc5a5e065643ee5b790f35441b34cea291bd299f5460278bb3402005c9cdd5a9a552b28e64816cc0ed480662c53959eb7cd5d1774d7e2480fffc65532353dc5c + REF v1.6.1 + SHA512 84978c581edd5d4b2b9ecaf9229b552dae4c686387d7e3149a2fd7ff4736a045ba6712611f6560ed58084f0f0cd2a5fc4d32759bb0d4166529f50bc066dc59e0 HEAD_REF master ) diff --git a/ports/int128/vcpkg.json b/ports/int128/vcpkg.json index 5863488b..6548a469 100644 --- a/ports/int128/vcpkg.json +++ b/ports/int128/vcpkg.json @@ -1,6 +1,6 @@ { "name": "int128", - "version": "1.6.0", + "version": "1.6.1", "description": "Portable and performant 128-bit integers", "homepage": "https://github.com/cppalliance/int128", "license": "BSL-1.0", From ba49223f35902eb556cc3c498a35314a5b02cba4 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 26 Feb 2026 10:51:11 -0500 Subject: [PATCH 002/137] Duplicate fuzzing files for additional operations --- .../test_fuzzing_add_versus_wide_int.cpp | 141 ++++++++++++++++++ .../test_fuzzing_mul_versus_wide_int.cpp | 141 
++++++++++++++++++ .../test_fuzzing_sub_versus_wide_int.cpp | 141 ++++++++++++++++++ 3 files changed, 423 insertions(+) create mode 100644 test/fuzzing/test_fuzzing_add_versus_wide_int.cpp create mode 100644 test/fuzzing/test_fuzzing_mul_versus_wide_int.cpp create mode 100644 test/fuzzing/test_fuzzing_sub_versus_wide_int.cpp diff --git a/test/fuzzing/test_fuzzing_add_versus_wide_int.cpp b/test/fuzzing/test_fuzzing_add_versus_wide_int.cpp new file mode 100644 index 00000000..216ec8e6 --- /dev/null +++ b/test/fuzzing/test_fuzzing_add_versus_wide_int.cpp @@ -0,0 +1,141 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright Matt Borland 2025. +// Copyright Christopher Kormanyos 2024 - 2025. +// Distributed under the Boost Software License, +// Version 1.0. (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +// cd /mnt/c/Users/ckorm/Documents/Ks/PC_Software/NumericalPrograms/ExtendedNumberTypes/wide_integer +// clang++ -std=c++20 -g -O2 -Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion -fsanitize=fuzzer -I. 
-I/mnt/c/ChrisGitRepos/cppalliance/int128/include -I../NumericalPrograms/ExtendedNumberTypes/wide_integer test/fuzzing/test_fuzzing_div_versus_cppalliance_int128.cpp -o test_fuzzing_div_versus_cppalliance_int128 +// ./test_fuzzing_div_versus_cppalliance_int128 -max_total_time=1200 -max_len=32 + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +extern "C" +int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size); + +namespace fuzzing +{ + template + auto eval_op(const CntrlUintType& a_cntrl, + const CntrlUintType& b_cntrl, + const LocalUintType& a_local, + const LocalUintType& b_local) -> bool; +} + +template +auto fuzzing::eval_op(const CntrlUintType& a_cntrl, + const CntrlUintType& b_cntrl, + const LocalUintType& a_local, + const LocalUintType& b_local) -> bool +{ + using cntrl_uint_type = CntrlUintType; + using local_uint_type = LocalUintType; + + static_assert + ( + (std::numeric_limits::digits == std::numeric_limits::digits) + && (std::numeric_limits::digits == int { INT32_C(128) }), + "Error: the control and local types must both have 128 binary digits" + ); + + const local_uint_type result_local { local_uint_type(a_local) += b_local }; + const cntrl_uint_type result_cntrl { cntrl_uint_type(a_cntrl) += b_cntrl }; + + const std::uint64_t result_local_lo = static_cast(result_local); + const std::uint64_t result_local_hi = static_cast(result_local >> unsigned { UINT8_C(64) }); + + const std::uint64_t result_cntrl_lo = static_cast(result_cntrl); + const std::uint64_t result_cntrl_hi = static_cast(result_cntrl >> unsigned { UINT8_C(64) }); + + // Verify that both the local (test) type as well as control type + // obtain the same numerical result. 
+ + const bool + result_is_ok + { + (result_local_lo == result_cntrl_lo) + && (result_local_hi == result_cntrl_hi) + }; + + if(!result_is_ok) + { + std::cout << "Error: lhs: " << a_local << ", rhs: " << b_local << ", result obtained: " << result_local << std::endl; + } + + return result_is_ok; +} + +// The fuzzing entry point. +extern "C" +int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) +{ + constexpr std::size_t max_size { UINT8_C(32) }; + constexpr std::size_t min_size { UINT8_C(17) }; + + bool result_is_ok { true }; + + if(((size >= min_size) && (size <= max_size)) && (data != nullptr)) + { + using local_data_array_type = std::array; + + local_data_array_type tmp_data { }; + + tmp_data.fill(UINT8_C(0)); + + static_cast(std::copy(data, data + size, tmp_data.begin())); + + const std::uint64_t a_lo64 { *reinterpret_cast(tmp_data.data() + std::size_t { UINT8_C(0) }) }; + const std::uint64_t a_hi64 { *reinterpret_cast(tmp_data.data() + std::size_t { UINT8_C(8) }) }; + const std::uint64_t b_lo64 { *reinterpret_cast(tmp_data.data() + std::size_t { UINT8_C(16) }) }; + const std::uint64_t b_hi64 { *reinterpret_cast(tmp_data.data() + std::size_t { UINT8_C(24) }) }; + + // Import data into the uint values. 
+ using local_uint_type = ::boost::int128::uint128_t; + + #if defined(WIDE_INTEGER_NAMESPACE) + using cntrl_uint_type = ::WIDE_INTEGER_NAMESPACE::math::wide_integer::uint128_t; + #else + using cntrl_uint_type = ::math::wide_integer::uint128_t; + #endif + + cntrl_uint_type a_cntrl { a_hi64 }; a_cntrl <<= unsigned { UINT8_C(64) }; a_cntrl |= a_lo64; + cntrl_uint_type b_cntrl { b_hi64 }; b_cntrl <<= unsigned { UINT8_C(64) }; b_cntrl |= b_lo64; + + local_uint_type a_local { a_hi64 }; a_local <<= unsigned { UINT8_C(64) }; a_local |= a_lo64; + local_uint_type b_local { b_hi64 }; b_local <<= unsigned { UINT8_C(64) }; b_local |= b_lo64; + + if(a_local < b_local) + { + std::swap(a_local, b_local); + std::swap(a_cntrl, b_cntrl); + } + + if(b_local != 0U) + { + const bool result_op_is_ok { fuzzing::eval_op(a_cntrl, b_cntrl, a_local, b_local) }; + + if(!result_op_is_ok) + { + assert(result_op_is_ok); + } + + result_is_ok = (result_op_is_ok && result_is_ok); + } + } + + return (result_is_ok ? 0 : -1); +} diff --git a/test/fuzzing/test_fuzzing_mul_versus_wide_int.cpp b/test/fuzzing/test_fuzzing_mul_versus_wide_int.cpp new file mode 100644 index 00000000..2d7da2dc --- /dev/null +++ b/test/fuzzing/test_fuzzing_mul_versus_wide_int.cpp @@ -0,0 +1,141 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright Matt Borland 2025. +// Copyright Christopher Kormanyos 2024 - 2025. +// Distributed under the Boost Software License, +// Version 1.0. (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +// cd /mnt/c/Users/ckorm/Documents/Ks/PC_Software/NumericalPrograms/ExtendedNumberTypes/wide_integer +// clang++ -std=c++20 -g -O2 -Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion -fsanitize=fuzzer -I. 
-I/mnt/c/ChrisGitRepos/cppalliance/int128/include -I../NumericalPrograms/ExtendedNumberTypes/wide_integer test/fuzzing/test_fuzzing_div_versus_cppalliance_int128.cpp -o test_fuzzing_div_versus_cppalliance_int128 +// ./test_fuzzing_div_versus_cppalliance_int128 -max_total_time=1200 -max_len=32 + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +extern "C" +int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size); + +namespace fuzzing +{ + template + auto eval_op(const CntrlUintType& a_cntrl, + const CntrlUintType& b_cntrl, + const LocalUintType& a_local, + const LocalUintType& b_local) -> bool; +} + +template +auto fuzzing::eval_op(const CntrlUintType& a_cntrl, + const CntrlUintType& b_cntrl, + const LocalUintType& a_local, + const LocalUintType& b_local) -> bool +{ + using cntrl_uint_type = CntrlUintType; + using local_uint_type = LocalUintType; + + static_assert + ( + (std::numeric_limits::digits == std::numeric_limits::digits) + && (std::numeric_limits::digits == int { INT32_C(128) }), + "Error: the control and local types must both have 128 binary digits" + ); + + const local_uint_type result_local { local_uint_type(a_local) *= b_local }; + const cntrl_uint_type result_cntrl { cntrl_uint_type(a_cntrl) *= b_cntrl }; + + const std::uint64_t result_local_lo = static_cast(result_local); + const std::uint64_t result_local_hi = static_cast(result_local >> unsigned { UINT8_C(64) }); + + const std::uint64_t result_cntrl_lo = static_cast(result_cntrl); + const std::uint64_t result_cntrl_hi = static_cast(result_cntrl >> unsigned { UINT8_C(64) }); + + // Verify that both the local (test) type as well as control type + // obtain the same numerical result. 
+ + const bool + result_is_ok + { + (result_local_lo == result_cntrl_lo) + && (result_local_hi == result_cntrl_hi) + }; + + if(!result_is_ok) + { + std::cout << "Error: lhs: " << a_local << ", rhs: " << b_local << ", result obtained: " << result_local << std::endl; + } + + return result_is_ok; +} + +// The fuzzing entry point. +extern "C" +int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) +{ + constexpr std::size_t max_size { UINT8_C(32) }; + constexpr std::size_t min_size { UINT8_C(17) }; + + bool result_is_ok { true }; + + if(((size >= min_size) && (size <= max_size)) && (data != nullptr)) + { + using local_data_array_type = std::array; + + local_data_array_type tmp_data { }; + + tmp_data.fill(UINT8_C(0)); + + static_cast(std::copy(data, data + size, tmp_data.begin())); + + const std::uint64_t a_lo64 { *reinterpret_cast(tmp_data.data() + std::size_t { UINT8_C(0) }) }; + const std::uint64_t a_hi64 { *reinterpret_cast(tmp_data.data() + std::size_t { UINT8_C(8) }) }; + const std::uint64_t b_lo64 { *reinterpret_cast(tmp_data.data() + std::size_t { UINT8_C(16) }) }; + const std::uint64_t b_hi64 { *reinterpret_cast(tmp_data.data() + std::size_t { UINT8_C(24) }) }; + + // Import data into the uint values. 
+ using local_uint_type = ::boost::int128::uint128_t; + + #if defined(WIDE_INTEGER_NAMESPACE) + using cntrl_uint_type = ::WIDE_INTEGER_NAMESPACE::math::wide_integer::uint128_t; + #else + using cntrl_uint_type = ::math::wide_integer::uint128_t; + #endif + + cntrl_uint_type a_cntrl { a_hi64 }; a_cntrl <<= unsigned { UINT8_C(64) }; a_cntrl |= a_lo64; + cntrl_uint_type b_cntrl { b_hi64 }; b_cntrl <<= unsigned { UINT8_C(64) }; b_cntrl |= b_lo64; + + local_uint_type a_local { a_hi64 }; a_local <<= unsigned { UINT8_C(64) }; a_local |= a_lo64; + local_uint_type b_local { b_hi64 }; b_local <<= unsigned { UINT8_C(64) }; b_local |= b_lo64; + + if(a_local < b_local) + { + std::swap(a_local, b_local); + std::swap(a_cntrl, b_cntrl); + } + + if(b_local != 0U) + { + const bool result_op_is_ok { fuzzing::eval_op(a_cntrl, b_cntrl, a_local, b_local) }; + + if(!result_op_is_ok) + { + assert(result_op_is_ok); + } + + result_is_ok = (result_op_is_ok && result_is_ok); + } + } + + return (result_is_ok ? 0 : -1); +} diff --git a/test/fuzzing/test_fuzzing_sub_versus_wide_int.cpp b/test/fuzzing/test_fuzzing_sub_versus_wide_int.cpp new file mode 100644 index 00000000..e5bff887 --- /dev/null +++ b/test/fuzzing/test_fuzzing_sub_versus_wide_int.cpp @@ -0,0 +1,141 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright Matt Borland 2025. +// Copyright Christopher Kormanyos 2024 - 2025. +// Distributed under the Boost Software License, +// Version 1.0. (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +// cd /mnt/c/Users/ckorm/Documents/Ks/PC_Software/NumericalPrograms/ExtendedNumberTypes/wide_integer +// clang++ -std=c++20 -g -O2 -Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion -fsanitize=fuzzer -I. 
-I/mnt/c/ChrisGitRepos/cppalliance/int128/include -I../NumericalPrograms/ExtendedNumberTypes/wide_integer test/fuzzing/test_fuzzing_div_versus_cppalliance_int128.cpp -o test_fuzzing_div_versus_cppalliance_int128 +// ./test_fuzzing_div_versus_cppalliance_int128 -max_total_time=1200 -max_len=32 + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +extern "C" +int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size); + +namespace fuzzing +{ + template + auto eval_op(const CntrlUintType& a_cntrl, + const CntrlUintType& b_cntrl, + const LocalUintType& a_local, + const LocalUintType& b_local) -> bool; +} + +template +auto fuzzing::eval_op(const CntrlUintType& a_cntrl, + const CntrlUintType& b_cntrl, + const LocalUintType& a_local, + const LocalUintType& b_local) -> bool +{ + using cntrl_uint_type = CntrlUintType; + using local_uint_type = LocalUintType; + + static_assert + ( + (std::numeric_limits::digits == std::numeric_limits::digits) + && (std::numeric_limits::digits == int { INT32_C(128) }), + "Error: the control and local types must both have 128 binary digits" + ); + + const local_uint_type result_local { local_uint_type(a_local) -= b_local }; + const cntrl_uint_type result_cntrl { cntrl_uint_type(a_cntrl) -= b_cntrl }; + + const std::uint64_t result_local_lo = static_cast(result_local); + const std::uint64_t result_local_hi = static_cast(result_local >> unsigned { UINT8_C(64) }); + + const std::uint64_t result_cntrl_lo = static_cast(result_cntrl); + const std::uint64_t result_cntrl_hi = static_cast(result_cntrl >> unsigned { UINT8_C(64) }); + + // Verify that both the local (test) type as well as control type + // obtain the same numerical result. 
+ + const bool + result_is_ok + { + (result_local_lo == result_cntrl_lo) + && (result_local_hi == result_cntrl_hi) + }; + + if(!result_is_ok) + { + std::cout << "Error: lhs: " << a_local << ", rhs: " << b_local << ", result obtained: " << result_local << std::endl; + } + + return result_is_ok; +} + +// The fuzzing entry point. +extern "C" +int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) +{ + constexpr std::size_t max_size { UINT8_C(32) }; + constexpr std::size_t min_size { UINT8_C(17) }; + + bool result_is_ok { true }; + + if(((size >= min_size) && (size <= max_size)) && (data != nullptr)) + { + using local_data_array_type = std::array; + + local_data_array_type tmp_data { }; + + tmp_data.fill(UINT8_C(0)); + + static_cast(std::copy(data, data + size, tmp_data.begin())); + + const std::uint64_t a_lo64 { *reinterpret_cast(tmp_data.data() + std::size_t { UINT8_C(0) }) }; + const std::uint64_t a_hi64 { *reinterpret_cast(tmp_data.data() + std::size_t { UINT8_C(8) }) }; + const std::uint64_t b_lo64 { *reinterpret_cast(tmp_data.data() + std::size_t { UINT8_C(16) }) }; + const std::uint64_t b_hi64 { *reinterpret_cast(tmp_data.data() + std::size_t { UINT8_C(24) }) }; + + // Import data into the uint values. 
+ using local_uint_type = ::boost::int128::uint128_t; + + #if defined(WIDE_INTEGER_NAMESPACE) + using cntrl_uint_type = ::WIDE_INTEGER_NAMESPACE::math::wide_integer::uint128_t; + #else + using cntrl_uint_type = ::math::wide_integer::uint128_t; + #endif + + cntrl_uint_type a_cntrl { a_hi64 }; a_cntrl <<= unsigned { UINT8_C(64) }; a_cntrl |= a_lo64; + cntrl_uint_type b_cntrl { b_hi64 }; b_cntrl <<= unsigned { UINT8_C(64) }; b_cntrl |= b_lo64; + + local_uint_type a_local { a_hi64 }; a_local <<= unsigned { UINT8_C(64) }; a_local |= a_lo64; + local_uint_type b_local { b_hi64 }; b_local <<= unsigned { UINT8_C(64) }; b_local |= b_lo64; + + if(a_local < b_local) + { + std::swap(a_local, b_local); + std::swap(a_cntrl, b_cntrl); + } + + if(b_local != 0U) + { + const bool result_op_is_ok { fuzzing::eval_op(a_cntrl, b_cntrl, a_local, b_local) }; + + if(!result_op_is_ok) + { + assert(result_op_is_ok); + } + + result_is_ok = (result_op_is_ok && result_is_ok); + } + } + + return (result_is_ok ? 0 : -1); +} From 72848df323d131c0f55ced56aea770e892b9c16c Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 26 Feb 2026 10:51:21 -0500 Subject: [PATCH 003/137] Fuzz add, sub, mul --- .github/workflows/fuzz.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index 1cca9a72..abf0a2d0 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -43,6 +43,21 @@ jobs: echo "run test_fuzzing_div_versus_wide_int" ./test_fuzzing_div_versus_wide_int -max_total_time=900 -max_len=32 -verbosity=0 -close_fd_mask=3 + echo 'compiling test/fuzzing/test_fuzzing_add_versus_wide_int.cpp' + ${{ matrix.compiler }} -std=c++20 -g -O2 -Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion -fsanitize=fuzzer -I./include -I${{runner.workspace}}/wide-integer test/fuzzing/test_fuzzing_add_versus_wide_int.cpp -o test_fuzzing_add_versus_wide_int + echo "run test_fuzzing_add_versus_wide_int" + 
./test_fuzzing_add_versus_wide_int -max_total_time=900 -max_len=32 -verbosity=0 -close_fd_mask=3 + + echo 'compiling test/fuzzing/test_fuzzing_sub_versus_wide_int.cpp' + ${{ matrix.compiler }} -std=c++20 -g -O2 -Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion -fsanitize=fuzzer -I./include -I${{runner.workspace}}/wide-integer test/fuzzing/test_fuzzing_sub_versus_wide_int.cpp -o test_fuzzing_sub_versus_wide_int + echo "run test_fuzzing_sub_versus_wide_int" + ./test_fuzzing_sub_versus_wide_int -max_total_time=900 -max_len=32 -verbosity=0 -close_fd_mask=3 + + echo 'compiling test/fuzzing/test_fuzzing_mul_versus_wide_int.cpp' + ${{ matrix.compiler }} -std=c++20 -g -O2 -Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion -fsanitize=fuzzer -I./include -I${{runner.workspace}}/wide-integer test/fuzzing/test_fuzzing_mul_versus_wide_int.cpp -o test_fuzzing_mul_versus_wide_int + echo "run test_fuzzing_mul_versus_wide_int" + ./test_fuzzing_mul_versus_wide_int -max_total_time=900 -max_len=32 -verbosity=0 -close_fd_mask=3 + clang-fuzzing-versus-wide-integer-no-builtin: runs-on: ubuntu-latest defaults: From 79cc4bbcfe895cbf0d1ef9bb7abdedfa360c49a3 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 26 Feb 2026 10:56:37 -0500 Subject: [PATCH 004/137] Add fuzzing of no intrin path --- .github/workflows/fuzz.yml | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index abf0a2d0..c0dc100c 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -41,22 +41,22 @@ jobs: echo 'compiling test/fuzzing/test_fuzzing_div_versus_wide_int.cpp' ${{ matrix.compiler }} -std=c++20 -g -O2 -Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion -fsanitize=fuzzer -I./include -I${{runner.workspace}}/wide-integer test/fuzzing/test_fuzzing_div_versus_wide_int.cpp -o test_fuzzing_div_versus_wide_int echo "run test_fuzzing_div_versus_wide_int" - ./test_fuzzing_div_versus_wide_int 
-max_total_time=900 -max_len=32 -verbosity=0 -close_fd_mask=3 + ./test_fuzzing_div_versus_wide_int -max_total_time=600 -max_len=32 -verbosity=0 -close_fd_mask=3 echo 'compiling test/fuzzing/test_fuzzing_add_versus_wide_int.cpp' ${{ matrix.compiler }} -std=c++20 -g -O2 -Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion -fsanitize=fuzzer -I./include -I${{runner.workspace}}/wide-integer test/fuzzing/test_fuzzing_add_versus_wide_int.cpp -o test_fuzzing_add_versus_wide_int echo "run test_fuzzing_add_versus_wide_int" - ./test_fuzzing_add_versus_wide_int -max_total_time=900 -max_len=32 -verbosity=0 -close_fd_mask=3 + ./test_fuzzing_add_versus_wide_int -max_total_time=300 -max_len=32 -verbosity=0 -close_fd_mask=3 echo 'compiling test/fuzzing/test_fuzzing_sub_versus_wide_int.cpp' ${{ matrix.compiler }} -std=c++20 -g -O2 -Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion -fsanitize=fuzzer -I./include -I${{runner.workspace}}/wide-integer test/fuzzing/test_fuzzing_sub_versus_wide_int.cpp -o test_fuzzing_sub_versus_wide_int echo "run test_fuzzing_sub_versus_wide_int" - ./test_fuzzing_sub_versus_wide_int -max_total_time=900 -max_len=32 -verbosity=0 -close_fd_mask=3 + ./test_fuzzing_sub_versus_wide_int -max_total_time=300 -max_len=32 -verbosity=0 -close_fd_mask=3 echo 'compiling test/fuzzing/test_fuzzing_mul_versus_wide_int.cpp' ${{ matrix.compiler }} -std=c++20 -g -O2 -Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion -fsanitize=fuzzer -I./include -I${{runner.workspace}}/wide-integer test/fuzzing/test_fuzzing_mul_versus_wide_int.cpp -o test_fuzzing_mul_versus_wide_int echo "run test_fuzzing_mul_versus_wide_int" - ./test_fuzzing_mul_versus_wide_int -max_total_time=900 -max_len=32 -verbosity=0 -close_fd_mask=3 + ./test_fuzzing_mul_versus_wide_int -max_total_time=300 -max_len=32 -verbosity=0 -close_fd_mask=3 clang-fuzzing-versus-wide-integer-no-builtin: runs-on: ubuntu-latest @@ -83,4 +83,19 @@ jobs: echo 'compiling test/fuzzing/test_fuzzing_div_versus_wide_int.cpp' 
${{ matrix.compiler }} -std=c++20 -g -O2 -Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion -fsanitize=fuzzer -I./include -I${{runner.workspace}}/wide-integer -DBOOST_INT128_NO_BUILTIN_INT128=1 test/fuzzing/test_fuzzing_div_versus_wide_int.cpp -o test_fuzzing_div_versus_wide_int echo "run test_fuzzing_div_versus_wide_int" - ./test_fuzzing_div_versus_wide_int -max_total_time=900 -max_len=32 -verbosity=0 -close_fd_mask=3 + ./test_fuzzing_div_versus_wide_int -max_total_time=600 -max_len=32 -verbosity=0 -close_fd_mask=3 + + echo 'compiling test/fuzzing/test_fuzzing_add_versus_wide_int.cpp' + ${{ matrix.compiler }} -std=c++20 -g -O2 -Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion -fsanitize=fuzzer -I./include -I${{runner.workspace}}/wide-integer -DBOOST_INT128_NO_BUILTIN_INT128=1 test/fuzzing/test_fuzzing_add_versus_wide_int.cpp -o test_fuzzing_add_versus_wide_int + echo "run test_fuzzing_add_versus_wide_int" + ./test_fuzzing_add_versus_wide_int -max_total_time=300 -max_len=32 -verbosity=0 -close_fd_mask=3 + + echo 'compiling test/fuzzing/test_fuzzing_sub_versus_wide_int.cpp' + ${{ matrix.compiler }} -std=c++20 -g -O2 -Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion -fsanitize=fuzzer -I./include -I${{runner.workspace}}/wide-integer -DBOOST_INT128_NO_BUILTIN_INT128=1 test/fuzzing/test_fuzzing_sub_versus_wide_int.cpp -o test_fuzzing_sub_versus_wide_int + echo "run test_fuzzing_sub_versus_wide_int" + ./test_fuzzing_sub_versus_wide_int -max_total_time=300 -max_len=32 -verbosity=0 -close_fd_mask=3 + + echo 'compiling test/fuzzing/test_fuzzing_mul_versus_wide_int.cpp' + ${{ matrix.compiler }} -std=c++20 -g -O2 -Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion -fsanitize=fuzzer -I./include -I${{runner.workspace}}/wide-integer -DBOOST_INT128_NO_BUILTIN_INT128=1 test/fuzzing/test_fuzzing_mul_versus_wide_int.cpp -o test_fuzzing_mul_versus_wide_int + echo "run test_fuzzing_mul_versus_wide_int" + ./test_fuzzing_mul_versus_wide_int -max_total_time=300 
-max_len=32 -verbosity=0 -close_fd_mask=3 From 218a04f8609743b17781faaf1bb0a48c31980c4d Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 26 Feb 2026 12:25:11 -0500 Subject: [PATCH 005/137] Attempt remove old workaround --- examples/math_and_random.cpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/examples/math_and_random.cpp b/examples/math_and_random.cpp index 8a0a5110..9e76f7aa 100644 --- a/examples/math_and_random.cpp +++ b/examples/math_and_random.cpp @@ -25,17 +25,6 @@ #include #include -// For some bizare reason that I can't figure out Clang-Cl x86 in Github Actions crashes only with C++14 -// I can't replicate this crash locally -#if defined(_WIN32) && defined(__clang__) && defined(__cplusplus) && __cplusplus == 201402L - -int main() -{ - return 0; -} - -#else - int main() { // Setup our rng and distribution @@ -56,5 +45,3 @@ int main() return 0; } - -#endif From 12836803a4fcc7fdd321f3987e6c6969d9a81d37 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 26 Feb 2026 12:28:38 -0500 Subject: [PATCH 006/137] Update link to docs --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fbc62835..d55e7bae 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ The entire library can be conveniently included with `#include Date: Thu, 26 Feb 2026 14:14:45 -0500 Subject: [PATCH 007/137] Update apple runners --- .github/workflows/ci.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2a945c9a..15396eb6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -298,12 +298,12 @@ jobs: source_keys: - "https://apt.llvm.org/llvm-snapshot.gpg.key" - - toolset: clang - cxxstd: "03,11,14,17,20,2b" - os: macos-14 - toolset: clang cxxstd: "03,11,14,17,20,2b" os: macos-15 + - toolset: clang + cxxstd: "03,11,14,17,20,2b,2c" + os: macos-26 timeout-minutes: 180 runs-on: ${{matrix.os}} @@ -620,8 
+620,8 @@ jobs: include: - os: ubuntu-22.04 - os: ubuntu-24.04 - - os: macos-14 - os: macos-15 + - os: macos-26 runs-on: ${{matrix.os}} @@ -666,8 +666,8 @@ jobs: fail-fast: false matrix: include: - - os: macos-14 - os: macos-15 + - os: macos-26 runs-on: ${{matrix.os}} @@ -724,8 +724,8 @@ jobs: include: - os: ubuntu-22.04 - os: ubuntu-24.04 - - os: macos-14 - os: macos-15 + - os: macos-26 runs-on: ${{matrix.os}} @@ -780,8 +780,8 @@ jobs: include: - os: ubuntu-22.04 - os: ubuntu-24.04 - - os: macos-14 - os: macos-15 + - os: macos-26 runs-on: ${{matrix.os}} @@ -952,8 +952,8 @@ jobs: matrix: include: - os: ubuntu-24.04 - - os: macos-14 - os: macos-15 + - os: macos-26 runs-on: ${{matrix.os}} From fddb00e73dc2d1cda223ab9a3a24bbe08b784563 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 26 Feb 2026 14:39:02 -0500 Subject: [PATCH 008/137] Improve the overview --- doc/modules/ROOT/pages/overview.adoc | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/doc/modules/ROOT/pages/overview.adoc b/doc/modules/ROOT/pages/overview.adoc index 236a54d4..01fb1a36 100644 --- a/doc/modules/ROOT/pages/overview.adoc +++ b/doc/modules/ROOT/pages/overview.adoc @@ -15,16 +15,22 @@ Matt Borland Boost.Int128 is a portable implementation of a signed, and an unsigned 128-bit integer and related functionality (i.e. ``, ``, etc). Importantly, on all platforms, the `sizeof` the types provided in this library are exactly 128-bits. -The library is header-only, has no dependencies, and requires only C++14. +The library is header-only, has no dependencies, and requires only pass:[C++14]. +When using pass:[C++20] or newer, the library can be consumed as a module `import boost.int128`. == Motivation -128-bit integers are remarkably useful in a number of domains, but portability is often an issue. -An example is a 64-bit machine running Linux (say Ubuntu 24.04) has `__int128`, but the same exact machine running Windows does not have this type. 
+128-bit integers are useful across many domains, but pass:[C++] provides no portable way to use them. +GCC and Clang offer `__int128` as a non-standard extension on 64-bit targets, but it lacks `std::numeric_limits` specializations, `` support, and is absent entirely on MSVC. +Multiprecision libraries can fill the gap, but typically at the cost of a larger `sizeof` and additional overhead. +Boost.Int128 solves this by providing types that are exactly 128-bits on every platform. +Operation implementations rely on compiler intrinsic where available for native performance, and optimized software implementations elsewhere. == Use Cases -Anywhere 128-bit integers are needed. +* **Networking** — IPv6 addresses are 128 bits wide; a single integer makes masking, comparison, and arithmetic straightforward. +* **Unique identifiers** — UUIDs / GUIDs are 128-bit values commonly used as database keys and distributed system identifiers. +* **Scientific and Financial computing** — Extended-range accumulators, large combinatorial values, and algorithms that need overflow-free 64×64 multiplication. == Supported Compilers From 1e9524bca4aac3bfce983526fde2a0388bd1dddb Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 26 Feb 2026 14:39:53 -0500 Subject: [PATCH 009/137] Small update to README --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d55e7bae..c57a2c5f 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ int128 is under active development and is not an official boost library. This library is header only. It contains no other dependencies. Simply `#include` it and use it. +With C++20 and greater you can instead `import boost.int128`. ## CMake @@ -64,7 +65,7 @@ struct int128_t; These types operate like built-in integer types. They have their own implementations of the Standard-Library functions -(e.g. like those found in ``, ``, `` etc.). +(e.g., like those found in ``, ``, `` etc.). 
The entire library can be conveniently included with `#include ` From 4e5adc158e58021111af2df43e631e6c378eb237 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 26 Feb 2026 14:31:31 -0500 Subject: [PATCH 010/137] Add bit test --- module/Jamfile | 1 + test/test_bit.cpp | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/module/Jamfile b/module/Jamfile index 68463c18..085975ea 100644 --- a/module/Jamfile +++ b/module/Jamfile @@ -27,3 +27,4 @@ project obj int128 : int128.cxx : msvc:-interface ; run quick_test.cpp int128 : : : int128 ; +run ../test/test_bit.cpp int128 : : : int128 ; diff --git a/test/test_bit.cpp b/test/test_bit.cpp index 743c3522..54a42b48 100644 --- a/test/test_bit.cpp +++ b/test/test_bit.cpp @@ -2,10 +2,19 @@ // Distributed under the Boost Software License, Version 1.0. // https://www.boost.org/LICENSE_1_0.txt +#include + +#ifndef BOOST_INT128_BUILD_MODULE + #include #include #include -#include + +#else + +import boost.int128; + +#endif void test_has_single_bit() { From 54ef382c6730842e0fd2b436c9e589f039c0f217 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 26 Feb 2026 14:31:47 -0500 Subject: [PATCH 011/137] Test and fix export of div --- include/boost/int128/cstdlib.hpp | 8 ++++---- module/Jamfile | 1 + test/test_div.cpp | 29 ++++++++++++++++++++--------- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/include/boost/int128/cstdlib.hpp b/include/boost/int128/cstdlib.hpp index 87c71994..228ecd9c 100644 --- a/include/boost/int128/cstdlib.hpp +++ b/include/boost/int128/cstdlib.hpp @@ -10,19 +10,19 @@ namespace boost { namespace int128 { -struct u128div_t +BOOST_INT128_EXPORT struct u128div_t { uint128_t quot; uint128_t rem; }; -struct i128div_t +BOOST_INT128_EXPORT struct i128div_t { int128_t quot; int128_t rem; }; -constexpr u128div_t div(const uint128_t x, const uint128_t y) noexcept +BOOST_INT128_EXPORT constexpr u128div_t div(const uint128_t x, const uint128_t y) noexcept { if 
(BOOST_INT128_UNLIKELY(x == 0U || y == 0U)) { @@ -54,7 +54,7 @@ constexpr u128div_t div(const uint128_t x, const uint128_t y) noexcept } } -constexpr i128div_t div(const int128_t x, const int128_t y) noexcept +BOOST_INT128_EXPORT constexpr i128div_t div(const int128_t x, const int128_t y) noexcept { if (BOOST_INT128_UNLIKELY(x == 0 || y == 0)) { diff --git a/module/Jamfile b/module/Jamfile index 085975ea..bddddc77 100644 --- a/module/Jamfile +++ b/module/Jamfile @@ -28,3 +28,4 @@ obj int128 : int128.cxx : msvc:-interface ; run quick_test.cpp int128 : : : int128 ; run ../test/test_bit.cpp int128 : : : int128 ; +run ../test/test_div.cpp int128 : : : int128 ; diff --git a/test/test_div.cpp b/test/test_div.cpp index dd84a0ca..519d050c 100644 --- a/test/test_div.cpp +++ b/test/test_div.cpp @@ -2,7 +2,18 @@ // Distributed under the Boost Software License, Version 1.0. // https://www.boost.org/LICENSE_1_0.txt -#include +#ifndef BOOST_INT128_BUILD_MODULE + +#include +#include +#include + +#else + +import boost.int128; + +#endif + #include #include @@ -48,22 +59,22 @@ void test_unsigned_div() BOOST_INT128_UNREACHABLE; // LCOV_EXCL_LINE } - const auto div_res {div(lhs, rhs)}; + const auto div_res {boost::int128::div(lhs, rhs)}; BOOST_TEST_EQ(div_res.quot, lhs / rhs); BOOST_TEST_EQ(div_res.rem, lhs % rhs); - const auto inv_div_res {div(rhs, lhs)}; + const auto inv_div_res {boost::int128::div(rhs, lhs)}; BOOST_TEST_EQ(inv_div_res.quot, rhs / lhs); BOOST_TEST_EQ(inv_div_res.rem, rhs % lhs); } uint128_t lhs {dist(rng), dist(rng)}; uint128_t zero {dist(rng) * 0U, dist(rng) * 0U}; - const auto lhs_num {div(lhs, zero)}; + const auto lhs_num {boost::int128::div(lhs, zero)}; BOOST_TEST_EQ(lhs_num.quot, 0U); BOOST_TEST_EQ(lhs_num.rem, 0U); - const auto lhs_denom {div(zero, lhs)}; + const auto lhs_denom {boost::int128::div(zero, lhs)}; BOOST_TEST_EQ(lhs_denom.quot, 0U); BOOST_TEST_EQ(lhs_denom.rem, 0U); } @@ -98,22 +109,22 @@ void test_signed_div() BOOST_INT128_UNREACHABLE; // 
LCOV_EXCL_LINE } - const auto div_res {div(lhs, rhs)}; + const auto div_res {boost::int128::div(lhs, rhs)}; BOOST_TEST_EQ(div_res.quot, lhs / rhs); BOOST_TEST_EQ(div_res.rem, lhs % rhs); - const auto inv_div_res {div(rhs, lhs)}; + const auto inv_div_res {boost::int128::div(rhs, lhs)}; BOOST_TEST_EQ(inv_div_res.quot, rhs / lhs); BOOST_TEST_EQ(inv_div_res.rem, rhs % lhs); } int128_t lhs {idist(rng), dist(rng)}; int128_t zero {idist(rng) * 0, dist(rng) * 0U}; - const auto lhs_num {div(lhs, zero)}; + const auto lhs_num {boost::int128::div(lhs, zero)}; BOOST_TEST_EQ(lhs_num.quot, 0); BOOST_TEST_EQ(lhs_num.rem, 0); - const auto lhs_denom {div(zero, lhs)}; + const auto lhs_denom {boost::int128::div(zero, lhs)}; BOOST_TEST_EQ(lhs_denom.quot, 0); BOOST_TEST_EQ(lhs_denom.rem, 0); } From 8cbbbc67511b2ebc76496e770f9bc54b68d26e54 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 2 Mar 2026 08:28:07 -0500 Subject: [PATCH 012/137] Add additional module tests --- module/Jamfile | 7 +++++++ test/test_gcd_lcm.cpp | 10 ++++++++++ test/test_i128.cpp | 9 +++++++++ test/test_i128_no_sign_conv.cpp | 10 ++++++++++ test/test_limits_i128.cpp | 9 +++++++++ test/test_limits_u128.cpp | 9 +++++++++ test/test_literals.cpp | 9 +++++++++ test/test_midpoint.cpp | 9 +++++++++ 8 files changed, 72 insertions(+) diff --git a/module/Jamfile b/module/Jamfile index bddddc77..0cdbc520 100644 --- a/module/Jamfile +++ b/module/Jamfile @@ -29,3 +29,10 @@ obj int128 : int128.cxx : msvc:-interface ; run quick_test.cpp int128 : : : int128 ; run ../test/test_bit.cpp int128 : : : int128 ; run ../test/test_div.cpp int128 : : : int128 ; +run ../test/test_gcd_lcm.cpp int128 : : : int128 ; +run ../test/test_i128.cpp int128 : : : int128 ; +run ../test/test_i128_no_sign_conv.cpp int128 : : : int128 ; +run ../test/test_limits_i128.cpp : : : int128 ; +run ../test/test_limits_u128.cpp : : : int128 ; +run ../test/test_literals.cpp : : : int128 ; +run ../test/test_midpoint.cpp : : : int128 ; diff --git 
a/test/test_gcd_lcm.cpp b/test/test_gcd_lcm.cpp index 4b8e1763..7d645c4c 100644 --- a/test/test_gcd_lcm.cpp +++ b/test/test_gcd_lcm.cpp @@ -2,7 +2,17 @@ // Distributed under the Boost Software License, Version 1.0. // https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_INT128_BUILD_MODULE + #include + +#else + +import boost.int128; + +#endif + #include #include diff --git a/test/test_i128.cpp b/test/test_i128.cpp index a898fdf8..d52c2e59 100644 --- a/test/test_i128.cpp +++ b/test/test_i128.cpp @@ -6,9 +6,18 @@ # define BOOST_INT128_ALLOW_SIGN_CONVERSION #endif +#ifndef BOOST_INT128_BUILD_MODULE + #include #include #include + +#else + +import boost.int128; + +#endif + #include #include #include diff --git a/test/test_i128_no_sign_conv.cpp b/test/test_i128_no_sign_conv.cpp index c5bfe7f7..ba2a289e 100644 --- a/test/test_i128_no_sign_conv.cpp +++ b/test/test_i128_no_sign_conv.cpp @@ -2,8 +2,18 @@ // Distributed under the Boost Software License, Version 1.0. // https://www.boost.org/LICENSE_1_0.txt +#ifndef BOOST_INT128_BUILD_MODULE + #include #include +#include + +#else + +import boost.int128; + +#endif + #include #include #include diff --git a/test/test_limits_i128.cpp b/test/test_limits_i128.cpp index c266bdcc..95d3ffaf 100644 --- a/test/test_limits_i128.cpp +++ b/test/test_limits_i128.cpp @@ -2,7 +2,16 @@ // Distributed under the Boost Software License, Version 1.0. // https://www.boost.org/LICENSE_1_0.txt +#ifndef BOOST_INT128_BUILD_MODULE + #include + +#else + +import boost.int128; + +#endif + #include #include diff --git a/test/test_limits_u128.cpp b/test/test_limits_u128.cpp index bac9c810..1dd0ab7a 100644 --- a/test/test_limits_u128.cpp +++ b/test/test_limits_u128.cpp @@ -2,7 +2,16 @@ // Distributed under the Boost Software License, Version 1.0. 
// https://www.boost.org/LICENSE_1_0.txt +#ifndef BOOST_INT128_BUILD_MODULE + #include + +#else + +import boost.int128; + +#endif + #include #include diff --git a/test/test_literals.cpp b/test/test_literals.cpp index a036ab61..37c94081 100644 --- a/test/test_literals.cpp +++ b/test/test_literals.cpp @@ -2,9 +2,18 @@ // Distributed under the Boost Software License, Version 1.0. // https://www.boost.org/LICENSE_1_0.txt +#ifndef BOOST_INT128_BUILD_MODULE + #include #include #include + +#else + +import boost.int128; + +#endif + #include #include diff --git a/test/test_midpoint.cpp b/test/test_midpoint.cpp index e1d9ffa8..1a84dbe0 100644 --- a/test/test_midpoint.cpp +++ b/test/test_midpoint.cpp @@ -2,7 +2,16 @@ // Distributed under the Boost Software License, Version 1.0. // https://www.boost.org/LICENSE_1_0.txt +#ifndef BOOST_INT128_BUILD_MODULE + #include + +#else + +import boost.int128; + +#endif + #include #include From 8b70c47693eb3d34510022b20d80e594ce30cd9f Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 2 Mar 2026 09:26:44 -0500 Subject: [PATCH 013/137] Make conversions between lib types explicit --- include/boost/int128/detail/int128_imp.hpp | 4 ++-- include/boost/int128/detail/uint128_imp.hpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/boost/int128/detail/int128_imp.hpp b/include/boost/int128/detail/int128_imp.hpp index 1a4dcc5a..25c740cc 100644 --- a/include/boost/int128/detail/int128_imp.hpp +++ b/include/boost/int128/detail/int128_imp.hpp @@ -55,8 +55,8 @@ int128_t constexpr int128_t& operator=(const int128_t&) noexcept = default; constexpr int128_t& operator=(int128_t&&) noexcept = default; - // Requires conversion file to be implemented - constexpr int128_t(const uint128_t& v) noexcept; + // Requires a conversion file to be implemented + explicit constexpr int128_t(const uint128_t& v) noexcept; // Construct from integral types #if BOOST_INT128_ENDIAN_LITTLE_BYTE diff --git 
a/include/boost/int128/detail/uint128_imp.hpp b/include/boost/int128/detail/uint128_imp.hpp index 987d29c2..2126c16b 100644 --- a/include/boost/int128/detail/uint128_imp.hpp +++ b/include/boost/int128/detail/uint128_imp.hpp @@ -56,8 +56,8 @@ uint128_t constexpr uint128_t& operator=(const uint128_t&) noexcept = default; constexpr uint128_t& operator=(uint128_t&&) noexcept = default; - // Requires conversion file to be implemented - constexpr uint128_t(const int128_t& v) noexcept; + // Requires a conversion file to be implemented + explicit constexpr uint128_t(const int128_t& v) noexcept; // Construct from integral types #if BOOST_INT128_ENDIAN_LITTLE_BYTE From 4cd88ade54b16b9079ade7bc722d259b99b5065d Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 2 Mar 2026 09:26:58 -0500 Subject: [PATCH 014/137] Fix instances of potential sign conversion --- include/boost/int128/detail/mini_from_chars.hpp | 4 ++-- include/boost/int128/fmt_format.hpp | 2 +- include/boost/int128/format.hpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/boost/int128/detail/mini_from_chars.hpp b/include/boost/int128/detail/mini_from_chars.hpp index c7aca800..b6661b48 100644 --- a/include/boost/int128/detail/mini_from_chars.hpp +++ b/include/boost/int128/detail/mini_from_chars.hpp @@ -76,8 +76,8 @@ constexpr int from_chars_integer_impl(const char* first, const char* last, Integ ++next; } - overflow_value = (std::numeric_limits::max)(); - max_digit = (std::numeric_limits::max)(); + overflow_value = static_cast((std::numeric_limits::max)()); + max_digit = static_cast((std::numeric_limits::max)()); if (is_negative) { diff --git a/include/boost/int128/fmt_format.hpp b/include/boost/int128/fmt_format.hpp index 6df48626..e328e638 100644 --- a/include/boost/int128/fmt_format.hpp +++ b/include/boost/int128/fmt_format.hpp @@ -280,7 +280,7 @@ struct formatter } else { - abs_v = v; + abs_v = static_cast(v); } const auto end = detail::mini_to_chars(buffer, abs_v, base, 
is_upper); diff --git a/include/boost/int128/format.hpp b/include/boost/int128/format.hpp index d6e235ad..16289415 100644 --- a/include/boost/int128/format.hpp +++ b/include/boost/int128/format.hpp @@ -269,7 +269,7 @@ struct formatter } else { - abs_v = v; + abs_v = static_cast(v); } const auto end = boost::int128::detail::mini_to_chars(buffer, abs_v, base, is_upper); From fdcd1f1cc754995de915781134e00c3c3e9287cf Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 2 Mar 2026 09:46:45 -0500 Subject: [PATCH 015/137] Implement mixed type comparisons when sign compare is defined --- include/boost/int128/detail/conversions.hpp | 195 +++++++++++++++++++- 1 file changed, 189 insertions(+), 6 deletions(-) diff --git a/include/boost/int128/detail/conversions.hpp b/include/boost/int128/detail/conversions.hpp index 02319668..4fe33440 100644 --- a/include/boost/int128/detail/conversions.hpp +++ b/include/boost/int128/detail/conversions.hpp @@ -42,46 +42,225 @@ constexpr uint128_t::uint128_t(const int128_t& v) noexcept : high {static_cast && detail::is_valid_overload_v && !std::is_same::value, bool> = true> -constexpr bool operator==(T, U) noexcept +constexpr bool operator==(const T lhs, const U rhs) noexcept { + #ifndef BOOST_INT128_ALLOW_SIGN_COMPARE + static_assert(std::is_same::value, "Sign Compare Error, cast one type to the other for this operation"); + static_cast(lhs); + static_cast(rhs); return true; + + #else + + BOOST_INT128_IF_CONSTEXPR (std::numeric_limits::is_signed) + { + if (lhs < T{0}) + { + return false; + } + + return static_cast(lhs) == rhs; + } + else + { + static_assert(std::numeric_limits::is_signed, "Wrong sign detected. 
Please open a bug report at https://github.com/cppalliance/int128"); + + if (rhs < T{0}) + { + return false; + } + + return lhs == static_cast(rhs); + } + + #endif } template && detail::is_valid_overload_v && !std::is_same::value, bool> = true> -constexpr bool operator!=(T, U) noexcept +constexpr bool operator!=(const T lhs, const U rhs) noexcept { + #ifndef BOOST_INT128_ALLOW_SIGN_COMPARE + static_assert(std::is_same::value, "Sign Compare Error, cast one type to the other for this operation"); + static_cast(lhs); + static_cast(rhs); return true; + + #else + + BOOST_INT128_IF_CONSTEXPR (std::numeric_limits::is_signed) + { + if (lhs < T{0}) + { + return false; + } + + return static_cast(lhs) != rhs; + } + else + { + static_assert(std::numeric_limits::is_signed, "Wrong sign detected. Please open a bug report at https://github.com/cppalliance/int128"); + + if (rhs < T{0}) + { + return false; + } + + return lhs != static_cast(rhs); + } + + #endif } template && detail::is_valid_overload_v && !std::is_same::value, bool> = true> -constexpr bool operator<(T, U) noexcept +constexpr bool operator<(const T lhs, const U rhs) noexcept { + #ifndef BOOST_INT128_ALLOW_SIGN_COMPARE + static_assert(std::is_same::value, "Sign Compare Error, cast one type to the other for this operation"); + static_cast(lhs); + static_cast(rhs); return true; + + #else + + BOOST_INT128_IF_CONSTEXPR (std::numeric_limits::is_signed) + { + if (lhs < T{0}) + { + return true; + } + + return static_cast(lhs) < rhs; + } + else + { + static_assert(std::numeric_limits::is_signed, "Wrong sign detected. 
Please open a bug report at https://github.com/cppalliance/int128"); + + if (rhs < T{0}) + { + return false; + } + + return lhs < static_cast(rhs); + } + + #endif } template && detail::is_valid_overload_v && !std::is_same::value, bool> = true> -constexpr bool operator<=(T, U) noexcept +constexpr bool operator<=(const T lhs, const U rhs) noexcept { + #ifndef BOOST_INT128_ALLOW_SIGN_COMPARE + static_assert(std::is_same::value, "Sign Compare Error, cast one type to the other for this operation"); + static_cast(lhs); + static_cast(rhs); return true; + + #else + + BOOST_INT128_IF_CONSTEXPR (std::numeric_limits::is_signed) + { + if (lhs < T{0}) + { + return true; + } + + return static_cast(lhs) <= rhs; + } + else + { + static_assert(std::numeric_limits::is_signed, "Wrong sign detected. Please open a bug report at https://github.com/cppalliance/int128"); + + if (rhs < T{0}) + { + return false; + } + + return lhs <= static_cast(rhs); + } + + #endif } template && detail::is_valid_overload_v && !std::is_same::value, bool> = true> -constexpr bool operator>(T, U) noexcept +constexpr bool operator>(const T lhs, const U rhs) noexcept { + #ifndef BOOST_INT128_ALLOW_SIGN_COMPARE + static_assert(std::is_same::value, "Sign Compare Error, cast one type to the other for this operation"); + static_cast(lhs); + static_cast(rhs); return true; + + #else + + BOOST_INT128_IF_CONSTEXPR (std::numeric_limits::is_signed) + { + if (lhs < T{0}) + { + return false; + } + + return static_cast(lhs) > rhs; + } + else + { + static_assert(std::numeric_limits::is_signed, "Wrong sign detected. 
Please open a bug report at https://github.com/cppalliance/int128"); + + if (rhs < T{0}) + { + return true; + } + + return lhs > static_cast(rhs); + } + + #endif } template && detail::is_valid_overload_v && !std::is_same::value, bool> = true> -constexpr bool operator>=(T, U) noexcept +constexpr bool operator>=(const T lhs, const U rhs) noexcept { + #ifndef BOOST_INT128_ALLOW_SIGN_COMPARE + static_assert(std::is_same::value, "Sign Compare Error, cast one type to the other for this operation"); + static_cast(lhs); + static_cast(rhs); return true; + + #else + + BOOST_INT128_IF_CONSTEXPR (std::numeric_limits::is_signed) + { + if (lhs < T{0}) + { + return false; + } + + return static_cast(lhs) >= rhs; + } + else + { + static_assert(std::numeric_limits::is_signed, "Wrong sign detected. Please open a bug report at https://github.com/cppalliance/int128"); + + if (rhs < T{0}) + { + return true; + } + + return lhs >= static_cast(rhs); + } + + #endif } //===================================== @@ -123,6 +302,10 @@ constexpr T operator%(T lhs, U) noexcept return lhs; } +#ifdef _MSC_VER +#pragma warning(pop) +#endif + } // namespace int128 } // namespace boost From 3b945da6b6eb964ae40a56236a9e4896607d5cc6 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 2 Mar 2026 10:11:33 -0500 Subject: [PATCH 016/137] Add testing of mixed type compare when allowed --- test/Jamfile | 1 + test/test_mixed_type_compare.cpp | 82 ++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 test/test_mixed_type_compare.cpp diff --git a/test/Jamfile b/test/Jamfile index 3f7de536..1166a9ce 100644 --- a/test/Jamfile +++ b/test/Jamfile @@ -69,6 +69,7 @@ run test_stream.cpp ; compile-fail test_mixed_type_ops.cpp ; compile-fail test_mixed_arithmetic.cpp ; +run test_mixed_type_compare.cpp ; run test_consteval_funcs.cpp ; run test_sign_compare.cpp ; diff --git a/test/test_mixed_type_compare.cpp b/test/test_mixed_type_compare.cpp new file mode 100644 index 00000000..80b843b3 --- 
/dev/null +++ b/test/test_mixed_type_compare.cpp @@ -0,0 +1,82 @@ +// Copyright 2025 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#define BOOST_INT128_ALLOW_SIGN_COMPARE + +#include +#include +#include + +static std::mt19937_64 rng{42}; +static std::uniform_int_distribution u_dist{0, UINT64_MAX}; +static std::uniform_int_distribution i_dist{0, INT64_MAX}; +static constexpr std::size_t N {1024U}; + +using namespace boost::int128; + +void test_left_unsigned() +{ + for (std::size_t i {0}; i < N; ++i) + { + const auto lhs {u_dist(rng)}; + const auto rhs {i_dist(rng)}; + + const uint128_t lib_lhs {lhs}; + const int128_t lib_rhs {rhs}; + + BOOST_TEST_EQ(lib_lhs == lib_rhs, lhs == rhs); + BOOST_TEST_EQ(lib_lhs != lib_rhs, lhs != rhs); + BOOST_TEST_EQ(lib_lhs > lib_rhs, lhs > rhs); + BOOST_TEST_EQ(lib_lhs >= lib_rhs, lhs >= rhs); + BOOST_TEST_EQ(lib_lhs < lib_rhs, lhs < rhs); + BOOST_TEST_EQ(lib_lhs <= lib_rhs, lhs <= rhs); + } + + const uint128_t lhs {42u}; + const int128_t rhs {-42}; + + BOOST_TEST_EQ(lhs == rhs, false); + BOOST_TEST_EQ(lhs != rhs, true); + BOOST_TEST_EQ(lhs < rhs, false); + BOOST_TEST_EQ(lhs <= rhs, false); + BOOST_TEST_EQ(lhs > rhs, true); + BOOST_TEST_EQ(lhs >= rhs, true); +} + +void test_right_unsigned() +{ + for (std::size_t i {0}; i < N; ++i) + { + const auto lhs {i_dist(rng)}; + const auto rhs {u_dist(rng)}; + + const int128_t lib_lhs {lhs}; + const uint128_t lib_rhs {rhs}; + + BOOST_TEST_EQ(lib_lhs == lib_rhs, lhs == rhs); + BOOST_TEST_EQ(lib_lhs != lib_rhs, lhs != rhs); + BOOST_TEST_EQ(lib_lhs > lib_rhs, lhs > rhs); + BOOST_TEST_EQ(lib_lhs >= lib_rhs, lhs >= rhs); + BOOST_TEST_EQ(lib_lhs < lib_rhs, lhs < rhs); + BOOST_TEST_EQ(lib_lhs <= lib_rhs, lhs <= rhs); + } + + const int128_t lhs {-42}; + const uint128_t rhs {42u}; + + BOOST_TEST_EQ(lhs == rhs, false); + BOOST_TEST_EQ(lhs != rhs, true); + BOOST_TEST_EQ(lhs < rhs, true); + BOOST_TEST_EQ(lhs <= rhs, true); + 
BOOST_TEST_EQ(lhs > rhs, false); + BOOST_TEST_EQ(lhs >= rhs, false); +} + +int main() +{ + test_left_unsigned(); + test_right_unsigned(); + + return boost::report_errors(); +} From 01bf86e8b5784db96c03864f9775bab012c4f84d Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 2 Mar 2026 10:11:43 -0500 Subject: [PATCH 017/137] Bug fix and logic improvement --- include/boost/int128/detail/conversions.hpp | 28 ++++++--------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/include/boost/int128/detail/conversions.hpp b/include/boost/int128/detail/conversions.hpp index 4fe33440..4d0523d3 100644 --- a/include/boost/int128/detail/conversions.hpp +++ b/include/boost/int128/detail/conversions.hpp @@ -59,7 +59,7 @@ constexpr bool operator==(const T lhs, const U rhs) noexcept #else - BOOST_INT128_IF_CONSTEXPR (std::numeric_limits::is_signed) + BOOST_INT128_IF_CONSTEXPR (std::is_same::value) { if (lhs < T{0}) { @@ -70,8 +70,6 @@ constexpr bool operator==(const T lhs, const U rhs) noexcept } else { - static_assert(std::numeric_limits::is_signed, "Wrong sign detected. Please open a bug report at https://github.com/cppalliance/int128"); - if (rhs < T{0}) { return false; @@ -95,22 +93,20 @@ constexpr bool operator!=(const T lhs, const U rhs) noexcept #else - BOOST_INT128_IF_CONSTEXPR (std::numeric_limits::is_signed) + BOOST_INT128_IF_CONSTEXPR (std::is_same::value) { if (lhs < T{0}) { - return false; + return true; } return static_cast(lhs) != rhs; } else { - static_assert(std::numeric_limits::is_signed, "Wrong sign detected. 
Please open a bug report at https://github.com/cppalliance/int128"); - if (rhs < T{0}) { - return false; + return true; } return lhs != static_cast(rhs); @@ -131,7 +127,7 @@ constexpr bool operator<(const T lhs, const U rhs) noexcept #else - BOOST_INT128_IF_CONSTEXPR (std::numeric_limits::is_signed) + BOOST_INT128_IF_CONSTEXPR (std::is_same::value) { if (lhs < T{0}) { @@ -142,8 +138,6 @@ constexpr bool operator<(const T lhs, const U rhs) noexcept } else { - static_assert(std::numeric_limits::is_signed, "Wrong sign detected. Please open a bug report at https://github.com/cppalliance/int128"); - if (rhs < T{0}) { return false; @@ -167,7 +161,7 @@ constexpr bool operator<=(const T lhs, const U rhs) noexcept #else - BOOST_INT128_IF_CONSTEXPR (std::numeric_limits::is_signed) + BOOST_INT128_IF_CONSTEXPR (std::is_same::value) { if (lhs < T{0}) { @@ -178,8 +172,6 @@ constexpr bool operator<=(const T lhs, const U rhs) noexcept } else { - static_assert(std::numeric_limits::is_signed, "Wrong sign detected. Please open a bug report at https://github.com/cppalliance/int128"); - if (rhs < T{0}) { return false; @@ -203,7 +195,7 @@ constexpr bool operator>(const T lhs, const U rhs) noexcept #else - BOOST_INT128_IF_CONSTEXPR (std::numeric_limits::is_signed) + BOOST_INT128_IF_CONSTEXPR (std::is_same::value) { if (lhs < T{0}) { @@ -214,8 +206,6 @@ constexpr bool operator>(const T lhs, const U rhs) noexcept } else { - static_assert(std::numeric_limits::is_signed, "Wrong sign detected. Please open a bug report at https://github.com/cppalliance/int128"); - if (rhs < T{0}) { return true; @@ -239,7 +229,7 @@ constexpr bool operator>=(const T lhs, const U rhs) noexcept #else - BOOST_INT128_IF_CONSTEXPR (std::numeric_limits::is_signed) + BOOST_INT128_IF_CONSTEXPR (std::is_same::value) { if (lhs < T{0}) { @@ -250,8 +240,6 @@ constexpr bool operator>=(const T lhs, const U rhs) noexcept } else { - static_assert(std::numeric_limits::is_signed, "Wrong sign detected. 
Please open a bug report at https://github.com/cppalliance/int128"); - if (rhs < T{0}) { return true; From 861e2c5841c3d51c5a6cd2c04a1d528ba6832c32 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 2 Mar 2026 10:24:32 -0500 Subject: [PATCH 018/137] Rename test file --- test/Jamfile | 2 +- ..._mixed_type_compare.cpp => test_mixed_type_sign_compare.cpp} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename test/{test_mixed_type_compare.cpp => test_mixed_type_sign_compare.cpp} (100%) diff --git a/test/Jamfile b/test/Jamfile index 1166a9ce..8d3b774d 100644 --- a/test/Jamfile +++ b/test/Jamfile @@ -69,7 +69,7 @@ run test_stream.cpp ; compile-fail test_mixed_type_ops.cpp ; compile-fail test_mixed_arithmetic.cpp ; -run test_mixed_type_compare.cpp ; +run test_mixed_type_sign_compare.cpp ; run test_consteval_funcs.cpp ; run test_sign_compare.cpp ; diff --git a/test/test_mixed_type_compare.cpp b/test/test_mixed_type_sign_compare.cpp similarity index 100% rename from test/test_mixed_type_compare.cpp rename to test/test_mixed_type_sign_compare.cpp From 50d664d6cce504b44ca20f4e968d1b84a50efbe4 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 2 Mar 2026 16:10:13 -0500 Subject: [PATCH 019/137] Add mixed type operations --- include/boost/int128/detail/conversions.hpp | 65 +++++++++++++++++---- 1 file changed, 55 insertions(+), 10 deletions(-) diff --git a/include/boost/int128/detail/conversions.hpp b/include/boost/int128/detail/conversions.hpp index 4d0523d3..397a5e3b 100644 --- a/include/boost/int128/detail/conversions.hpp +++ b/include/boost/int128/detail/conversions.hpp @@ -256,38 +256,83 @@ constexpr bool operator>=(const T lhs, const U rhs) noexcept //===================================== template && detail::is_valid_overload_v && !std::is_same::value, bool> = true> -constexpr T operator+(T lhs, U) noexcept +constexpr uint128_t operator+(const T lhs, const U rhs) noexcept { + #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION + static_assert(std::is_same::value, "Sign 
Conversion Error, cast one type to the other for this operation"); - return lhs; + static_cast(rhs); + return static_cast(lhs); + + #else + + return static_cast(lhs) + static_cast(rhs); + + #endif } template && detail::is_valid_overload_v && !std::is_same::value, bool> = true> -constexpr T operator-(T lhs, U) noexcept +constexpr uint128_t operator-(const T lhs, const U rhs) noexcept { + #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION + static_assert(std::is_same::value, "Sign Conversion Error, cast one type to the other for this operation"); - return lhs; + static_cast(rhs); + return static_cast(lhs); + + #else + + return static_cast(lhs) - static_cast(rhs); + + #endif } template && detail::is_valid_overload_v && !std::is_same::value, bool> = true> -constexpr T operator*(T lhs, U) noexcept +constexpr uint128_t operator*(const T lhs, const U rhs) noexcept { + #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION + static_assert(std::is_same::value, "Sign Conversion Error, cast one type to the other for this operation"); - return lhs; + static_cast(rhs); + return static_cast(lhs); + + #else + + return static_cast(lhs) * static_cast(rhs); + + #endif } template && detail::is_valid_overload_v && !std::is_same::value, bool> = true> -constexpr T operator/(T lhs, U) noexcept +constexpr uint128_t operator/(const T lhs, const U rhs) noexcept { + #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION + static_assert(std::is_same::value, "Sign Conversion Error, cast one type to the other for this operation"); - return lhs; + static_cast(rhs); + return static_cast(lhs); + + #else + + return static_cast(lhs) / static_cast(rhs); + + #endif } template && detail::is_valid_overload_v && !std::is_same::value, bool> = true> -constexpr T operator%(T lhs, U) noexcept +constexpr uint128_t operator%(const T lhs, const U rhs) noexcept { + #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION + static_assert(std::is_same::value, "Sign Conversion Error, cast one type to the other for this operation"); - return lhs; + 
static_cast(rhs); + return static_cast(lhs); + + #else + + return static_cast(lhs) % static_cast(rhs); + + #endif } #ifdef _MSC_VER From 3c03ec1e5d2f9bb42d51e86d084922a1e10aa95e Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 2 Mar 2026 16:10:25 -0500 Subject: [PATCH 020/137] Add sign conversion testing --- test/Jamfile | 1 + test/test_mixed_type_sign_conversion.cpp | 56 ++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 test/test_mixed_type_sign_conversion.cpp diff --git a/test/Jamfile b/test/Jamfile index 8d3b774d..fd3f9325 100644 --- a/test/Jamfile +++ b/test/Jamfile @@ -70,6 +70,7 @@ run test_stream.cpp ; compile-fail test_mixed_type_ops.cpp ; compile-fail test_mixed_arithmetic.cpp ; run test_mixed_type_sign_compare.cpp ; +run test_mixed_type_sign_conversion.cpp ; run test_consteval_funcs.cpp ; run test_sign_compare.cpp ; diff --git a/test/test_mixed_type_sign_conversion.cpp b/test/test_mixed_type_sign_conversion.cpp new file mode 100644 index 00000000..a5605d84 --- /dev/null +++ b/test/test_mixed_type_sign_conversion.cpp @@ -0,0 +1,56 @@ +// Copyright 2025 Matt Borland +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include + +static std::mt19937_64 rng{42}; +static std::uniform_int_distribution u_dist{0, static_cast(std::sqrt(UINT64_MAX))}; +static std::uniform_int_distribution i_dist{0, static_cast(std::sqrt(INT64_MAX))}; +static constexpr std::size_t N {1024U}; + +using namespace boost::int128; + +void test() +{ + for (std::size_t i {0}; i < N; ++i) + { + const auto u_val {u_dist(rng)}; + const auto i_val {i_dist(rng)}; + + if (u_val > static_cast(i_val)) + { + const uint128_t lhs {u_val}; + const int128_t rhs {i_val}; + + BOOST_TEST_EQ(lhs + rhs, u_val + static_cast(i_val)); + BOOST_TEST_EQ(lhs - rhs, u_val - static_cast(i_val)); + BOOST_TEST_EQ(lhs * rhs, u_val * static_cast(i_val)); + BOOST_TEST_EQ(lhs / rhs, u_val / static_cast(i_val)); + BOOST_TEST_EQ(lhs % rhs, u_val % static_cast(i_val)); + } + else + { + const int128_t lhs {i_val}; + const uint128_t rhs {u_val}; + + BOOST_TEST_EQ(lhs + rhs, static_cast(i_val) + u_val); + BOOST_TEST_EQ(lhs - rhs, static_cast(i_val) - u_val); + BOOST_TEST_EQ(lhs * rhs, static_cast(i_val) * u_val); + BOOST_TEST_EQ(lhs / rhs, static_cast(i_val) / u_val); + BOOST_TEST_EQ(lhs % rhs, static_cast(i_val) % u_val); + } + } +} + +int main() +{ + test(); + + return boost::report_errors(); +} \ No newline at end of file From b95a72277e8f101c5de7d5ef42bcd14048eb15f7 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 2 Mar 2026 16:22:52 -0500 Subject: [PATCH 021/137] Fix conversion error --- include/boost/int128/detail/uint128_imp.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/boost/int128/detail/uint128_imp.hpp b/include/boost/int128/detail/uint128_imp.hpp index 2126c16b..eed003fb 100644 --- a/include/boost/int128/detail/uint128_imp.hpp +++ b/include/boost/int128/detail/uint128_imp.hpp @@ -1855,7 +1855,7 @@ BOOST_INT128_EXPORT constexpr uint128_t operator<<(const uint128_t lhs, const ui { 
if (rhs.high > UINT64_C(0) || rhs.low >= UINT64_C(128)) { - return 0; + return uint128_t{0}; } return lhs << rhs.low; @@ -2057,7 +2057,7 @@ BOOST_INT128_EXPORT constexpr uint128_t operator>>(const uint128_t lhs, const ui { if (rhs.high > UINT64_C(0) || rhs.low >= UINT64_C(128)) { - return 0; + return uint128_t{0}; } return lhs >> rhs.low; From b0b6f79cc3511adf83feb22109eb6d4f2d8d0543 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 2 Mar 2026 18:42:33 -0500 Subject: [PATCH 022/137] Ignore GCC warnings --- test/test_mixed_type_sign_compare.cpp | 5 +++++ test/test_mixed_type_sign_conversion.cpp | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/test/test_mixed_type_sign_compare.cpp b/test/test_mixed_type_sign_compare.cpp index 80b843b3..47b452ff 100644 --- a/test/test_mixed_type_sign_compare.cpp +++ b/test/test_mixed_type_sign_compare.cpp @@ -8,6 +8,11 @@ #include #include +#ifdef __GNUC__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wsign-compare" +#endif + static std::mt19937_64 rng{42}; static std::uniform_int_distribution u_dist{0, UINT64_MAX}; static std::uniform_int_distribution i_dist{0, INT64_MAX}; diff --git a/test/test_mixed_type_sign_conversion.cpp b/test/test_mixed_type_sign_conversion.cpp index a5605d84..79f6106c 100644 --- a/test/test_mixed_type_sign_conversion.cpp +++ b/test/test_mixed_type_sign_conversion.cpp @@ -9,6 +9,12 @@ #include #include +#ifdef __GNUC__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wsign-compare" +# pragma GCC diagnostic ignored "-Wsign-conversion" +#endif + static std::mt19937_64 rng{42}; static std::uniform_int_distribution u_dist{0, static_cast(std::sqrt(UINT64_MAX))}; static std::uniform_int_distribution i_dist{0, static_cast(std::sqrt(INT64_MAX))}; From 113be1bc8a2af3300d0862f5ed7a75b502f0daf0 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 2 Mar 2026 18:42:53 -0500 Subject: [PATCH 023/137] Various windows fail fixes --- include/boost/int128/detail/int128_imp.hpp | 
2 +- include/boost/int128/numeric.hpp | 2 +- test/test_bit.cpp | 2 +- test/test_u128.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/boost/int128/detail/int128_imp.hpp b/include/boost/int128/detail/int128_imp.hpp index 25c740cc..9b1ddfe5 100644 --- a/include/boost/int128/detail/int128_imp.hpp +++ b/include/boost/int128/detail/int128_imp.hpp @@ -1541,7 +1541,7 @@ constexpr int128_t operator^(const UnsignedInteger lhs, const int128_t rhs) noex static_assert(detail::is_signed_integer_v, "Sign Conversion Error"); static_cast(lhs); static_cast(rhs); - return true; + return int128_t{}; #endif } diff --git a/include/boost/int128/numeric.hpp b/include/boost/int128/numeric.hpp index fa22f7c5..6aab714a 100644 --- a/include/boost/int128/numeric.hpp +++ b/include/boost/int128/numeric.hpp @@ -350,7 +350,7 @@ constexpr uint128_t lcm(uint128_t a, uint128_t b) noexcept { if (a == 0U || b == 0U) { - return 0; + return uint128_t{0}; } diff --git a/test/test_bit.cpp b/test/test_bit.cpp index 54a42b48..8085c628 100644 --- a/test/test_bit.cpp +++ b/test/test_bit.cpp @@ -51,7 +51,7 @@ void test_countl_zero() void test_bit_width() { - BOOST_TEST_EQ(boost::int128::bit_width(0), 0); + BOOST_TEST_EQ(boost::int128::bit_width(boost::int128::uint128_t{0}), 0); boost::int128::uint128_t x {1}; diff --git a/test/test_u128.cpp b/test/test_u128.cpp index b1a7c05f..81a9b5c7 100644 --- a/test/test_u128.cpp +++ b/test/test_u128.cpp @@ -1361,7 +1361,7 @@ int main() test_spot_div(-3237361348456748317LL, 8011834041509972187LL); - test_spot_div(boost::int128::uint128_t{50012077812411ULL, 6429278683030093824ULL}, boost::int128::uint128_t{542101086ULL, 4477988020393345024ULL}, 92256); + test_spot_div(boost::int128::uint128_t{50012077812411ULL, 6429278683030093824ULL}, boost::int128::uint128_t{542101086ULL, 4477988020393345024ULL}, boost::int128::uint128_t{92256}); return boost::report_errors(); } From dd01ce0a5cff10e15f32888369640b497e0b0152 Mon Sep 17 00:00:00 2001 From: 
Matt Borland Date: Tue, 3 Mar 2026 10:44:46 -0500 Subject: [PATCH 024/137] More MSVC warning fixes --- test/test_mixed_type_sign_compare.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/test/test_mixed_type_sign_compare.cpp b/test/test_mixed_type_sign_compare.cpp index 47b452ff..ebd99a88 100644 --- a/test/test_mixed_type_sign_compare.cpp +++ b/test/test_mixed_type_sign_compare.cpp @@ -30,12 +30,12 @@ void test_left_unsigned() const uint128_t lib_lhs {lhs}; const int128_t lib_rhs {rhs}; - BOOST_TEST_EQ(lib_lhs == lib_rhs, lhs == rhs); - BOOST_TEST_EQ(lib_lhs != lib_rhs, lhs != rhs); - BOOST_TEST_EQ(lib_lhs > lib_rhs, lhs > rhs); - BOOST_TEST_EQ(lib_lhs >= lib_rhs, lhs >= rhs); - BOOST_TEST_EQ(lib_lhs < lib_rhs, lhs < rhs); - BOOST_TEST_EQ(lib_lhs <= lib_rhs, lhs <= rhs); + BOOST_TEST_EQ(lib_lhs == lib_rhs, lhs == static_cast(rhs)); + BOOST_TEST_EQ(lib_lhs != lib_rhs, lhs != static_cast(rhs)); + BOOST_TEST_EQ(lib_lhs > lib_rhs, lhs > static_cast(rhs)); + BOOST_TEST_EQ(lib_lhs >= lib_rhs, lhs >= static_cast(rhs)); + BOOST_TEST_EQ(lib_lhs < lib_rhs, lhs < static_cast(rhs)); + BOOST_TEST_EQ(lib_lhs <= lib_rhs, lhs <= static_cast(rhs)); } const uint128_t lhs {42u}; @@ -59,12 +59,12 @@ void test_right_unsigned() const int128_t lib_lhs {lhs}; const uint128_t lib_rhs {rhs}; - BOOST_TEST_EQ(lib_lhs == lib_rhs, lhs == rhs); - BOOST_TEST_EQ(lib_lhs != lib_rhs, lhs != rhs); - BOOST_TEST_EQ(lib_lhs > lib_rhs, lhs > rhs); - BOOST_TEST_EQ(lib_lhs >= lib_rhs, lhs >= rhs); - BOOST_TEST_EQ(lib_lhs < lib_rhs, lhs < rhs); - BOOST_TEST_EQ(lib_lhs <= lib_rhs, lhs <= rhs); + BOOST_TEST_EQ(lib_lhs == lib_rhs, static_cast(lhs) == rhs); + BOOST_TEST_EQ(lib_lhs != lib_rhs, static_cast(lhs) != rhs); + BOOST_TEST_EQ(lib_lhs > lib_rhs, static_cast(lhs) > rhs); + BOOST_TEST_EQ(lib_lhs >= lib_rhs, static_cast(lhs) >= rhs); + BOOST_TEST_EQ(lib_lhs < lib_rhs, static_cast(lhs) < rhs); + BOOST_TEST_EQ(lib_lhs <= lib_rhs, static_cast(lhs) <= rhs); } const 
int128_t lhs {-42}; From be9ad8224c507a6f9090e482be8c0ff668894463 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 3 Mar 2026 11:22:16 -0500 Subject: [PATCH 025/137] Add dependabot workflow --- .github/dependabot.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..23dab29e --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,15 @@ +# Copyright 2025 Matt Borland +# Distributed under the Boost Software License, Version 1.0. +# (See accompanying file LICENSE_1_0.txt or copy at http://boost.org/LICENSE_1_0.txt) + +version: 2 +updates: + - package-ecosystem: "npm" + directory: "/doc" + schedule: + interval: "weekly" + groups: + all-dependencies: + # Groups all updates into a single PR + patterns: + - "*" From 3cbe143d1a4a7362d32a7d5b0fb61d60a251eec5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 3 Mar 2026 16:23:09 +0000 Subject: [PATCH 026/137] Bump @antora/lunr-extension in /doc in the all-dependencies group --- updated-dependencies: - dependency-name: "@antora/lunr-extension" dependency-version: 1.0.0-alpha.13 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: all-dependencies ... 
Signed-off-by: dependabot[bot] --- doc/package-lock.json | 8 ++++---- doc/package.json | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/package-lock.json b/doc/package-lock.json index 86eaeb65..ea7189b6 100644 --- a/doc/package-lock.json +++ b/doc/package-lock.json @@ -5,7 +5,7 @@ "packages": { "": { "dependencies": { - "@antora/lunr-extension": "^1.0.0-alpha.12", + "@antora/lunr-extension": "^1.0.0-alpha.13", "@cppalliance/antora-downloads-extension": "^0.0.2" }, "devDependencies": { @@ -162,9 +162,9 @@ } }, "node_modules/@antora/lunr-extension": { - "version": "1.0.0-alpha.12", - "resolved": "https://registry.npmjs.org/@antora/lunr-extension/-/lunr-extension-1.0.0-alpha.12.tgz", - "integrity": "sha512-iiEXpJae8tCH22ao7kZ4I+eyQ/3IeFIFK1G5I9QLpkCezaVPotI8eLFY7e0xDI+zsqJEfCOsfoZGYXso6xCYlA==", + "version": "1.0.0-alpha.13", + "resolved": "https://registry.npmjs.org/@antora/lunr-extension/-/lunr-extension-1.0.0-alpha.13.tgz", + "integrity": "sha512-u8n8XLB6elMmXbW0bdeL5jG8UBJi6PSiz1zaMn+wIIIu/bnxotRBW4kEWSge+zTfdF4rEYMcJ9LvkAOamMyuKQ==", "license": "MPL-2.0", "workspaces": [ "." diff --git a/doc/package.json b/doc/package.json index 2d89d19c..2d83cedc 100644 --- a/doc/package.json +++ b/doc/package.json @@ -6,6 +6,6 @@ }, "dependencies": { "@cppalliance/antora-downloads-extension": "^0.0.2", - "@antora/lunr-extension": "^1.0.0-alpha.12" + "@antora/lunr-extension": "^1.0.0-alpha.13" } } From c25ad0edfbeef7372e251071aea525f56ee4b0d9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 3 Mar 2026 16:56:49 +0000 Subject: [PATCH 027/137] Bump minimatch from 3.1.2 to 3.1.5 in /doc Bumps [minimatch](https://github.com/isaacs/minimatch) from 3.1.2 to 3.1.5. 
- [Changelog](https://github.com/isaacs/minimatch/blob/main/changelog.md) - [Commits](https://github.com/isaacs/minimatch/compare/v3.1.2...v3.1.5) --- updated-dependencies: - dependency-name: minimatch dependency-version: 3.1.5 dependency-type: indirect ... Signed-off-by: dependabot[bot] --- doc/package-lock.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/package-lock.json b/doc/package-lock.json index ea7189b6..73c1f891 100644 --- a/doc/package-lock.json +++ b/doc/package-lock.json @@ -1492,9 +1492,9 @@ } }, "node_modules/minimatch": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", - "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "version": "3.1.5", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz", + "integrity": "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w==", "dev": true, "license": "ISC", "dependencies": { From bb5900f83b6cb8968f8e54205c5e791685dc4982 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 3 Mar 2026 13:37:13 -0500 Subject: [PATCH 028/137] Add header listing to api reference --- doc/modules/ROOT/nav.adoc | 1 + doc/modules/ROOT/pages/api_reference.adoc | 50 ++++++++++++++++++++++- 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/doc/modules/ROOT/nav.adoc b/doc/modules/ROOT/nav.adoc index 7b6280e2..c91432c1 100644 --- a/doc/modules/ROOT/nav.adoc +++ b/doc/modules/ROOT/nav.adoc @@ -29,6 +29,7 @@ ** xref:api_reference.adoc#api_macros[Macros] *** xref:api_reference.adoc#api_macro_literals[Literals] *** xref:api_reference.adoc#api_macro_configuration[Configuration] +** xref:api_reference.adoc#api_headers[Headers] * xref:uint128_t.adoc[] ** xref:uint128_t.adoc#u128_alignment[Alignment] ** xref:uint128_t.adoc#u128_operator_behavior[Operator Behavior] diff --git a/doc/modules/ROOT/pages/api_reference.adoc 
b/doc/modules/ROOT/pages/api_reference.adoc index a52e1a3a..14f83218 100644 --- a/doc/modules/ROOT/pages/api_reference.adoc +++ b/doc/modules/ROOT/pages/api_reference.adoc @@ -323,4 +323,52 @@ Listed by analogous STL header. | xref:config.adoc#automatic_config[`BOOST_INT128_ENDIAN_BIG_BYTE`] | Defined on big-endian systems -|=== \ No newline at end of file +|=== + +[#api_headers] +== Headers + +[cols="1,2", options="header"] +|=== +| Header | Contents + +| `` +| Convenience header including the entire library + +| `` +| xref:api_bit[Bit manipulation functions] + +| `` +| xref:api_charconv[Character Conversion Functions] + +| `` +| Min and Max Macros + +| `` +| xref:api_cstdlib[Combined division and modulo function] + +| `` +| xref:api_formatting[Formating integration for pass:[{fmt}]] + +| `` +| xref:api_formatting[Formating integration for pass:[C++20] ``] + +| `` +| xref:api_types[The int128_t and uint128_t types] + +| `` +| xref:api_iostream[Iostream overloads for int128_t and uint128_t] + +| `` +| Overloads for `std::numeric_limits` for `int128_t` and `uint128_t` + +| `` +| xref:api_literals[User-defined literals for int128_t and uint128_t] + +| `` +| xref:api_numeric[Numeric algorithms (gcd, lcm, midpoint)] + +| `` +| xref:api_string[to_string overloads] + +|=== From 835abee211f342fc203c920fb0cae7d89d3ef922 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 3 Mar 2026 13:38:51 -0500 Subject: [PATCH 029/137] Update outdated not on alignment --- doc/modules/ROOT/pages/int128_t.adoc | 7 +------ doc/modules/ROOT/pages/uint128_t.adoc | 7 +------ 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/doc/modules/ROOT/pages/int128_t.adoc b/doc/modules/ROOT/pages/int128_t.adoc index a03e6eb7..e99ba668 100644 --- a/doc/modules/ROOT/pages/int128_t.adoc +++ b/doc/modules/ROOT/pages/int128_t.adoc @@ -57,12 +57,7 @@ If your platform has a native 128-bit signed integer, the struct is defined as s struct alignas(alignof(__int128)) int128_t ---- -Otherwise, it is 
- -[source, c++] ----- -struct alignas(sizeof(std::uint64_t) * 2) int128_t ----- +Otherwise, it is left up to the compiler to decide. [#i128_operator_behavior] == Operator Behavior diff --git a/doc/modules/ROOT/pages/uint128_t.adoc b/doc/modules/ROOT/pages/uint128_t.adoc index 56473ed7..7a616eb9 100644 --- a/doc/modules/ROOT/pages/uint128_t.adoc +++ b/doc/modules/ROOT/pages/uint128_t.adoc @@ -57,12 +57,7 @@ If your platform has a native 128-bit unsigned integer, the struct is defined as struct alignas(alignof(unsigned __int128)) uint128_t ---- -Otherwise, it is - -[source, c++] ----- -struct alignas(sizeof(std::uint64_t) * 2) uint128_t ----- +Otherwise, it is left up to the compiler to decide. [#u128_operator_behavior] == Operator Behavior From 1cfe6c7ffabff8515e082be3937519b47a04d601 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 3 Mar 2026 16:14:50 -0500 Subject: [PATCH 030/137] Fix xref consistency --- doc/modules/ROOT/pages/api_reference.adoc | 42 +++++++++++------------ 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/doc/modules/ROOT/pages/api_reference.adoc b/doc/modules/ROOT/pages/api_reference.adoc index 14f83218..5d7ae0f5 100644 --- a/doc/modules/ROOT/pages/api_reference.adoc +++ b/doc/modules/ROOT/pages/api_reference.adoc @@ -335,40 +335,40 @@ Listed by analogous STL header. 
| `` | Convenience header including the entire library -| `` -| xref:api_bit[Bit manipulation functions] +| xref:bit.adoc[``] +| Bit manipulation functions -| `` -| xref:api_charconv[Character Conversion Functions] +| xref:charconv.adoc[``] +| Character conversion functions | `` -| Min and Max Macros +| Min and max macros -| `` -| xref:api_cstdlib[Combined division and modulo function] +| xref:cstdlib.adoc[``] +| Combined division and modulo function -| `` -| xref:api_formatting[Formating integration for pass:[{fmt}]] +| xref:format.adoc#fmt_format[``] +| Formatting integration for pass:[{fmt}] -| `` -| xref:api_formatting[Formating integration for pass:[C++20] ``] +| xref:format.adoc#std_format[``] +| Formatting integration for pass:[C++20] `` -| `` -| xref:api_types[The int128_t and uint128_t types] +| xref:uint128_t.adoc[``] +| The `int128_t` and `uint128_t` types -| `` -| xref:api_iostream[Iostream overloads for int128_t and uint128_t] +| xref:stream.adoc[``] +| Iostream overloads for `int128_t` and `uint128_t` | `` | Overloads for `std::numeric_limits` for `int128_t` and `uint128_t` -| `` -| xref:api_literals[User-defined literals for int128_t and uint128_t] +| xref:literals.adoc[``] +| User-defined literals for `int128_t` and `uint128_t` -| `` -| xref:api_numeric[Numeric algorithms (gcd, lcm, midpoint)] +| xref:numeric.adoc[``] +| Numeric algorithms (gcd, lcm, midpoint) -| `` -| xref:api_string[to_string overloads] +| xref:string.adoc[``] +| `to_string` overloads |=== From 3de870df188d9402488bd234bb934c99aa3c27d1 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 3 Mar 2026 16:32:42 -0500 Subject: [PATCH 031/137] Add natvis file as best as we can --- extra/int128.natvis | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 extra/int128.natvis diff --git a/extra/int128.natvis b/extra/int128.natvis new file mode 100644 index 00000000..27fa4fa8 --- /dev/null +++ b/extra/int128.natvis @@ -0,0 +1,39 @@ + + + + + + + 
{low,u} + 0x{high>>60&0xF,X}{high>>56&0xF,X}{high>>52&0xF,X}{high>>48&0xF,X}{high>>44&0xF,X}{high>>40&0xF,X}{high>>36&0xF,X}{high>>32&0xF,X}{high>>28&0xF,X}{high>>24&0xF,X}{high>>20&0xF,X}{high>>16&0xF,X}{high>>12&0xF,X}{high>>8&0xF,X}{high>>4&0xF,X}{high&0xF,X}'{low>>60&0xF,X}{low>>56&0xF,X}{low>>52&0xF,X}{low>>48&0xF,X}{low>>44&0xF,X}{low>>40&0xF,X}{low>>36&0xF,X}{low>>32&0xF,X}{low>>28&0xF,X}{low>>24&0xF,X}{low>>20&0xF,X}{low>>16&0xF,X}{low>>12&0xF,X}{low>>8&0xF,X}{low>>4&0xF,X}{low&0xF,X} + + high + low + + + + + + {low,u} + -{~low + 1,u} + 0x{(unsigned __int64)high>>60&0xF,X}{(unsigned __int64)high>>56&0xF,X}{(unsigned __int64)high>>52&0xF,X}{(unsigned __int64)high>>48&0xF,X}{(unsigned __int64)high>>44&0xF,X}{(unsigned __int64)high>>40&0xF,X}{(unsigned __int64)high>>36&0xF,X}{(unsigned __int64)high>>32&0xF,X}{(unsigned __int64)high>>28&0xF,X}{(unsigned __int64)high>>24&0xF,X}{(unsigned __int64)high>>20&0xF,X}{(unsigned __int64)high>>16&0xF,X}{(unsigned __int64)high>>12&0xF,X}{(unsigned __int64)high>>8&0xF,X}{(unsigned __int64)high>>4&0xF,X}{(unsigned __int64)high&0xF,X}'{low>>60&0xF,X}{low>>56&0xF,X}{low>>52&0xF,X}{low>>48&0xF,X}{low>>44&0xF,X}{low>>40&0xF,X}{low>>36&0xF,X}{low>>32&0xF,X}{low>>28&0xF,X}{low>>24&0xF,X}{low>>20&0xF,X}{low>>16&0xF,X}{low>>12&0xF,X}{low>>8&0xF,X}{low>>4&0xF,X}{low&0xF,X} + + high + low + + + + From 08619efc56d6d8b2a9d5c497fd84d2305358118d Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 3 Mar 2026 16:32:53 -0500 Subject: [PATCH 032/137] Add natvis to printers documentation --- doc/modules/ROOT/pages/printer.adoc | 77 ++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 2 deletions(-) diff --git a/doc/modules/ROOT/pages/printer.adoc b/doc/modules/ROOT/pages/printer.adoc index d1181dca..682651cb 100644 --- a/doc/modules/ROOT/pages/printer.adoc +++ b/doc/modules/ROOT/pages/printer.adoc @@ -8,8 +8,8 @@ https://www.boost.org/LICENSE_1_0.txt = Pretty Printers :idprefix: pretty_printers_ -Pretty printers allow debuggers 
to display `uint128_t` and `int128_t` values in human-readable decimal format instead of showing the raw struct members. -The library contains pretty printers for LLDB and GDB in the `extra/` folder. +Pretty printers allow debuggers to display `uint128_t` and `int128_t` values in human-readable format instead of showing the raw struct members. +The library contains pretty printers for LLDB, GDB, and Visual Studio in the `extra/` folder. == LLDB @@ -39,3 +39,76 @@ or you can source it manually in GDB. ---- (gdb) source /path/to/int128/extra/int128_printer_gdb.py ---- + +== Visual Studio (NATVIS) + +The `extra/int128.natvis` file provides visualization for the Visual Studio debugger. +There are several ways to register it: + +=== Per-Project + +Add the `.natvis` file to your Visual Studio project. +In Solution Explorer, right-click the project, select **Add > Existing Item**, and choose `int128.natvis`. +Visual Studio will automatically use it when debugging that project. + +=== Per-User (All Projects) + +Copy `int128.natvis` to your per-user Visualizers directory: + +[source] +---- +%USERPROFILE%\Documents\Visual Studio 2022\Visualizers\ +---- + +Replace `2022` with your Visual Studio version. +All projects debugged with that installation will use the visualizer. + +=== CMake Projects + +Add the `.natvis` file as a source file in your `CMakeLists.txt`: + +[source,cmake] +---- +target_sources(my_target PRIVATE /path/to/int128/extra/int128.natvis) +---- + +=== Display Format + +Values that fit in 64 bits are displayed in decimal. +Larger values are displayed as a synthesized hexadecimal value with a `'` digit separator between the high and low halves. 
+ +[cols="1,1,2", options="header"] +|=== +| Type | Value | Display + +| `uint128_t` +| `42` +| `42` + +| `uint128_t` +| `2^64 + 1` +| `0x0000000000000001'0000000000000001` + +| `uint128_t` +| `uint128_max` +| `0xFFFFFFFFFFFFFFFF'FFFFFFFFFFFFFFFF` + +| `int128_t` +| `42` +| `42` + +| `int128_t` +| `-5` +| `-5` + +| `int128_t` +| `2^64 + 1` +| `0x0000000000000001'0000000000000001` + +| `int128_t` +| `int128_min` +| `0x8000000000000000'0000000000000000` +|=== + +NOTE: Full decimal display for values beyond 64 bits is not possible in NATVIS. +The NATVIS expression evaluator does not support 128-bit arithmetic, so values that exceed the 64-bit range are shown in hexadecimal. From 05213ce1ee6e1a11d0293e7db473823beac964ff Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 3 Mar 2026 16:36:30 -0500 Subject: [PATCH 033/137] Consistency --- doc/modules/ROOT/pages/literals.adoc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/modules/ROOT/pages/literals.adoc b/doc/modules/ROOT/pages/literals.adoc index c075e67b..2b698704 100644 --- a/doc/modules/ROOT/pages/literals.adoc +++ b/doc/modules/ROOT/pages/literals.adoc @@ -34,14 +34,14 @@ constexpr int128_t operator ""_i128(const char* str) noexcept; constexpr int128_t operator ""_I128(const char* str) noexcept; -constexpr int128_t operator ""_i128(unsigned long long v) noexcept; - -constexpr int128_t operator ""_I128(unsigned long long v) noexcept; - constexpr int128_t operator ""_i128(const char* str, std::size_t len) noexcept; constexpr int128_t operator ""_I128(const char* str, std::size_t len) noexcept; +constexpr int128_t operator ""_i128(unsigned long long v) noexcept; + +constexpr int128_t operator ""_I128(unsigned long long v) noexcept; + } // namespace literals } // namespace int128 } // namespace boost From cf4f510655d381a8151ab038d41d68d52709cf60 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 3 Mar 2026 16:41:53 -0500 Subject: [PATCH 034/137] Add description of mixed type 
comparisons --- doc/modules/ROOT/pages/mixed_type_ops.adoc | 81 ++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/doc/modules/ROOT/pages/mixed_type_ops.adoc b/doc/modules/ROOT/pages/mixed_type_ops.adoc index bc932eae..8aa916c2 100644 --- a/doc/modules/ROOT/pages/mixed_type_ops.adoc +++ b/doc/modules/ROOT/pages/mixed_type_ops.adoc @@ -84,3 +84,84 @@ constexpr uint128_t operator%(int128_t lhs, uint128_t rhs); } // namespace boost ---- + +== Comparisons + +If you define xref:config.adoc#sign_compare[`BOOST_INT128_ALLOW_SIGN_COMPARE`], the operators have the following behavior. + +=== Equality + +[source, c++] +---- +constexpr bool operator==(uint128_t lhs, int128_t rhs); + +constexpr bool operator==(int128_t lhs, uint128_t rhs); +---- + +If the `int128_t` argument is less than 0 returns `false`. +Otherwise, returns the same as `static_cast(lhs) == static_cast(rhs)`. + +=== Inequality + +[source, c++] +---- +constexpr bool operator!=(uint128_t lhs, int128_t rhs); + +constexpr bool operator!=(int128_t lhs, uint128_t rhs); +---- + +If the `int128_t` argument is less than 0 returns `true`. +Otherwise, returns the same as `static_cast(lhs) != static_cast(rhs)`. + +=== Less Than + +[source, c++] +---- +constexpr bool operator<(uint128_t lhs, int128_t rhs); + +constexpr bool operator<(int128_t lhs, uint128_t rhs); +---- + +If `lhs` is type `int128_t` returns `true` if `lhs < 0` +If `rhs` is type `int128_t` returns `false` if `rhs < 0` +Otherwise, returns the same as `static_cast(lhs) < static_cast(rhs)`. + +=== Less Than or Equal To + +[source, c++] +---- +constexpr bool operator<=(uint128_t lhs, int128_t rhs); + +constexpr bool operator<=(int128_t lhs, uint128_t rhs); +---- + +If `lhs` is type `int128_t` returns `true` if `lhs < 0` +If `rhs` is type `int128_t` returns `false` if `rhs < 0` +Otherwise, returns the same as `static_cast(lhs) pass:[<=] static_cast(rhs)`.
+ +=== Greater Than + +[source, c++] +---- +constexpr bool operator>(uint128_t lhs, int128_t rhs); + +constexpr bool operator>(int128_t lhs, uint128_t rhs); +---- + +If `lhs` is type `int128_t` returns `false` if `lhs < 0` +If `rhs` is type `int128_t` returns `true` if `rhs < 0` +Otherwise, returns the same as `static_cast(lhs) > static_cast(rhs)`. + +=== Greater Than or Equal To + +[source, c++] +---- +constexpr bool operator>=(uint128_t lhs, int128_t rhs); + +constexpr bool operator>=(int128_t lhs, uint128_t rhs); +---- + +If `lhs` is type `int128_t` returns `false` if `lhs < 0` +If `rhs` is type `int128_t` returns `true` if `rhs < 0` +Otherwise, returns the same as `static_cast(lhs) pass:[>=] static_cast(rhs)`. + From 2beb66d812106393ab28575b1b9bcf01f894454f Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 3 Mar 2026 16:48:28 -0500 Subject: [PATCH 035/137] Add description of mixed type arithmetic operators --- doc/modules/ROOT/pages/mixed_type_ops.adoc | 58 ++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/doc/modules/ROOT/pages/mixed_type_ops.adoc b/doc/modules/ROOT/pages/mixed_type_ops.adoc index 8aa916c2..5a02877d 100644 --- a/doc/modules/ROOT/pages/mixed_type_ops.adoc +++ b/doc/modules/ROOT/pages/mixed_type_ops.adoc @@ -165,3 +165,61 @@ If `lhs` is type `int128_t` returns `false` if `lhs < 0` If `rhs` is type `int128_t` returns `true` if `rhs < 0` Otherwise, returns the same as `static_cast(lhs) pass:[>=] static_cast(rhs)`. +== Arithmetic + +If you define xref:config.adoc#sign_compare[`BOOST_INT128_ALLOW_SIGN_CONVERSION`], the operators have the following behavior.
+ +=== Addition + +[source, c++] +---- +constexpr uint128_t operator+(uint128_t lhs, int128_t rhs); + +constexpr uint128_t operator+(int128_t lhs, uint128_t rhs); +---- + +Returns the same as `static_cast(lhs) + static_cast(rhs)` + +=== Subtraction + +[source, c++] +---- +constexpr uint128_t operator-(uint128_t lhs, int128_t rhs); + +constexpr uint128_t operator-(int128_t lhs, uint128_t rhs); +---- + +Returns the same as `static_cast(lhs) - static_cast(rhs)` + +=== Multiplication + +[source, c++] +---- +constexpr uint128_t operator*(uint128_t lhs, int128_t rhs); + +constexpr uint128_t operator*(int128_t lhs, uint128_t rhs); +---- + +Returns the same as `static_cast(lhs) * static_cast(rhs)` + +=== Division + +[source, c++] +---- +constexpr uint128_t operator/(uint128_t lhs, int128_t rhs); + +constexpr uint128_t operator/(int128_t lhs, uint128_t rhs); +---- + +Returns the same as `static_cast(lhs) / static_cast(rhs)` + +=== Modulo + +[source, c++] +---- +constexpr uint128_t operator%(uint128_t lhs, int128_t rhs); + +constexpr uint128_t operator%(int128_t lhs, uint128_t rhs); +---- + +Returns the same as `static_cast(lhs) % static_cast(rhs)` From b9e0150d7cfc9e649055b86bd7573246124586ac Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 4 Mar 2026 10:04:10 -0500 Subject: [PATCH 036/137] Add random header compile test --- test/Jamfile | 1 + test/compile_tests/random_compile.cpp | 10 ++++++++++ 2 files changed, 11 insertions(+) create mode 100644 test/compile_tests/random_compile.cpp diff --git a/test/Jamfile b/test/Jamfile index fd3f9325..de0ae2d9 100644 --- a/test/Jamfile +++ b/test/Jamfile @@ -127,3 +127,4 @@ compile compile_tests/limits_compile.cpp ; compile compile_tests/literals_compile.cpp ; compile compile_tests/numeric_compile.cpp ; compile compile_tests/string_compile.cpp ; +compile compile_tests/random_compile.cpp ; diff --git a/test/compile_tests/random_compile.cpp b/test/compile_tests/random_compile.cpp new file mode 100644 index 00000000..42d23f37 --- 
/dev/null +++ b/test/compile_tests/random_compile.cpp @@ -0,0 +1,10 @@ +// Copyright 2025 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include + +int main() +{ + return 0; +} From 18cd42a3f6b8764c75b0439478652c6cf3b51b21 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 4 Mar 2026 10:21:47 -0500 Subject: [PATCH 037/137] Add header of overloads for boost::random::traits --- examples/math_and_random.cpp | 1 + include/boost/int128/random.hpp | 96 +++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) create mode 100644 include/boost/int128/random.hpp diff --git a/examples/math_and_random.cpp b/examples/math_and_random.cpp index 9e76f7aa..f99b2d53 100644 --- a/examples/math_and_random.cpp +++ b/examples/math_and_random.cpp @@ -6,6 +6,7 @@ #define BOOST_INT128_ALLOW_SIGN_CONVERSION #include +#include #ifdef __clang__ #pragma clang diagnostic push diff --git a/include/boost/int128/random.hpp b/include/boost/int128/random.hpp new file mode 100644 index 00000000..82e2975b --- /dev/null +++ b/include/boost/int128/random.hpp @@ -0,0 +1,96 @@ +// Copyright 2026 Matt Borland +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_INT128_RANDOM_HPP +#define BOOST_INT128_RANDOM_HPP + +#include + +namespace boost { +namespace random { +namespace traits { + +template +struct make_unsigned_imp; + +template <> +struct make_unsigned_imp +{ + using type = int128::uint128_t; +}; + +template <> +struct make_unsigned_imp +{ + using type = int128::uint128_t; +}; + +template +struct make_unsigned; + +template <> +struct make_unsigned +{ + using type = int128::uint128_t; +}; + +template <> +struct make_unsigned +{ + using type = int128::int128_t; +}; + +template +struct make_unsigned_or_unbounded_imp; + +template <> +struct make_unsigned_or_unbounded_imp +{ + using type = int128::uint128_t; +}; + +template <> +struct make_unsigned_or_unbounded_imp +{ + using type = int128::uint128_t; +}; + +template +struct make_unsigned_or_unbounded; + +template <> +struct make_unsigned_or_unbounded +{ + using type = int128::uint128_t; +}; + +template <> +struct make_unsigned_or_unbounded +{ + using type = int128::uint128_t; +}; + +template +struct is_integral; + +template <> +struct is_integral : std::true_type {}; + +template <> +struct is_integral : std::true_type {}; + +template +struct is_signed; + +template <> +struct is_signed : std::false_type {}; + +template <> +struct is_signed : std::true_type {}; + +} // namespace traits +} // namespace random +} // namespace boost + +#endif // BOOST_INT128_RANDOM_HPP From 9453488e539c7d9bd37d7357eb39e26f036a3268 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 4 Mar 2026 10:24:54 -0500 Subject: [PATCH 038/137] Update docs on boost random usage --- doc/modules/ROOT/pages/api_reference.adoc | 3 +++ examples/math_and_random.cpp | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/modules/ROOT/pages/api_reference.adoc b/doc/modules/ROOT/pages/api_reference.adoc index 5d7ae0f5..b5a8bd4a 100644 --- a/doc/modules/ROOT/pages/api_reference.adoc +++ b/doc/modules/ROOT/pages/api_reference.adoc @@ 
-371,4 +371,7 @@ Listed by analogous STL header. | xref:string.adoc[``] | `to_string` overloads +| `` +| Required for usage of Boost.Random + |=== diff --git a/examples/math_and_random.cpp b/examples/math_and_random.cpp index f99b2d53..dfd4936e 100644 --- a/examples/math_and_random.cpp +++ b/examples/math_and_random.cpp @@ -6,7 +6,7 @@ #define BOOST_INT128_ALLOW_SIGN_CONVERSION #include -#include +#include // Not included in the convenience header, but needed for boost.random interop #ifdef __clang__ #pragma clang diagnostic push From d19304d3705062f8ca232b395dedd7c4e62eef16 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 4 Mar 2026 14:12:37 -0500 Subject: [PATCH 039/137] Add random int128_t generation to the docs --- doc/modules/ROOT/pages/examples.adoc | 11 +++++++++++ examples/math_and_random.cpp | 15 ++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/doc/modules/ROOT/pages/examples.adoc b/doc/modules/ROOT/pages/examples.adoc index c6be0982..1e83f2d0 100644 --- a/doc/modules/ROOT/pages/examples.adoc +++ b/doc/modules/ROOT/pages/examples.adoc @@ -303,6 +303,17 @@ signed_value / 4U = 0 ---- include::example$math_and_random.cpp[] ---- + +.Expected Output +[listing] +---- +=== uint128_t === + Mean: 22125900135088040520646253247977468 +Variance: 15183108029620265677746188314852225 + Median: 169775281866460752209725324063124732284 +=== int128_t === +Random int128_t: 45422201008201503618595888886744218664 +---- ==== [#examples_boost_charconv] diff --git a/examples/math_and_random.cpp b/examples/math_and_random.cpp index dfd4936e..d9897028 100644 --- a/examples/math_and_random.cpp +++ b/examples/math_and_random.cpp @@ -28,6 +28,8 @@ int main() { + std::cout << "=== uint128_t ===" << '\n'; + // Setup our rng and distribution std::mt19937_64 rng {42}; boost::random::uniform_int_distribution dist {0, (std::numeric_limits::max)()}; @@ -40,9 +42,16 @@ int main() } // Perform some rudimentary statistical analysis on our dataset - std::cout 
<< " Mean: " << boost::math::statistics::mean(data_set) << std::endl; - std::cout << "Variance: " << boost::math::statistics::variance(data_set) << std::endl; - std::cout << " Median: " << boost::math::statistics::median(data_set) << std::endl; + std::cout << " Mean: " << boost::math::statistics::mean(data_set) << '\n'; + std::cout << "Variance: " << boost::math::statistics::variance(data_set) << '\n'; + std::cout << " Median: " << boost::math::statistics::median(data_set) << '\n'; + + std::cout << "=== int128_t ===" << '\n'; + + // We can also generate random signed integers using int128_t + boost::random::uniform_int_distribution signed_dist {std::numeric_limits::min(), std::numeric_limits::max()}; + + std::cout << "Random int128_t: " << signed_dist(rng) << std::endl; return 0; } From 020dd79d8cdb8d1a22ad24431566800a4bec7c0c Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 4 Mar 2026 15:20:50 -0500 Subject: [PATCH 040/137] Add MSVC 2026 runner --- .drone.jsonnet | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.drone.jsonnet b/.drone.jsonnet index d459d3ec..4aaae745 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -416,4 +416,10 @@ local windows_pipeline(name, image, environment, arch = "amd64") = "cppalliance/dronevs2022:1", { TOOLSET: 'msvc-14.3', CXXSTD: '14,17,20,latest', ADDRMD: '32,64' }, ), + + windows_pipeline( + "Windows VS2026 msvc-14.5", + "cppalliance/dronevs2026:1", + { TOOLSET: 'msvc-14.5', CXXSTD: '14,17,20,latest', ADDRMD: '32,64' }, + ), ] From b414b6454c51e5a965682028f5b70d74882cb8ba Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 9 Mar 2026 10:55:06 -0400 Subject: [PATCH 041/137] Update docs to show explicit construction between types --- doc/modules/ROOT/pages/int128_t.adoc | 2 +- doc/modules/ROOT/pages/uint128_t.adoc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/modules/ROOT/pages/int128_t.adoc b/doc/modules/ROOT/pages/int128_t.adoc index e99ba668..1765099d 100644 --- 
a/doc/modules/ROOT/pages/int128_t.adoc +++ b/doc/modules/ROOT/pages/int128_t.adoc @@ -87,7 +87,7 @@ struct int128_t constexpr int128_t& operator=(const int128_t&) noexcept = default; constexpr int128_t& operator=(int128_t&&) noexcept = default; - constexpr int128_t(const uint128_t& v) noexcept; + explicit constexpr int128_t(const uint128_t& v) noexcept; // Construct from integral types constexpr int128_t(const std::int64_t hi, const std::uint64_t lo) noexcept; diff --git a/doc/modules/ROOT/pages/uint128_t.adoc b/doc/modules/ROOT/pages/uint128_t.adoc index 7a616eb9..06c2a59f 100644 --- a/doc/modules/ROOT/pages/uint128_t.adoc +++ b/doc/modules/ROOT/pages/uint128_t.adoc @@ -126,7 +126,7 @@ struct uint128_t constexpr uint128_t& operator=(const uint128_t&) noexcept = default; constexpr uint128_t& operator=(uint128_t&&) noexcept = default; - constexpr uint128_t(const int128_t& v) noexcept; + explicit constexpr uint128_t(const int128_t& v) noexcept; // Construct from integral types constexpr uint128_t(const std::uint64_t hi, const std::uint64_t lo) noexcept; From c699e368022d81a3b0c06a3b2ae268ad4d4258b9 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 9 Mar 2026 11:06:07 -0400 Subject: [PATCH 042/137] Add CUDA pipeline --- .github/workflows/ci.yml | 58 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 15396eb6..2e6bc882 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1146,3 +1146,61 @@ jobs: cd ~/pkgconfig_test g++ main.cpp $(pkg-config --cflags --libs boost_int128) -o test_pkgconfig ./test_pkgconfig + + cuda-cmake-test: + strategy: + fail-fast: false + + runs-on: gpu-runner-1 + + steps: + - uses: Jimver/cuda-toolkit@v0.2.25 + id: cuda-toolkit + with: + cuda: '12.8.0' + method: 'network' + sub-packages: '["nvcc"]' + + - name: Output CUDA information + run: | + echo "Installed cuda version is: ${{steps.cuda-toolkit.outputs.cuda}}"+ + echo "Cuda 
install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}" + nvcc -V + - uses: actions/checkout@v4 + + - name: Install Packages + run: | + sudo apt-get install -y cmake make + - name: Setup Boost + run: | + echo GITHUB_REPOSITORY: $GITHUB_REPOSITORY + LIBRARY=${GITHUB_REPOSITORY#*/} + echo LIBRARY: $LIBRARY + echo "LIBRARY=$LIBRARY" >> $GITHUB_ENV + echo GITHUB_BASE_REF: $GITHUB_BASE_REF + echo GITHUB_REF: $GITHUB_REF + REF=${GITHUB_BASE_REF:-$GITHUB_REF} + REF=${REF#refs/heads/} + echo REF: $REF + BOOST_BRANCH=develop && [ "$REF" == "master" ] && BOOST_BRANCH=master || true + echo BOOST_BRANCH: $BOOST_BRANCH + cd .. + git clone -b $BOOST_BRANCH --depth 1 https://github.com/boostorg/boost.git boost-root + cd boost-root + mkdir -p libs/$LIBRARY + cp -r $GITHUB_WORKSPACE/* libs/$LIBRARY + git submodule update --init tools/boostdep + python3 tools/boostdep/depinst/depinst.py --git_args "--jobs 3" $LIBRARY + - name: Configure + run: | + cd ../boost-root + mkdir __build__ && cd __build__ + cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_INT128_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES=86 -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.8 .. + - name: Build tests + run: | + cd ../boost-root/__build__ + cmake --build . --target tests -j $(nproc) + - name: Run tests + run: | + cd ../boost-root/__build__ + ctest --output-on-failure --no-tests=error From 68f3ec53c27614667e2f62ec8d949533e4f4c826 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 9 Mar 2026 11:21:42 -0400 Subject: [PATCH 043/137] Add managed pointer file --- test/cuda_managed_ptr.hpp | 139 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 test/cuda_managed_ptr.hpp diff --git a/test/cuda_managed_ptr.hpp b/test/cuda_managed_ptr.hpp new file mode 100644 index 00000000..c9b1ab60 --- /dev/null +++ b/test/cuda_managed_ptr.hpp @@ -0,0 +1,139 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. 
+// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_INT128_CUDA_MANAGED_PTR_HPP +#define BOOST_INT128_CUDA_MANAGED_PTR_HPP + +#ifdef _MSC_VER +#pragma once +#endif + +#include + +class managed_holder_base +{ +protected: + static int count; + managed_holder_base() { ++count; } + ~managed_holder_base() + { + if(0 == --count) + cudaDeviceSynchronize(); + } +}; + +int managed_holder_base::count = 0; + +// +// Reset the device and exit: +// cudaDeviceReset causes the driver to clean up all state. While +// not mandatory in normal operation, it is good practice. It is also +// needed to ensure correct operation when the application is being +// profiled. Calling cudaDeviceReset causes all profile data to be +// flushed before the application exits. +// +// We have a global instance of this class, plus instances for each +// managed pointer. Last one out the door switches the lights off. +// +class cudaResetter +{ + static int count; +public: + cudaResetter() { ++count; } + ~cudaResetter() + { + if(--count == 0) + { + cudaError_t err = cudaDeviceReset(); + if(err != cudaSuccess) + { + std::cerr << "Failed to deinitialize the device! error=" << cudaGetErrorString(err) << std::endl; + } + } + } +}; + +int cudaResetter::count = 0; + +cudaResetter global_resetter; + +template +class cuda_managed_ptr +{ + T* data; + static const cudaResetter resetter; + cuda_managed_ptr(const cuda_managed_ptr&) = delete; + cuda_managed_ptr& operator=(cuda_managed_ptr const&) = delete; + void free() + { + if(data) + { + cudaDeviceSynchronize(); + cudaError_t err = cudaFree(data); + if(err != cudaSuccess) + { + std::cerr << "Failed to deinitialize the device! 
error=" << cudaGetErrorString(err) << std::endl; + } + } + } +public: + cuda_managed_ptr() : data(0) {} + cuda_managed_ptr(std::size_t n) + { + cudaError_t err = cudaSuccess; + void *ptr; + err = cudaMallocManaged(&ptr, n * sizeof(T)); + if(err != cudaSuccess) + throw std::runtime_error(cudaGetErrorString(err)); + cudaDeviceSynchronize(); + data = static_cast(ptr); + } + cuda_managed_ptr(cuda_managed_ptr&& o) + { + data = o.data; + o.data = 0; + } + cuda_managed_ptr& operator=(cuda_managed_ptr&& o) + { + free(); + data = o.data; + o.data = 0; + return *this; + } + ~cuda_managed_ptr() + { + free(); + } + + class managed_holder : managed_holder_base + { + T* pdata; + public: + managed_holder(T* p) : managed_holder_base(), pdata(p) {} + managed_holder(const managed_holder& o) : managed_holder_base(), pdata(o.pdata) {} + operator T* () { return pdata; } + T& operator[] (std::size_t n) { return pdata[n]; } + }; + class const_managed_holder : managed_holder_base + { + const T* pdata; + public: + const_managed_holder(T* p) : managed_holder_base(), pdata(p) {} + const_managed_holder(const managed_holder& o) : managed_holder_base(), pdata(o.pdata) {} + operator const T* () { return pdata; } + const T& operator[] (std::size_t n) { return pdata[n]; } + }; + + managed_holder get() { return managed_holder(data); } + const_managed_holder get()const { return data; } + T& operator[](std::size_t n) { return data[n]; } + const T& operator[](std::size_t n)const { return data[n]; } +}; + +template +cudaResetter const cuda_managed_ptr::resetter; + +#endif // BOOST_INT128_CUDA_MANAGED_PTR_HPP From bd4ced5262eebaf0875114aa23449424f99d5a35 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 9 Mar 2026 11:22:27 -0400 Subject: [PATCH 044/137] Add stopwatch header --- test/stopwatch.hpp | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 test/stopwatch.hpp diff --git a/test/stopwatch.hpp b/test/stopwatch.hpp new file mode 100644 index 
00000000..9f3c60de --- /dev/null +++ b/test/stopwatch.hpp @@ -0,0 +1,39 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_CUDA_STOPWATCH_HPP +#define BOOST_MATH_CUDA_STOPWATCH_HPP + +#ifdef _MSC_VER +#pragma once +#endif + +#include + +template +struct stopwatch +{ + typedef typename Clock::duration duration; + stopwatch() + { + m_start = Clock::now(); + } + double elapsed() + { + duration t = Clock::now() - m_start; + return std::chrono::duration_cast>(t).count(); + } + void reset() + { + m_start = Clock::now(); + } + +private: + typename Clock::time_point m_start; +}; + +typedef stopwatch watch; + +#endif From 6a3e9772ada5b92c11bbb7840eaf67c35dcd42fe Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 9 Mar 2026 11:43:43 -0400 Subject: [PATCH 045/137] Add NVCC testing of addition --- test/test_add.cu | 106 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 test/test_add.cu diff --git a/test/test_add.cu b/test/test_add.cu new file mode 100644 index 00000000..e5bb6e02 --- /dev/null +++ b/test/test_add.cu @@ -0,0 +1,106 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] + in[i]; + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution<> dist(std::numeric_limits::min() / test_type{2}, std::numeric_limits::max() / test_type{2}); + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] + input_vector[i]); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} From 61bd1b9be40b314890de67c9295a08a728d4934b Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 9 Mar 2026 11:43:53 -0400 Subject: [PATCH 046/137] Add CUDA specific jamfile --- test/cuda_jamfile | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 test/cuda_jamfile diff --git a/test/cuda_jamfile b/test/cuda_jamfile new file mode 100644 index 00000000..05e22d5b --- /dev/null +++ b/test/cuda_jamfile @@ -0,0 +1,12 @@ +# Copyright 2024 Matt Borland +# Distributed under the Boost Software License, Version 1.0. 
+# https://www.boost.org/LICENSE_1_0.txt + +import testing ; +import ../../config/checks/config : requires ; + +project : requirements + [ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ] + ; + +run test_add.cu ; \ No newline at end of file From 7269dd34fee1cedd0fe61819110b88c4ab2484b5 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 9 Mar 2026 14:07:51 -0400 Subject: [PATCH 047/137] Add host device macro --- include/boost/int128/detail/config.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/boost/int128/detail/config.hpp b/include/boost/int128/detail/config.hpp index 7fd2c8d2..cf176850 100644 --- a/include/boost/int128/detail/config.hpp +++ b/include/boost/int128/detail/config.hpp @@ -285,4 +285,10 @@ using builtin_u128 = std::_Unsigned128; # endif #endif +#ifdef __NVCC__ +# define BOOST_INT128_HOST_DEVICE __host__ __device__ +#else +# define BOOST_INT128_HOST_DEVICE +#endif + #endif // BOOST_INT128_DETAIL_CONFIG_HPP From 0f64e24fa0d5dac2ef57bb195f315cea25212b06 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 9 Mar 2026 15:00:08 -0400 Subject: [PATCH 048/137] Bulk implementation of CUDA markers --- include/boost/int128/bit.hpp | 40 +- include/boost/int128/cstdlib.hpp | 4 +- include/boost/int128/detail/clz.hpp | 24 +- include/boost/int128/detail/common_div.hpp | 32 +- include/boost/int128/detail/common_mul.hpp | 8 +- include/boost/int128/detail/conversions.hpp | 30 +- include/boost/int128/detail/ctz.hpp | 16 +- include/boost/int128/detail/int128_imp.hpp | 616 +++++++++--------- .../boost/int128/detail/mini_from_chars.hpp | 8 +- include/boost/int128/detail/mini_to_chars.hpp | 4 +- include/boost/int128/detail/uint128_imp.hpp | 598 ++++++++--------- include/boost/int128/detail/utilities.hpp | 4 +- include/boost/int128/literals.hpp | 24 +- include/boost/int128/numeric.hpp | 40 +- 14 files changed, 725 insertions(+), 723 deletions(-) diff --git 
a/include/boost/int128/bit.hpp b/include/boost/int128/bit.hpp index 578e0ab1..39e46535 100644 --- a/include/boost/int128/bit.hpp +++ b/include/boost/int128/bit.hpp @@ -13,53 +13,53 @@ namespace boost { namespace int128 { -BOOST_INT128_EXPORT constexpr bool has_single_bit(const uint128_t x) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool has_single_bit(const uint128_t x) noexcept { return x && !(x & (x - 1U)); } -BOOST_INT128_EXPORT constexpr int countl_zero(const uint128_t x) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int countl_zero(const uint128_t x) noexcept { return x.high == 0 ? 64 + detail::countl_zero(x.low) : detail::countl_zero(x.high); } -BOOST_INT128_EXPORT constexpr int countl_one(const uint128_t x) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int countl_one(const uint128_t x) noexcept { return countl_zero(~x); } -BOOST_INT128_EXPORT constexpr int bit_width(const uint128_t x) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int bit_width(const uint128_t x) noexcept { return x ? 128 - countl_zero(x) : 0; } -BOOST_INT128_EXPORT constexpr uint128_t bit_ceil(const uint128_t x) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t bit_ceil(const uint128_t x) noexcept { return x <= 1U ? static_cast(1) : static_cast(1) << bit_width(x - 1U); } -BOOST_INT128_EXPORT constexpr uint128_t bit_floor(const uint128_t x) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t bit_floor(const uint128_t x) noexcept { return x > 0U ? static_cast(1) << (bit_width(x) - 1U) : static_cast(0); } -BOOST_INT128_EXPORT constexpr int countr_zero(const uint128_t x) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int countr_zero(const uint128_t x) noexcept { return x.low == 0 ? 
64 + detail::countr_zero(x.high) : detail::countr_zero(x.low); } -BOOST_INT128_EXPORT constexpr int countr_one(const uint128_t x) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int countr_one(const uint128_t x) noexcept { return countr_zero(~x); } -BOOST_INT128_EXPORT constexpr uint128_t rotl(const uint128_t x, const int s) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t rotl(const uint128_t x, const int s) noexcept { constexpr auto mask {127U}; return x << (static_cast(s) & mask) | x >> (static_cast(-s) & mask); } -BOOST_INT128_EXPORT constexpr uint128_t rotr(const uint128_t x, const int s) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t rotr(const uint128_t x, const int s) noexcept { constexpr auto mask {127U}; return x >> (static_cast(s) & mask) | x << (static_cast(-s) & mask); @@ -67,7 +67,7 @@ BOOST_INT128_EXPORT constexpr uint128_t rotr(const uint128_t x, const int s) noe #if BOOST_INT128_HAS_BUILTIN(__builtin_popcountll) -BOOST_INT128_EXPORT constexpr int popcount(const uint128_t x) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int popcount(const uint128_t x) noexcept { return __builtin_popcountll(x.high) + __builtin_popcountll(x.low); } @@ -76,7 +76,7 @@ BOOST_INT128_EXPORT constexpr int popcount(const uint128_t x) noexcept namespace impl { -constexpr int popcount_impl(std::uint64_t x) noexcept +BOOST_INT128_HOST_DEVICE constexpr int popcount_impl(std::uint64_t x) noexcept { x = x - ((x >> 1U) & UINT64_C(0x5555555555555555)); x = (x & UINT64_C(0x3333333333333333)) + ((x >> 2U) & UINT64_C(0x3333333333333333)); @@ -89,7 +89,7 @@ constexpr int popcount_impl(std::uint64_t x) noexcept #if defined(_M_AMD64) && !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION) && !BOOST_INT128_HAS_BUILTIN(__builtin_popcountll) -BOOST_INT128_EXPORT constexpr int popcount(const uint128_t x) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int popcount(const uint128_t x) noexcept { 
if (BOOST_INT128_IS_CONSTANT_EVALUATED(x)) { @@ -111,7 +111,7 @@ BOOST_INT128_EXPORT constexpr int popcount(const uint128_t x) noexcept #elif defined(_M_IX86) && !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION) && !BOOST_INT128_HAS_BUILTIN(__builtin_popcountll) -BOOST_INT128_EXPORT constexpr int popcount(const uint128_t x) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int popcount(const uint128_t x) noexcept { if (BOOST_INT128_IS_CONSTANT_EVALUATED(x)) { @@ -141,7 +141,7 @@ BOOST_INT128_EXPORT constexpr int popcount(const uint128_t x) noexcept #elif !BOOST_INT128_HAS_BUILTIN(__builtin_popcountll) -BOOST_INT128_EXPORT constexpr int popcount(const uint128_t x) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int popcount(const uint128_t x) noexcept { return impl::popcount_impl(x.high) + impl::popcount_impl(x.low); } @@ -150,7 +150,7 @@ BOOST_INT128_EXPORT constexpr int popcount(const uint128_t x) noexcept #if BOOST_INT128_HAS_BUILTIN(__builtin_bswap64) -BOOST_INT128_EXPORT constexpr uint128_t byteswap(const uint128_t x) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t byteswap(const uint128_t x) noexcept { return {__builtin_bswap64(x.low), __builtin_bswap64(x.high)}; } @@ -159,14 +159,14 @@ BOOST_INT128_EXPORT constexpr uint128_t byteswap(const uint128_t x) noexcept namespace impl { -BOOST_INT128_EXPORT constexpr std::uint64_t byteswap_impl(const std::uint64_t x) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr std::uint64_t byteswap_impl(const std::uint64_t x) noexcept { const auto step32 {x << 32U | x >> 32U}; const auto step16 {(step32 & UINT64_C(0x0000FFFF0000FFFF)) << 16U | (step32 & UINT64_C(0xFFFF0000FFFF0000)) >> 16U}; return (step16 & UINT64_C(0x00FF00FF00FF00FF)) << 8U | (step16 & UINT64_C(0xFF00FF00FF00FF00)) >> 8U; } -BOOST_INT128_EXPORT constexpr uint128_t byteswap_impl(const uint128_t x) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t byteswap_impl(const 
uint128_t x) noexcept { return {byteswap_impl(x.low), byteswap_impl(x.high)}; } @@ -175,7 +175,7 @@ BOOST_INT128_EXPORT constexpr uint128_t byteswap_impl(const uint128_t x) noexcep #if defined(_MSC_VER) && !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION) && !BOOST_INT128_HAS_BUILTIN(__builtin_bswap64) -BOOST_INT128_EXPORT constexpr uint128_t byteswap(const uint128_t x) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t byteswap(const uint128_t x) noexcept { if (BOOST_INT128_IS_CONSTANT_EVALUATED(x)) { @@ -189,7 +189,7 @@ BOOST_INT128_EXPORT constexpr uint128_t byteswap(const uint128_t x) noexcept #elif !BOOST_INT128_HAS_BUILTIN(__builtin_bswap64) -BOOST_INT128_EXPORT constexpr uint128_t byteswap(const uint128_t x) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t byteswap(const uint128_t x) noexcept { return impl::byteswap_impl(x); } diff --git a/include/boost/int128/cstdlib.hpp b/include/boost/int128/cstdlib.hpp index 228ecd9c..2839ac39 100644 --- a/include/boost/int128/cstdlib.hpp +++ b/include/boost/int128/cstdlib.hpp @@ -22,7 +22,7 @@ BOOST_INT128_EXPORT struct i128div_t int128_t rem; }; -BOOST_INT128_EXPORT constexpr u128div_t div(const uint128_t x, const uint128_t y) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr u128div_t div(const uint128_t x, const uint128_t y) noexcept { if (BOOST_INT128_UNLIKELY(x == 0U || y == 0U)) { @@ -54,7 +54,7 @@ BOOST_INT128_EXPORT constexpr u128div_t div(const uint128_t x, const uint128_t y } } -BOOST_INT128_EXPORT constexpr i128div_t div(const int128_t x, const int128_t y) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr i128div_t div(const int128_t x, const int128_t y) noexcept { if (BOOST_INT128_UNLIKELY(x == 0 || y == 0)) { diff --git a/include/boost/int128/detail/clz.hpp b/include/boost/int128/detail/clz.hpp index 4356c830..af5474eb 100644 --- a/include/boost/int128/detail/clz.hpp +++ b/include/boost/int128/detail/clz.hpp @@ -32,7 +32,7 @@ 
BOOST_INT128_INLINE_CONSTEXPR int index64[64] = { 13, 18, 8, 12, 7, 6, 5, 63 }; -constexpr int bit_scan_reverse(std::uint64_t bb) noexcept +BOOST_INT128_HOST_DEVICE constexpr int bit_scan_reverse(std::uint64_t bb) noexcept { constexpr auto debruijn64 {UINT64_C(0x03f79d71b4cb0a89)}; @@ -56,7 +56,7 @@ BOOST_INT128_INLINE_CONSTEXPR int countl_mod37[37] = { 27, 12, 24, 13, 14, 0 }; -constexpr int backup_countl_impl(std::uint32_t x) noexcept +BOOST_INT128_HOST_DEVICE constexpr int backup_countl_impl(std::uint32_t x) noexcept { x |= x >> 1; x |= x >> 2; @@ -69,24 +69,24 @@ constexpr int backup_countl_impl(std::uint32_t x) noexcept #if BOOST_INT128_HAS_BUILTIN(__builtin_clz) -constexpr int countl_impl(unsigned int x) noexcept +BOOST_INT128_HOST_DEVICE constexpr int countl_impl(unsigned int x) noexcept { return x ? __builtin_clz(x) : std::numeric_limits::digits; } -constexpr int countl_impl(unsigned long x) noexcept +BOOST_INT128_HOST_DEVICE constexpr int countl_impl(unsigned long x) noexcept { return x ? __builtin_clzl(x) : std::numeric_limits::digits; } -constexpr int countl_impl(unsigned long long x) noexcept +BOOST_INT128_HOST_DEVICE constexpr int countl_impl(unsigned long long x) noexcept { return x ? 
__builtin_clzll(x) : std::numeric_limits::digits; } #elif (defined(_M_AMD64) || defined(_M_ARM64)) && !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION) -constexpr int countl_impl(std::uint32_t x) noexcept +BOOST_INT128_HOST_DEVICE constexpr int countl_impl(std::uint32_t x) noexcept { if (BOOST_INT128_IS_CONSTANT_EVALUATED(x)) { @@ -107,7 +107,7 @@ constexpr int countl_impl(std::uint32_t x) noexcept } } -constexpr int countl_impl(std::uint64_t x) noexcept +BOOST_INT128_HOST_DEVICE constexpr int countl_impl(std::uint64_t x) noexcept { if (BOOST_INT128_IS_CONSTANT_EVALUATED(x)) { @@ -130,7 +130,7 @@ constexpr int countl_impl(std::uint64_t x) noexcept #elif defined(_M_IX86) && !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION) -constexpr int countl_impl(std::uint32_t x) noexcept +BOOST_INT128_HOST_DEVICE constexpr int countl_impl(std::uint32_t x) noexcept { if (BOOST_INT128_IS_CONSTANT_EVALUATED(x)) { @@ -151,7 +151,7 @@ constexpr int countl_impl(std::uint32_t x) noexcept } } -constexpr int countl_impl(std::uint64_t x) noexcept +BOOST_INT128_HOST_DEVICE constexpr int countl_impl(std::uint64_t x) noexcept { return x ? bit_scan_reverse(static_cast(x)) ^ 63 : std::numeric_limits::digits; } @@ -159,12 +159,12 @@ constexpr int countl_impl(std::uint64_t x) noexcept #else template -constexpr int countl_impl(T x) noexcept +BOOST_INT128_HOST_DEVICE constexpr int countl_impl(T x) noexcept { return x ? 
bit_scan_reverse(static_cast(x)) ^ 63 : std::numeric_limits::digits; } -constexpr int countl_impl(std::uint32_t x) noexcept +BOOST_INT128_HOST_DEVICE constexpr int countl_impl(std::uint32_t x) noexcept { return backup_countl_impl(x); } @@ -175,7 +175,7 @@ constexpr int countl_impl(std::uint32_t x) noexcept } // namespace impl template -constexpr int countl_zero(T x) noexcept +BOOST_INT128_HOST_DEVICE constexpr int countl_zero(T x) noexcept { static_assert(std::numeric_limits::is_integer && !std::numeric_limits::is_signed, "Can only count with unsigned integers"); diff --git a/include/boost/int128/detail/common_div.hpp b/include/boost/int128/detail/common_div.hpp index 088816ca..fd3826a0 100644 --- a/include/boost/int128/detail/common_div.hpp +++ b/include/boost/int128/detail/common_div.hpp @@ -25,7 +25,7 @@ namespace detail { #endif template -BOOST_INT128_FORCE_INLINE constexpr void half_word_div(const T& lhs, const std::uint32_t rhs, T& quotient, T& remainder) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr void half_word_div(const T& lhs, const std::uint32_t rhs, T& quotient, T& remainder) noexcept { using high_word_type = decltype(T{}.high); @@ -54,7 +54,7 @@ BOOST_INT128_FORCE_INLINE constexpr void half_word_div(const T& lhs, const std:: } template -BOOST_INT128_FORCE_INLINE constexpr void half_word_div(const T& lhs, const std::uint32_t rhs, T& quotient) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr void half_word_div(const T& lhs, const std::uint32_t rhs, T& quotient) noexcept { BOOST_INT128_ASSUME(rhs != 0); // LCOV_EXCL_LINE @@ -73,7 +73,7 @@ namespace impl { #endif template -BOOST_INT128_FORCE_INLINE constexpr void unpack_v(std::uint32_t (&vn)[4], const std::uint32_t (&v)[v_size], +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr void unpack_v(std::uint32_t (&vn)[4], const std::uint32_t (&v)[v_size], const bool needs_shift, const int s, const int complement_s, const std::integral_constant&) 
noexcept { vn[1] = needs_shift ? ((v[1] << s) | (v[0] >> complement_s)) : v[1]; @@ -81,7 +81,7 @@ BOOST_INT128_FORCE_INLINE constexpr void unpack_v(std::uint32_t (&vn)[4], const } template -BOOST_INT128_FORCE_INLINE constexpr void unpack_v(std::uint32_t (&vn)[4], const std::uint32_t (&v)[v_size], +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr void unpack_v(std::uint32_t (&vn)[4], const std::uint32_t (&v)[v_size], const bool needs_shift, const int s, const int complement_s, const std::integral_constant&) noexcept { vn[3] = needs_shift ? ((v[3] << s) | (v[2] >> complement_s)) : v[3]; @@ -93,7 +93,7 @@ BOOST_INT128_FORCE_INLINE constexpr void unpack_v(std::uint32_t (&vn)[4], const // See: The Art of Computer Programming Volume 2 (Semi-numerical algorithms) section 4.3.1 // Algorithm D: Division of Non-negative integers template -constexpr void knuth_divide(std::uint32_t (&u)[u_size], const std::size_t m, +BOOST_INT128_HOST_DEVICE constexpr void knuth_divide(std::uint32_t (&u)[u_size], const std::size_t m, const std::uint32_t (&v)[v_size], const std::size_t n, std::uint32_t (&q)[q_size]) noexcept { @@ -203,7 +203,7 @@ constexpr void knuth_divide(std::uint32_t (&u)[u_size], const std::size_t m, #endif template -BOOST_INT128_FORCE_INLINE constexpr std::size_t to_words(const T& x, std::uint32_t (&words)[4]) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr std::size_t to_words(const T& x, std::uint32_t (&words)[4]) noexcept { #if !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION) && !BOOST_INT128_ENDIAN_BIG_BYTE if (!BOOST_INT128_IS_CONSTANT_EVALUATED(x)) @@ -230,7 +230,7 @@ BOOST_INT128_FORCE_INLINE constexpr std::size_t to_words(const T& x, std::uint32 return word_count; } -BOOST_INT128_FORCE_INLINE constexpr std::size_t to_words(const std::uint64_t x, std::uint32_t (&words)[2]) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr std::size_t to_words(const std::uint64_t x, std::uint32_t (&words)[2]) noexcept { #if 
!defined(BOOST_INT128_NO_CONSTEVAL_DETECTION) && !BOOST_INT128_ENDIAN_BIG_BYTE if (!BOOST_INT128_IS_CONSTANT_EVALUATED(x)) @@ -247,7 +247,7 @@ BOOST_INT128_FORCE_INLINE constexpr std::size_t to_words(const std::uint64_t x, return x > UINT32_MAX ? 2 : 1; } -BOOST_INT128_FORCE_INLINE constexpr std::size_t to_words(const std::uint32_t x, std::uint32_t (&words)[1]) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr std::size_t to_words(const std::uint32_t x, std::uint32_t (&words)[1]) noexcept { words[0] = x; @@ -255,7 +255,7 @@ BOOST_INT128_FORCE_INLINE constexpr std::size_t to_words(const std::uint32_t x, } template -BOOST_INT128_FORCE_INLINE constexpr T from_words(const std::uint32_t (&words)[4]) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr T from_words(const std::uint32_t (&words)[4]) noexcept { using high_word_type = decltype(T{}.high); @@ -268,7 +268,7 @@ BOOST_INT128_FORCE_INLINE constexpr T from_words(const std::uint32_t (&words)[4] #if defined(_M_AMD64) && !defined(__GNUC__) && !defined(__clang__) && _MSC_VER >= 1920 template -constexpr T div_mod_msvc(T dividend, T divisor, T& remainder) +BOOST_INT128_HOST_DEVICE constexpr T div_mod_msvc(T dividend, T divisor, T& remainder) { using high_word_type = decltype(T{}.high); @@ -395,7 +395,7 @@ constexpr T div_mod_msvc(T dividend, T divisor, T& remainder) // In the division case it is a waste of cycles template -BOOST_INT128_FORCE_INLINE constexpr void one_word_div(const T& lhs, const std::uint64_t rhs, T& quotient) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr void one_word_div(const T& lhs, const std::uint64_t rhs, T& quotient) noexcept { #if defined(_M_AMD64) && !defined(__GNUC__) && !defined(__clang__) && _MSC_VER >= 1920 && !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION) @@ -431,7 +431,7 @@ BOOST_INT128_FORCE_INLINE constexpr void one_word_div(const T& lhs, const std::u } template -BOOST_INT128_FORCE_INLINE constexpr void one_word_div(const T& 
lhs, const std::uint64_t rhs, T& quotient, T& remainder) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr void one_word_div(const T& lhs, const std::uint64_t rhs, T& quotient, T& remainder) noexcept { #if defined(_M_AMD64) && !defined(__GNUC__) && !defined(__clang__) && _MSC_VER >= 1920 && !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION) @@ -470,13 +470,13 @@ BOOST_INT128_FORCE_INLINE constexpr void one_word_div(const T& lhs, const std::u } template -BOOST_INT128_FORCE_INLINE constexpr void one_word_div(const T& lhs, const std::uint32_t rhs, T& quotient, T& remainder) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr void one_word_div(const T& lhs, const std::uint32_t rhs, T& quotient, T& remainder) noexcept { half_word_div(lhs, rhs, quotient, remainder); } template -BOOST_INT128_FORCE_INLINE constexpr void one_word_div(const T& lhs, const std::uint32_t rhs, T& quotient) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr void one_word_div(const T& lhs, const std::uint32_t rhs, T& quotient) noexcept { half_word_div(lhs, rhs, quotient); } @@ -488,7 +488,7 @@ BOOST_INT128_FORCE_INLINE constexpr void one_word_div(const T& lhs, const std::u #endif template -BOOST_INT128_FORCE_INLINE constexpr T knuth_div(const T& dividend, const T& divisor) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr T knuth_div(const T& dividend, const T& divisor) noexcept { BOOST_INT128_ASSUME(divisor != static_cast(0)); @@ -519,7 +519,7 @@ BOOST_INT128_FORCE_INLINE constexpr T knuth_div(const T& dividend, const T& divi } template -BOOST_INT128_FORCE_INLINE constexpr T knuth_div(const T& dividend, const T& divisor, T& remainder) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr T knuth_div(const T& dividend, const T& divisor, T& remainder) noexcept { BOOST_INT128_ASSUME(divisor != static_cast(0)); diff --git a/include/boost/int128/detail/common_mul.hpp 
b/include/boost/int128/detail/common_mul.hpp index a462627c..be26c763 100644 --- a/include/boost/int128/detail/common_mul.hpp +++ b/include/boost/int128/detail/common_mul.hpp @@ -21,7 +21,7 @@ namespace detail { // See: The Art of Computer Programming Volume 2 (Semi-numerical algorithms) section 4.3.1 // Algorithm M: Multiplication of Non-negative integers template -BOOST_INT128_FORCE_INLINE constexpr ReturnType knuth_multiply(const std::uint32_t (&u)[u_size], +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr ReturnType knuth_multiply(const std::uint32_t (&u)[u_size], const std::uint32_t (&v)[v_size]) noexcept { using high_word_type = decltype(ReturnType{}.high); @@ -59,7 +59,7 @@ BOOST_INT128_FORCE_INLINE constexpr ReturnType knuth_multiply(const std::uint32_ } template -BOOST_INT128_FORCE_INLINE constexpr void to_words(const T& x, std::uint32_t (&words)[4]) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr void to_words(const T& x, std::uint32_t (&words)[4]) noexcept { #ifndef BOOST_INT128_NO_CONSTEVAL_DETECTION @@ -78,7 +78,7 @@ BOOST_INT128_FORCE_INLINE constexpr void to_words(const T& x, std::uint32_t (&wo } -BOOST_INT128_FORCE_INLINE constexpr void to_words(const std::uint64_t x, std::uint32_t (&words)[2]) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr void to_words(const std::uint64_t x, std::uint32_t (&words)[2]) noexcept { #ifndef BOOST_INT128_NO_CONSTEVAL_DETECTION @@ -94,7 +94,7 @@ BOOST_INT128_FORCE_INLINE constexpr void to_words(const std::uint64_t x, std::ui words[1] = static_cast(x >> 32); // LCOV_EXCL_LINE } -BOOST_INT128_FORCE_INLINE constexpr void to_words(const std::uint32_t x, std::uint32_t (&words)[1]) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr void to_words(const std::uint32_t x, std::uint32_t (&words)[1]) noexcept { words[0] = x; } diff --git a/include/boost/int128/detail/conversions.hpp b/include/boost/int128/detail/conversions.hpp index 397a5e3b..a549db03 
100644 --- a/include/boost/int128/detail/conversions.hpp +++ b/include/boost/int128/detail/conversions.hpp @@ -26,15 +26,15 @@ BOOST_INT128_INLINE_CONSTEXPR bool is_valid_overload_v = valid_overload::valu #if BOOST_INT128_ENDIAN_LITTLE_BYTE -constexpr int128_t::int128_t(const uint128_t& v) noexcept : low {v.low}, high {static_cast(v.high)} {} +BOOST_INT128_HOST_DEVICE constexpr int128_t::int128_t(const uint128_t& v) noexcept : low {v.low}, high {static_cast(v.high)} {} -constexpr uint128_t::uint128_t(const int128_t& v) noexcept : low {v.low}, high {static_cast(v.high)} {} +BOOST_INT128_HOST_DEVICE constexpr uint128_t::uint128_t(const int128_t& v) noexcept : low {v.low}, high {static_cast(v.high)} {} #else -constexpr int128_t::int128_t(const uint128_t& v) noexcept : high {static_cast(v.high)}, low {v.low} {} +BOOST_INT128_HOST_DEVICE constexpr int128_t::int128_t(const uint128_t& v) noexcept : high {static_cast(v.high)}, low {v.low} {} -constexpr uint128_t::uint128_t(const int128_t& v) noexcept : high {static_cast(v.high)}, low {v.low} {} +BOOST_INT128_HOST_DEVICE constexpr uint128_t::uint128_t(const int128_t& v) noexcept : high {static_cast(v.high)}, low {v.low} {} #endif // BOOST_INT128_ENDIAN_LITTLE_BYTE @@ -48,7 +48,7 @@ constexpr uint128_t::uint128_t(const int128_t& v) noexcept : high {static_cast && detail::is_valid_overload_v && !std::is_same::value, bool> = true> -constexpr bool operator==(const T lhs, const U rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator==(const T lhs, const U rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -82,7 +82,7 @@ constexpr bool operator==(const T lhs, const U rhs) noexcept } template && detail::is_valid_overload_v && !std::is_same::value, bool> = true> -constexpr bool operator!=(const T lhs, const U rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const T lhs, const U rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -116,7 +116,7 @@ constexpr bool operator!=(const T lhs, const 
U rhs) noexcept } template && detail::is_valid_overload_v && !std::is_same::value, bool> = true> -constexpr bool operator<(const T lhs, const U rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator<(const T lhs, const U rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -150,7 +150,7 @@ constexpr bool operator<(const T lhs, const U rhs) noexcept } template && detail::is_valid_overload_v && !std::is_same::value, bool> = true> -constexpr bool operator<=(const T lhs, const U rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator<=(const T lhs, const U rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -184,7 +184,7 @@ constexpr bool operator<=(const T lhs, const U rhs) noexcept } template && detail::is_valid_overload_v && !std::is_same::value, bool> = true> -constexpr bool operator>(const T lhs, const U rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator>(const T lhs, const U rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -218,7 +218,7 @@ constexpr bool operator>(const T lhs, const U rhs) noexcept } template && detail::is_valid_overload_v && !std::is_same::value, bool> = true> -constexpr bool operator>=(const T lhs, const U rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator>=(const T lhs, const U rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -256,7 +256,7 @@ constexpr bool operator>=(const T lhs, const U rhs) noexcept //===================================== template && detail::is_valid_overload_v && !std::is_same::value, bool> = true> -constexpr uint128_t operator+(const T lhs, const U rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator+(const T lhs, const U rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -272,7 +272,7 @@ constexpr uint128_t operator+(const T lhs, const U rhs) noexcept } template && detail::is_valid_overload_v && !std::is_same::value, bool> = true> -constexpr uint128_t operator-(const T lhs, const U rhs) noexcept 
+BOOST_INT128_HOST_DEVICE constexpr uint128_t operator-(const T lhs, const U rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -288,7 +288,7 @@ constexpr uint128_t operator-(const T lhs, const U rhs) noexcept } template && detail::is_valid_overload_v && !std::is_same::value, bool> = true> -constexpr uint128_t operator*(const T lhs, const U rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const T lhs, const U rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -304,7 +304,7 @@ constexpr uint128_t operator*(const T lhs, const U rhs) noexcept } template && detail::is_valid_overload_v && !std::is_same::value, bool> = true> -constexpr uint128_t operator/(const T lhs, const U rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(const T lhs, const U rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -320,7 +320,7 @@ constexpr uint128_t operator/(const T lhs, const U rhs) noexcept } template && detail::is_valid_overload_v && !std::is_same::value, bool> = true> -constexpr uint128_t operator%(const T lhs, const U rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(const T lhs, const U rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION diff --git a/include/boost/int128/detail/ctz.hpp b/include/boost/int128/detail/ctz.hpp index eddb7c9f..297d884b 100644 --- a/include/boost/int128/detail/ctz.hpp +++ b/include/boost/int128/detail/ctz.hpp @@ -22,17 +22,17 @@ namespace impl { #if BOOST_INT128_HAS_BUILTIN(__builtin_ctz) -constexpr int countr_impl(unsigned int x) noexcept +BOOST_INT128_HOST_DEVICE constexpr int countr_impl(unsigned int x) noexcept { return x ? __builtin_ctz(x) : std::numeric_limits::digits; } -constexpr int countr_impl(unsigned long x) noexcept +BOOST_INT128_HOST_DEVICE constexpr int countr_impl(unsigned long x) noexcept { return x ? 
__builtin_ctzl(x) : std::numeric_limits::digits; } -constexpr int countr_impl(unsigned long long x) noexcept +BOOST_INT128_HOST_DEVICE constexpr int countr_impl(unsigned long long x) noexcept { return x ? __builtin_ctzll(x) : std::numeric_limits::digits; } @@ -52,7 +52,7 @@ BOOST_INT128_INLINE_CONSTEXPR int countr_mod37[37] = { #pragma warning(push) #pragma warning(disable : 4146) // unary minus operator applied to unsigned type, result still unsigned -constexpr int countr_impl(std::uint32_t x) noexcept +BOOST_INT128_HOST_DEVICE constexpr int countr_impl(std::uint32_t x) noexcept { if (BOOST_INT128_IS_CONSTANT_EVALUATED(x)) { @@ -82,7 +82,7 @@ constexpr int countr_impl(std::uint32_t x) noexcept #pragma warning(disable : 4146) // unary minus operator applied to unsigned type, result still unsigned #endif -constexpr int countr_impl(std::uint32_t x) noexcept +BOOST_INT128_HOST_DEVICE constexpr int countr_impl(std::uint32_t x) noexcept { return countr_mod37[(-x & x) % 37]; } @@ -95,7 +95,7 @@ constexpr int countr_impl(std::uint32_t x) noexcept #if (defined(_M_AMD64) || defined(_M_ARM64)) && !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION) && !BOOST_INT128_HAS_BUILTIN(__builtin_ctz) -constexpr int countr_impl(std::uint64_t x) noexcept +BOOST_INT128_HOST_DEVICE constexpr int countr_impl(std::uint64_t x) noexcept { if (BOOST_INT128_IS_CONSTANT_EVALUATED(x)) { @@ -118,7 +118,7 @@ constexpr int countr_impl(std::uint64_t x) noexcept #elif !BOOST_INT128_HAS_BUILTIN(__builtin_ctz) -constexpr int countr_impl(std::uint64_t x) noexcept +BOOST_INT128_HOST_DEVICE constexpr int countr_impl(std::uint64_t x) noexcept { return static_cast(x) != 0 ? 
countr_impl(static_cast(x)) : countr_impl(static_cast(x >> 32)) + 32; @@ -129,7 +129,7 @@ constexpr int countr_impl(std::uint64_t x) noexcept } // namespace impl template -constexpr int countr_zero(T x) noexcept +BOOST_INT128_HOST_DEVICE constexpr int countr_zero(T x) noexcept { static_assert(std::numeric_limits::is_integer && !std::numeric_limits::is_signed, "Can only count with unsigned integers"); diff --git a/include/boost/int128/detail/int128_imp.hpp b/include/boost/int128/detail/int128_imp.hpp index 9b1ddfe5..071fec63 100644 --- a/include/boost/int128/detail/int128_imp.hpp +++ b/include/boost/int128/detail/int128_imp.hpp @@ -49,211 +49,211 @@ int128_t #endif // Defaulted basic construction - constexpr int128_t() noexcept = default; - constexpr int128_t(const int128_t&) noexcept = default; - constexpr int128_t(int128_t&&) noexcept = default; - constexpr int128_t& operator=(const int128_t&) noexcept = default; - constexpr int128_t& operator=(int128_t&&) noexcept = default; + BOOST_INT128_HOST_DEVICE constexpr int128_t() noexcept = default; + BOOST_INT128_HOST_DEVICE constexpr int128_t(const int128_t&) noexcept = default; + BOOST_INT128_HOST_DEVICE constexpr int128_t(int128_t&&) noexcept = default; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator=(const int128_t&) noexcept = default; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator=(int128_t&&) noexcept = default; // Requires a conversion file to be implemented - explicit constexpr int128_t(const uint128_t& v) noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr int128_t(const uint128_t& v) noexcept; // Construct from integral types #if BOOST_INT128_ENDIAN_LITTLE_BYTE - constexpr int128_t(const std::int64_t hi, const std::uint64_t lo) noexcept : low{lo}, high{hi} {} + BOOST_INT128_HOST_DEVICE constexpr int128_t(const std::int64_t hi, const std::uint64_t lo) noexcept : low{lo}, high{hi} {} template - constexpr int128_t(const SignedInteger v) noexcept : low {static_cast(v)}, high {v < 0 ? 
-1 : 0} {} + BOOST_INT128_HOST_DEVICE constexpr int128_t(const SignedInteger v) noexcept : low {static_cast(v)}, high {v < 0 ? -1 : 0} {} template - constexpr int128_t(const UnsignedInteger v) noexcept : low {static_cast(v)}, high {} {} + BOOST_INT128_HOST_DEVICE constexpr int128_t(const UnsignedInteger v) noexcept : low {static_cast(v)}, high {} {} #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128) - BOOST_INT128_BUILTIN_CONSTEXPR int128_t(const detail::builtin_i128 v) noexcept : low {static_cast(v & static_cast(detail::low_word_mask))}, high {static_cast(v >> static_cast(64U))} {} - BOOST_INT128_BUILTIN_CONSTEXPR int128_t(const detail::builtin_u128 v) noexcept : low {static_cast(v & static_cast(detail::low_word_mask))}, high {static_cast(v >> static_cast(64U))} {} + BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t(const detail::builtin_i128 v) noexcept : low {static_cast(v & static_cast(detail::low_word_mask))}, high {static_cast(v >> static_cast(64U))} {} + BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t(const detail::builtin_u128 v) noexcept : low {static_cast(v & static_cast(detail::low_word_mask))}, high {static_cast(v >> static_cast(64U))} {} #endif // BOOST_INT128_HAS_INT128 #else // Big endian - constexpr int128_t(const std::int64_t hi, const std::uint64_t lo) noexcept : high{hi}, low{lo} {} + BOOST_INT128_HOST_DEVICE constexpr int128_t(const std::int64_t hi, const std::uint64_t lo) noexcept : high{hi}, low{lo} {} template - constexpr int128_t(const SignedInteger v) noexcept : high{v < 0 ? -1 : 0}, low{static_cast(v)} {} + BOOST_INT128_HOST_DEVICE constexpr int128_t(const SignedInteger v) noexcept : high{v < 0 ? 
-1 : 0}, low{static_cast(v)} {} template - constexpr int128_t(const UnsignedInteger v) noexcept : high {}, low {static_cast(v)} {} + BOOST_INT128_HOST_DEVICE constexpr int128_t(const UnsignedInteger v) noexcept : high {}, low {static_cast(v)} {} #ifdef BOOST_INT128_HAS_INT128 - constexpr int128_t(const detail::builtin_i128 v) noexcept : high {static_cast(v >> 64U)}, low {static_cast(v & detail::low_word_mask)} {} - constexpr int128_t(const detail::builtin_u128 v) noexcept : high {static_cast(v >> 64U)}, low {static_cast(v & detail::low_word_mask)} {} + BOOST_INT128_HOST_DEVICE constexpr int128_t(const detail::builtin_i128 v) noexcept : high {static_cast(v >> 64U)}, low {static_cast(v & detail::low_word_mask)} {} + BOOST_INT128_HOST_DEVICE constexpr int128_t(const detail::builtin_u128 v) noexcept : high {static_cast(v >> 64U)}, low {static_cast(v & detail::low_word_mask)} {} #endif // BOOST_INT128_HAS_INT128 #endif // BOOST_INT128_ENDIAN_LITTLE_BYTE // Integer Conversion operators - explicit constexpr operator bool() const noexcept { return low || high; } + BOOST_INT128_HOST_DEVICE explicit constexpr operator bool() const noexcept { return low || high; } template - explicit constexpr operator SignedInteger() const noexcept { return static_cast(low); } + BOOST_INT128_HOST_DEVICE explicit constexpr operator SignedInteger() const noexcept { return static_cast(low); } template - explicit constexpr operator UnsignedInteger() const noexcept { return static_cast(low); } + BOOST_INT128_HOST_DEVICE explicit constexpr operator UnsignedInteger() const noexcept { return static_cast(low); } #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128) - explicit BOOST_INT128_BUILTIN_CONSTEXPR operator detail::builtin_i128() const noexcept { return static_cast(static_cast(high) << static_cast(64)) | static_cast(low); } + BOOST_INT128_HOST_DEVICE explicit BOOST_INT128_BUILTIN_CONSTEXPR operator detail::builtin_i128() const noexcept { return 
static_cast(static_cast(high) << static_cast(64)) | static_cast(low); } - explicit BOOST_INT128_BUILTIN_CONSTEXPR operator detail::builtin_u128() const noexcept { return (static_cast(high) << static_cast(64)) | static_cast(low); } + BOOST_INT128_HOST_DEVICE explicit BOOST_INT128_BUILTIN_CONSTEXPR operator detail::builtin_u128() const noexcept { return (static_cast(high) << static_cast(64)) | static_cast(low); } #endif // BOOST_INT128_HAS_INT128 // Conversion to float // This is basically the same as ldexp(static_cast(high), 64) + static_cast(low), // but can be constexpr at C++11 instead of C++26 - explicit constexpr operator float() const noexcept; - explicit constexpr operator double() const noexcept; - explicit constexpr operator long double() const noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr operator float() const noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr operator double() const noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr operator long double() const noexcept; // Compound Or template - constexpr int128_t& operator|=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator|=(Integer rhs) noexcept; - constexpr int128_t& operator|=(int128_t rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator|=(int128_t rhs) noexcept; #ifdef BOOST_INT128_HAS_MSVC_INT128 template - inline int128_t& operator|=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE inline int128_t& operator|=(Integer rhs) noexcept; #endif // BOOST_INT128_HAS_MSVC_INT128 // Compound And template - constexpr int128_t& operator&=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator&=(Integer rhs) noexcept; - constexpr int128_t& operator&=(int128_t rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator&=(int128_t rhs) noexcept; #ifdef BOOST_INT128_HAS_MSVC_INT128 template - inline int128_t& operator&=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE inline int128_t& operator&=(Integer rhs) 
noexcept; #endif // BOOST_INT128_HAS_MSVC_INT128 // Compound XOR template - constexpr int128_t& operator^=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator^=(Integer rhs) noexcept; - constexpr int128_t& operator^=(int128_t rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator^=(int128_t rhs) noexcept; #ifdef BOOST_INT128_HAS_MSVC_INT128 template - inline int128_t& operator^=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE inline int128_t& operator^=(Integer rhs) noexcept; #endif // BOOST_INT128_HAS_MSVC_INT128 // Compound Left Shift template - constexpr int128_t& operator<<=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator<<=(Integer rhs) noexcept; - constexpr int128_t& operator<<=(int128_t rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator<<=(int128_t rhs) noexcept; #ifdef BOOST_INT128_HAS_MSVC_INT128 template - inline int128_t& operator<<=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE inline int128_t& operator<<=(Integer rhs) noexcept; #endif // BOOST_INT128_HAS_MSVC_INT128 // Compound Right Shift template - constexpr int128_t& operator>>=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator>>=(Integer rhs) noexcept; - constexpr int128_t& operator>>=(int128_t rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator>>=(int128_t rhs) noexcept; #ifdef BOOST_INT128_HAS_MSVC_INT128 template - inline int128_t& operator>>=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE inline int128_t& operator>>=(Integer rhs) noexcept; #endif // BOOST_INT128_HAS_MSVC_INT128 // Prefix and postfix increment - constexpr int128_t& operator++() noexcept; - constexpr int128_t operator++(int) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator++() noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t operator++(int) noexcept; // Prefix and postfix decrment - constexpr int128_t& operator--() noexcept; - constexpr int128_t 
operator--(int) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator--() noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t operator--(int) noexcept; // Compound Addition template - constexpr int128_t& operator+=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator+=(Integer rhs) noexcept; - constexpr int128_t& operator+=(int128_t rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator+=(int128_t rhs) noexcept; #ifdef BOOST_INT128_HAS_MSVC_INT128 template - inline int128_t& operator+=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE inline int128_t& operator+=(Integer rhs) noexcept; #endif // BOOST_INT128_HAS_MSVC_INT128 // Compound Subtraction template - constexpr int128_t& operator-=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator-=(Integer rhs) noexcept; - constexpr int128_t& operator-=(int128_t rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator-=(int128_t rhs) noexcept; #ifdef BOOST_INT128_HAS_MSVC_INT128 template - inline int128_t& operator-=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE inline int128_t& operator-=(Integer rhs) noexcept; #endif // BOOST_INT128_HAS_MSVC_INT128 // Compound Multiplication template - constexpr int128_t& operator*=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator*=(Integer rhs) noexcept; - constexpr int128_t& operator*=(int128_t rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator*=(int128_t rhs) noexcept; #ifdef BOOST_INT128_HAS_MSVC_INT128 template - inline int128_t& operator*=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE inline int128_t& operator*=(Integer rhs) noexcept; #endif // BOOST_INT128_HAS_MSVC_INT128 // Compound Division template - constexpr int128_t& operator/=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator/=(Integer rhs) noexcept; - constexpr int128_t& operator/=(int128_t rhs) noexcept; + BOOST_INT128_HOST_DEVICE 
constexpr int128_t& operator/=(int128_t rhs) noexcept; #ifdef BOOST_INT128_HAS_MSVC_INT128 template - inline int128_t& operator/=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE inline int128_t& operator/=(Integer rhs) noexcept; #endif // BOOST_INT128_HAS_MSVC_INT128 // Compound Modulo template - constexpr int128_t& operator%=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator%=(Integer rhs) noexcept; - constexpr int128_t& operator%=(int128_t rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator%=(int128_t rhs) noexcept; #ifdef BOOST_INT128_HAS_MSVC_INT128 template - inline int128_t& operator%=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE inline int128_t& operator%=(Integer rhs) noexcept; #endif // BOOST_INT128_HAS_MSVC_INT128 }; @@ -262,7 +262,7 @@ int128_t // Absolute Value function //===================================== -BOOST_INT128_EXPORT constexpr int128_t abs(int128_t value) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t abs(int128_t value) noexcept { if (value.high < 0) { @@ -282,17 +282,17 @@ BOOST_INT128_EXPORT constexpr int128_t abs(int128_t value) noexcept // by 0xFFFFFFFF in order to generally replicate what ldexp is doing in the constexpr context. 
// We also avoid pulling in for the __float128 case where we would need ldexpq -constexpr int128_t::operator float() const noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t::operator float() const noexcept { return static_cast(high) * detail::offset_value_v + static_cast(low); } -constexpr int128_t::operator double() const noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t::operator double() const noexcept { return static_cast(high) * detail::offset_value_v + static_cast(low); } -constexpr int128_t::operator long double() const noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t::operator long double() const noexcept { return static_cast(high) * detail::offset_value_v + static_cast(low); } @@ -301,12 +301,12 @@ constexpr int128_t::operator long double() const noexcept // Unary Operators //===================================== -BOOST_INT128_EXPORT constexpr int128_t operator+(const int128_t value) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator+(const int128_t value) noexcept { return value; } -BOOST_INT128_EXPORT constexpr int128_t operator-(const int128_t value) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator-(const int128_t value) noexcept { return (value.low == 0) ? 
int128_t{-value.high, 0} : int128_t{~value.high, ~value.low + 1}; @@ -316,12 +316,12 @@ BOOST_INT128_EXPORT constexpr int128_t operator-(const int128_t value) noexcept // Equality Operators //===================================== -BOOST_INT128_EXPORT constexpr bool operator==(const int128_t lhs, const bool rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator==(const int128_t lhs, const bool rhs) noexcept { return lhs.high == 0 && lhs.low == static_cast(rhs); } -BOOST_INT128_EXPORT constexpr bool operator==(const bool lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator==(const bool lhs, const int128_t rhs) noexcept { return rhs.high == 0 && rhs.low == static_cast(lhs); } @@ -336,7 +336,7 @@ BOOST_INT128_EXPORT constexpr bool operator==(const bool lhs, const int128_t rhs # pragma GCC diagnostic ignored "-Wsign-compare" #endif -BOOST_INT128_EXPORT constexpr bool operator==(const int128_t lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator==(const int128_t lhs, const int128_t rhs) noexcept { // x64 and ARM64 like the values in opposite directions @@ -352,19 +352,19 @@ BOOST_INT128_EXPORT constexpr bool operator==(const int128_t lhs, const int128_t } BOOST_INT128_EXPORT template -constexpr bool operator==(const int128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator==(const int128_t lhs, const SignedInteger rhs) noexcept { return lhs.high == (rhs < 0 ? -1 : 0) && lhs.low == static_cast(rhs); } BOOST_INT128_EXPORT template -constexpr bool operator==(const SignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator==(const SignedInteger lhs, const int128_t rhs) noexcept { return rhs.high == (lhs < 0 ? 
-1 : 0) && rhs.low == static_cast(lhs); } BOOST_INT128_EXPORT template -constexpr bool operator==(const int128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator==(const int128_t lhs, const UnsignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -381,7 +381,7 @@ constexpr bool operator==(const int128_t lhs, const UnsignedInteger rhs) noexcep } BOOST_INT128_EXPORT template -constexpr bool operator==(const UnsignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator==(const UnsignedInteger lhs, const int128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -399,24 +399,24 @@ constexpr bool operator==(const UnsignedInteger lhs, const int128_t rhs) noexcep #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128) -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const int128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const int128_t lhs, const detail::builtin_i128 rhs) noexcept { return lhs == static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const detail::builtin_i128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const detail::builtin_i128 lhs, const int128_t rhs) noexcept { return static_cast(lhs) == rhs; } #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const int128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const int128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs.high < 0 ? 
false : lhs == static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const detail::builtin_u128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const detail::builtin_u128 lhs, const int128_t rhs) noexcept { return rhs.high < 0 ? false : static_cast(lhs) == rhs; } @@ -424,14 +424,14 @@ BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const detail: #else BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const int128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const int128_t, const T) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return true; } BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const T, const int128_t) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const T, const int128_t) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return true; @@ -445,7 +445,7 @@ BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const T, const int128_t) noexcept // Inequality Operators //===================================== -BOOST_INT128_EXPORT constexpr bool operator!=(const int128_t lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const int128_t lhs, const int128_t rhs) noexcept { // x64 and ARM64 like the values in opposite directions @@ -477,30 +477,30 @@ BOOST_INT128_EXPORT constexpr bool operator!=(const int128_t lhs, const int128_t #endif } -BOOST_INT128_EXPORT constexpr bool operator!=(const int128_t lhs, const bool rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const int128_t lhs, const bool rhs) noexcept { return lhs.high != 0 || lhs.low != static_cast(rhs); } -BOOST_INT128_EXPORT constexpr bool 
operator!=(const bool lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const bool lhs, const int128_t rhs) noexcept { return rhs.high != 0 || rhs.low != static_cast(lhs); } BOOST_INT128_EXPORT template -constexpr bool operator!=(const int128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const int128_t lhs, const SignedInteger rhs) noexcept { return lhs.high != (rhs < 0 ? -1 : 0) || lhs.low != static_cast(rhs); } BOOST_INT128_EXPORT template -constexpr bool operator!=(const SignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const SignedInteger lhs, const int128_t rhs) noexcept { return rhs.high != (lhs < 0 ? -1 : 0) || rhs.low != static_cast(lhs); } BOOST_INT128_EXPORT template -constexpr bool operator!=(const int128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const int128_t lhs, const UnsignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -517,7 +517,7 @@ constexpr bool operator!=(const int128_t lhs, const UnsignedInteger rhs) noexcep } BOOST_INT128_EXPORT template -constexpr bool operator!=(const UnsignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const UnsignedInteger lhs, const int128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -535,24 +535,24 @@ constexpr bool operator!=(const UnsignedInteger lhs, const int128_t rhs) noexcep #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128) -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const int128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const int128_t lhs, const detail::builtin_i128 rhs) noexcept { return lhs != static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool 
operator!=(const detail::builtin_i128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const detail::builtin_i128 lhs, const int128_t rhs) noexcept { return static_cast(lhs) != rhs; } #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const int128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const int128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs.high < 0 ? true : lhs != static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const detail::builtin_u128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const detail::builtin_u128 lhs, const int128_t rhs) noexcept { return rhs.high < 0 ? true : static_cast(lhs) != rhs; } @@ -560,14 +560,14 @@ BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const detail: #else BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const int128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const int128_t, const T) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return true; } BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const T, const int128_t) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const T, const int128_t) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return true; @@ -581,7 +581,7 @@ BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const T, const int128_t) noexcept // Less than Operators //===================================== -BOOST_INT128_EXPORT constexpr bool operator<(const int128_t lhs, const int128_t rhs) noexcept 
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator<(const int128_t lhs, const int128_t rhs) noexcept { // On ARM macs only with the clang compiler is casting to __int128 uniformly better (and seemingly cost free) #if defined(__aarch64__) && defined(__APPLE__) && defined(__clang__) && defined(BOOST_INT128_HAS_INT128) @@ -613,7 +613,7 @@ BOOST_INT128_EXPORT constexpr bool operator<(const int128_t lhs, const int128_t } BOOST_INT128_EXPORT template -constexpr bool operator<(const int128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator<(const int128_t lhs, const UnsignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -630,7 +630,7 @@ constexpr bool operator<(const int128_t lhs, const UnsignedInteger rhs) noexcept } BOOST_INT128_EXPORT template -constexpr bool operator<(const UnsignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator<(const UnsignedInteger lhs, const int128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -647,7 +647,7 @@ constexpr bool operator<(const UnsignedInteger lhs, const int128_t rhs) noexcept } BOOST_INT128_EXPORT template -constexpr bool operator<(const int128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator<(const int128_t lhs, const SignedInteger rhs) noexcept { if (lhs.high < 0) { @@ -663,7 +663,7 @@ constexpr bool operator<(const int128_t lhs, const SignedInteger rhs) noexcept } BOOST_INT128_EXPORT template -constexpr bool operator<(const SignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator<(const SignedInteger lhs, const int128_t rhs) noexcept { if (rhs.high < 0) { @@ -681,24 +681,24 @@ constexpr bool operator<(const SignedInteger lhs, const int128_t rhs) noexcept #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128) -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const int128_t 
lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const int128_t lhs, const detail::builtin_i128 rhs) noexcept { return lhs < static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const detail::builtin_i128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const detail::builtin_i128 lhs, const int128_t rhs) noexcept { return static_cast(lhs) < rhs; } #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE -BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const int128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const int128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs.high < 0 ? false : lhs < static_cast(rhs); } -BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const detail::builtin_u128 lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const detail::builtin_u128 lhs, const int128_t rhs) noexcept { return rhs.high < 0 ? 
true : static_cast(lhs) < rhs; } @@ -706,14 +706,14 @@ BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const detail::builtin_u128 lhs, co #else // BOOST_INT128_ALLOW_SIGN_CONVERSION BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const int128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const int128_t, const T) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return true; } BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const T, const int128_t) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const T, const int128_t) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return true; @@ -727,7 +727,7 @@ BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const T, const int128_t) noexcept // Greater than Operators //===================================== -BOOST_INT128_EXPORT constexpr bool operator>(const int128_t lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator>(const int128_t lhs, const int128_t rhs) noexcept { // On ARM macs only with the clang compiler is casting to __int128 uniformly better (and seemingly cost free) #if defined(__aarch64__) && defined(__APPLE__) && defined(__clang__) && defined(BOOST_INT128_HAS_INT128) @@ -759,19 +759,19 @@ BOOST_INT128_EXPORT constexpr bool operator>(const int128_t lhs, const int128_t } BOOST_INT128_EXPORT template -constexpr bool operator>(const int128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator>(const int128_t lhs, const SignedInteger rhs) noexcept { return !(lhs < rhs) && !(lhs == rhs); } BOOST_INT128_EXPORT template -constexpr bool operator>(const SignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator>(const SignedInteger lhs, const int128_t rhs) noexcept { 
return !(lhs < rhs) && !(lhs == rhs); } BOOST_INT128_EXPORT template -constexpr bool operator>(const int128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator>(const int128_t lhs, const UnsignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -788,7 +788,7 @@ constexpr bool operator>(const int128_t lhs, const UnsignedInteger rhs) noexcept } BOOST_INT128_EXPORT template -constexpr bool operator>(const UnsignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator>(const UnsignedInteger lhs, const int128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -806,24 +806,24 @@ constexpr bool operator>(const UnsignedInteger lhs, const int128_t rhs) noexcept #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128) -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const int128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const int128_t lhs, const detail::builtin_i128 rhs) noexcept { return lhs > static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const detail::builtin_i128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const detail::builtin_i128 lhs, const int128_t rhs) noexcept { return static_cast(lhs) > rhs; } #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const int128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const int128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs.high < 0 ? 
false : lhs > static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const detail::builtin_u128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const detail::builtin_u128 lhs, const int128_t rhs) noexcept { return rhs.high < 0 ? true : static_cast(lhs) > rhs; } @@ -831,14 +831,14 @@ BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const detail:: #else // BOOST_INT128_ALLOW_SIGN_CONVERSION BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const int128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const int128_t, const T) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return true; } BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const T, const int128_t) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const T, const int128_t) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return true; @@ -852,7 +852,7 @@ BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const T, const int128_t) noexcept // Less Equal Operators //===================================== -BOOST_INT128_EXPORT constexpr bool operator<=(const int128_t lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator<=(const int128_t lhs, const int128_t rhs) noexcept { // On ARM macs only with the clang compiler is casting to __int128 uniformly better (and seemingly cost free) #if defined(__aarch64__) && defined(__APPLE__) && defined(__clang__) && defined(BOOST_INT128_HAS_INT128) @@ -884,19 +884,19 @@ BOOST_INT128_EXPORT constexpr bool operator<=(const int128_t lhs, const int128_t } BOOST_INT128_EXPORT template -constexpr bool operator<=(const int128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE 
constexpr bool operator<=(const int128_t lhs, const SignedInteger rhs) noexcept { return !(lhs > rhs); } BOOST_INT128_EXPORT template -constexpr bool operator<=(const SignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator<=(const SignedInteger lhs, const int128_t rhs) noexcept { return !(lhs > rhs); } BOOST_INT128_EXPORT template -constexpr bool operator<=(const int128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator<=(const int128_t lhs, const UnsignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -913,7 +913,7 @@ constexpr bool operator<=(const int128_t lhs, const UnsignedInteger rhs) noexcep } BOOST_INT128_EXPORT template -constexpr bool operator<=(const UnsignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator<=(const UnsignedInteger lhs, const int128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -931,24 +931,24 @@ constexpr bool operator<=(const UnsignedInteger lhs, const int128_t rhs) noexcep #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128) -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const int128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const int128_t lhs, const detail::builtin_i128 rhs) noexcept { return lhs <= static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const detail::builtin_i128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const detail::builtin_i128 lhs, const int128_t rhs) noexcept { return static_cast(lhs) <= rhs; } #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const int128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT 
BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const int128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs.high < 0 ? true : lhs <= static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const detail::builtin_u128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const detail::builtin_u128 lhs, const int128_t rhs) noexcept { return rhs.high < 0 ? false : static_cast(lhs) <= rhs; } @@ -956,14 +956,14 @@ BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const detail: #else // BOOST_INT128_ALLOW_SIGN_CONVERSION BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const int128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const int128_t, const T) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return true; } BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const T, const int128_t) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const T, const int128_t) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return true; @@ -977,7 +977,7 @@ BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const T, const int128_t) noexcept // Greater Equal Operators //===================================== -BOOST_INT128_EXPORT constexpr bool operator>=(const int128_t lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator>=(const int128_t lhs, const int128_t rhs) noexcept { // On ARM macs only with the clang compiler is casting to __int128 uniformly better (and seemingly cost free) #if defined(__aarch64__) && defined(__APPLE__) && defined(__clang__) && defined(BOOST_INT128_HAS_INT128) @@ -1009,19 +1009,19 @@ BOOST_INT128_EXPORT constexpr bool 
operator>=(const int128_t lhs, const int128_t } BOOST_INT128_EXPORT template -constexpr bool operator>=(const int128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator>=(const int128_t lhs, const SignedInteger rhs) noexcept { return !(lhs < rhs); } BOOST_INT128_EXPORT template -constexpr bool operator>=(const SignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator>=(const SignedInteger lhs, const int128_t rhs) noexcept { return !(lhs < rhs); } BOOST_INT128_EXPORT template -constexpr bool operator>=(const int128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator>=(const int128_t lhs, const UnsignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -1038,7 +1038,7 @@ constexpr bool operator>=(const int128_t lhs, const UnsignedInteger rhs) noexcep } BOOST_INT128_EXPORT template -constexpr bool operator>=(const UnsignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator>=(const UnsignedInteger lhs, const int128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -1056,24 +1056,24 @@ constexpr bool operator>=(const UnsignedInteger lhs, const int128_t rhs) noexcep #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128) -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const int128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const int128_t lhs, const detail::builtin_i128 rhs) noexcept { return lhs >= static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const detail::builtin_i128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const detail::builtin_i128 lhs, const int128_t rhs) noexcept { return static_cast(lhs) >= rhs; } #ifdef 
BOOST_INT128_ALLOW_SIGN_COMPARE -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const int128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const int128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs.high < 0 ? false : lhs >= static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const detail::builtin_u128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const detail::builtin_u128 lhs, const int128_t rhs) noexcept { return rhs.high < 0 ? true : static_cast(lhs) >= rhs; } @@ -1081,14 +1081,14 @@ BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const detail: #else // BOOST_INT128_ALLOW_SIGN_CONVERSION BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const int128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const int128_t, const T) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return true; } BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const T, const int128_t) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const T, const int128_t) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return true; @@ -1104,7 +1104,7 @@ BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const T, const int128_t) noexcept #ifdef BOOST_INT128_HAS_SPACESHIP_OPERATOR -BOOST_INT128_EXPORT constexpr std::strong_ordering operator<=>(const int128_t lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const int128_t lhs, const int128_t rhs) noexcept { if (lhs < rhs) { @@ -1121,7 +1121,7 @@ BOOST_INT128_EXPORT constexpr std::strong_ordering 
operator<=>(const int128_t lh } BOOST_INT128_EXPORT template -constexpr std::strong_ordering operator<=>(const int128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const int128_t lhs, const SignedInteger rhs) noexcept { if (lhs < rhs) { @@ -1138,7 +1138,7 @@ constexpr std::strong_ordering operator<=>(const int128_t lhs, const SignedInteg } BOOST_INT128_EXPORT template -constexpr std::strong_ordering operator<=>(const SignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const SignedInteger lhs, const int128_t rhs) noexcept { if (lhs < rhs) { @@ -1155,7 +1155,7 @@ constexpr std::strong_ordering operator<=>(const SignedInteger lhs, const int128 } BOOST_INT128_EXPORT template -constexpr std::strong_ordering operator<=>(const int128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const int128_t lhs, const UnsignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -1183,7 +1183,7 @@ constexpr std::strong_ordering operator<=>(const int128_t lhs, const UnsignedInt } BOOST_INT128_EXPORT template -constexpr std::strong_ordering operator<=>(const UnsignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const UnsignedInteger lhs, const int128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -1216,7 +1216,7 @@ constexpr std::strong_ordering operator<=>(const UnsignedInteger lhs, const int1 // Not Operator //===================================== -BOOST_INT128_EXPORT constexpr int128_t operator~(const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator~(const int128_t rhs) noexcept { return {~rhs.high, ~rhs.low}; } @@ -1225,25 +1225,25 @@ BOOST_INT128_EXPORT constexpr int128_t operator~(const int128_t rhs) noexcept // Or Operator 
//===================================== -BOOST_INT128_EXPORT constexpr int128_t operator|(const int128_t lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator|(const int128_t lhs, const int128_t rhs) noexcept { return {lhs.high | rhs.high, lhs.low | rhs.low}; } BOOST_INT128_EXPORT template -constexpr int128_t operator|(const int128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator|(const int128_t lhs, const SignedInteger rhs) noexcept { return {lhs.high | (rhs < 0 ? -1 : 0), lhs.low | static_cast(rhs)}; } BOOST_INT128_EXPORT template -constexpr int128_t operator|(const SignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator|(const SignedInteger lhs, const int128_t rhs) noexcept { return {rhs.high | (lhs < 0 ? -1 : 0), static_cast(lhs) | rhs.low}; } BOOST_INT128_EXPORT template -constexpr int128_t operator|(const int128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator|(const int128_t lhs, const UnsignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -1260,7 +1260,7 @@ constexpr int128_t operator|(const int128_t lhs, const UnsignedInteger rhs) noex } BOOST_INT128_EXPORT template -constexpr int128_t operator|(const UnsignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator|(const UnsignedInteger lhs, const int128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -1278,24 +1278,24 @@ constexpr int128_t operator|(const UnsignedInteger lhs, const int128_t rhs) noex #ifdef BOOST_INT128_HAS_INT128 -BOOST_INT128_EXPORT constexpr int128_t operator|(const int128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator|(const int128_t lhs, const detail::builtin_i128 rhs) noexcept { return lhs | static_cast(rhs); } -BOOST_INT128_EXPORT constexpr 
int128_t operator|(const detail::builtin_i128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator|(const detail::builtin_i128 lhs, const int128_t rhs) noexcept { return static_cast(lhs) | rhs; } #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT constexpr int128_t operator|(const int128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator|(const int128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs | static_cast(rhs); } -BOOST_INT128_EXPORT constexpr int128_t operator|(const detail::builtin_u128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator|(const detail::builtin_u128 lhs, const int128_t rhs) noexcept { return static_cast(lhs) | rhs; } @@ -1303,14 +1303,14 @@ BOOST_INT128_EXPORT constexpr int128_t operator|(const detail::builtin_u128 lhs, #else // BOOST_INT128_ALLOW_SIGN_CONVERSION BOOST_INT128_EXPORT template ::value, bool> = true> -constexpr int128_t operator|(const int128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator|(const int128_t, const T) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return {0, 0}; } BOOST_INT128_EXPORT template ::value, bool> = true> -constexpr int128_t operator|(const T, const int128_t) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator|(const T, const int128_t) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return {0, 0}; @@ -1325,7 +1325,7 @@ constexpr int128_t operator|(const T, const int128_t) noexcept //===================================== template -constexpr int128_t& int128_t::operator|=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator|=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(detail::is_signed_integer_v, "Sign Conversion Error"); @@ 
-1335,7 +1335,7 @@ constexpr int128_t& int128_t::operator|=(const Integer rhs) noexcept return *this; } -constexpr int128_t& int128_t::operator|=(const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator|=(const int128_t rhs) noexcept { *this = *this | rhs; return *this; @@ -1344,7 +1344,7 @@ constexpr int128_t& int128_t::operator|=(const int128_t rhs) noexcept #ifdef BOOST_INT128_HAS_MSVC_INT128 template -inline int128_t& int128_t::operator|=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE inline int128_t& int128_t::operator|=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(std::numeric_limits::is_signed, "Sign Conversion Error"); @@ -1360,25 +1360,25 @@ inline int128_t& int128_t::operator|=(const Integer rhs) noexcept // And Operator //===================================== -BOOST_INT128_EXPORT constexpr int128_t operator&(const int128_t lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator&(const int128_t lhs, const int128_t rhs) noexcept { return {lhs.high & rhs.high, lhs.low & rhs.low}; } BOOST_INT128_EXPORT template -constexpr int128_t operator&(const int128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator&(const int128_t lhs, const SignedInteger rhs) noexcept { return {lhs.high & (rhs < 0 ? -1 : 0), lhs.low & static_cast(rhs)}; } BOOST_INT128_EXPORT template -constexpr int128_t operator&(const SignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator&(const SignedInteger lhs, const int128_t rhs) noexcept { return {rhs.high & (lhs < 0 ? 
-1 : 0), static_cast(lhs) & rhs.low}; } BOOST_INT128_EXPORT template -constexpr int128_t operator&(const int128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator&(const int128_t lhs, const UnsignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -1395,7 +1395,7 @@ constexpr int128_t operator&(const int128_t lhs, const UnsignedInteger rhs) noex } BOOST_INT128_EXPORT template -constexpr int128_t operator&(const UnsignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator&(const UnsignedInteger lhs, const int128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -1413,24 +1413,24 @@ constexpr int128_t operator&(const UnsignedInteger lhs, const int128_t rhs) noex #ifdef BOOST_INT128_HAS_INT128 -BOOST_INT128_EXPORT constexpr int128_t operator&(const int128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator&(const int128_t lhs, const detail::builtin_i128 rhs) noexcept { return lhs & static_cast(rhs); } -BOOST_INT128_EXPORT constexpr int128_t operator&(const detail::builtin_i128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator&(const detail::builtin_i128 lhs, const int128_t rhs) noexcept { return static_cast(lhs) & rhs; } #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT constexpr int128_t operator&(const int128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator&(const int128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs & static_cast(rhs); } -BOOST_INT128_EXPORT constexpr int128_t operator&(const detail::builtin_u128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator&(const detail::builtin_u128 lhs, const int128_t rhs) noexcept { return static_cast(lhs) & rhs; } @@ 
-1438,14 +1438,14 @@ BOOST_INT128_EXPORT constexpr int128_t operator&(const detail::builtin_u128 lhs, #else // BOOST_INT128_ALLOW_SIGN_CONVERSION BOOST_INT128_EXPORT template ::value, bool> = true> -constexpr int128_t operator&(const int128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator&(const int128_t, const T) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return {0, 0}; } BOOST_INT128_EXPORT template ::value, bool> = true> -constexpr int128_t operator&(const T, const int128_t) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator&(const T, const int128_t) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return {0, 0}; @@ -1458,7 +1458,7 @@ constexpr int128_t operator&(const T, const int128_t) noexcept #ifdef BOOST_INT128_HAS_MSVC_INT128 template -inline int128_t& int128_t::operator&=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE inline int128_t& int128_t::operator&=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(std::numeric_limits::is_signed, "Sign Conversion Error"); @@ -1475,7 +1475,7 @@ inline int128_t& int128_t::operator&=(const Integer rhs) noexcept //===================================== template -constexpr int128_t& int128_t::operator&=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator&=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(detail::is_signed_integer_v, "Sign Conversion Error"); @@ -1485,7 +1485,7 @@ constexpr int128_t& int128_t::operator&=(const Integer rhs) noexcept return *this; } -constexpr int128_t& int128_t::operator&=(const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator&=(const int128_t rhs) noexcept { *this = *this & rhs; return *this; @@ -1495,25 +1495,25 @@ constexpr int128_t& int128_t::operator&=(const int128_t rhs) noexcept // XOR Operator 
//===================================== -BOOST_INT128_EXPORT constexpr int128_t operator^(const int128_t lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator^(const int128_t lhs, const int128_t rhs) noexcept { return {lhs.high ^ rhs.high, lhs.low ^ rhs.low}; } BOOST_INT128_EXPORT template -constexpr int128_t operator^(const int128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator^(const int128_t lhs, const SignedInteger rhs) noexcept { return {lhs.high ^ (rhs < 0 ? -1 : 0), lhs.low ^ static_cast(rhs)}; } BOOST_INT128_EXPORT template -constexpr int128_t operator^(const SignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator^(const SignedInteger lhs, const int128_t rhs) noexcept { return {rhs.high ^ (lhs < 0 ? -1 : 0), static_cast(lhs) ^ rhs.low}; } BOOST_INT128_EXPORT template -constexpr int128_t operator^(const int128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator^(const int128_t lhs, const UnsignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -1530,7 +1530,7 @@ constexpr int128_t operator^(const int128_t lhs, const UnsignedInteger rhs) noex } BOOST_INT128_EXPORT template -constexpr int128_t operator^(const UnsignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator^(const UnsignedInteger lhs, const int128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -1548,24 +1548,24 @@ constexpr int128_t operator^(const UnsignedInteger lhs, const int128_t rhs) noex #ifdef BOOST_INT128_HAS_INT128 -BOOST_INT128_EXPORT constexpr int128_t operator^(const int128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator^(const int128_t lhs, const detail::builtin_i128 rhs) noexcept { return lhs ^ static_cast(rhs); } -BOOST_INT128_EXPORT constexpr 
int128_t operator^(const detail::builtin_i128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator^(const detail::builtin_i128 lhs, const int128_t rhs) noexcept { return static_cast(lhs) ^ rhs; } #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT constexpr int128_t operator^(const int128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator^(const int128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs ^ static_cast(rhs); } -BOOST_INT128_EXPORT constexpr int128_t operator^(const detail::builtin_u128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator^(const detail::builtin_u128 lhs, const int128_t rhs) noexcept { return static_cast(lhs) ^ rhs; } @@ -1573,14 +1573,14 @@ BOOST_INT128_EXPORT constexpr int128_t operator^(const detail::builtin_u128 lhs, #else // BOOST_INT128_ALLOW_SIGN_CONVERSION BOOST_INT128_EXPORT template ::value, bool> = true> -constexpr int128_t operator^(const int128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator^(const int128_t, const T) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return {0, 0}; } BOOST_INT128_EXPORT template ::value, bool> = true> -constexpr int128_t operator^(const T, const int128_t) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator^(const T, const int128_t) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return {0, 0}; @@ -1595,7 +1595,7 @@ constexpr int128_t operator^(const T, const int128_t) noexcept //===================================== template -constexpr int128_t& int128_t::operator^=(Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator^=(Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(detail::is_signed_integer_v, "Sign Conversion Error"); @@ -1605,7 
+1605,7 @@ constexpr int128_t& int128_t::operator^=(Integer rhs) noexcept return *this; } -constexpr int128_t& int128_t::operator^=(int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator^=(int128_t rhs) noexcept { *this = *this ^ rhs; return *this; @@ -1614,7 +1614,7 @@ constexpr int128_t& int128_t::operator^=(int128_t rhs) noexcept #ifdef BOOST_INT128_HAS_MSVC_INT128 template -inline int128_t& int128_t::operator^=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE inline int128_t& int128_t::operator^=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(std::numeric_limits::is_signed, "Sign Conversion Error"); @@ -1633,7 +1633,7 @@ inline int128_t& int128_t::operator^=(const Integer rhs) noexcept namespace detail { template -constexpr int128_t default_ls_impl(const int128_t lhs, const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t default_ls_impl(const int128_t lhs, const Integer rhs) noexcept { static_assert(std::is_integral::value, "Only builtin types allowed"); @@ -1668,7 +1668,7 @@ constexpr int128_t default_ls_impl(const int128_t lhs, const Integer rhs) noexce } template -int128_t intrinsic_ls_impl(const int128_t lhs, const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE int128_t intrinsic_ls_impl(const int128_t lhs, const Integer rhs) noexcept { if (BOOST_INT128_UNLIKELY(rhs >= 128 || rhs < 0)) { @@ -1748,7 +1748,7 @@ int128_t intrinsic_ls_impl(const int128_t lhs, const Integer rhs) noexcept } // namespace detail BOOST_INT128_EXPORT template -constexpr int128_t operator<<(const int128_t lhs, const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator<<(const int128_t lhs, const Integer rhs) noexcept { #ifndef BOOST_INT128_NO_CONSTEVAL_DETECTION @@ -1768,7 +1768,7 @@ constexpr int128_t operator<<(const int128_t lhs, const Integer rhs) noexcept #endif } -constexpr int128_t operator<<(const int128_t lhs, const int128_t rhs) noexcept 
+BOOST_INT128_HOST_DEVICE constexpr int128_t operator<<(const int128_t lhs, const int128_t rhs) noexcept { if (rhs.high != 0 || rhs.low >= 128) { @@ -1780,7 +1780,7 @@ constexpr int128_t operator<<(const int128_t lhs, const int128_t rhs) noexcept #ifdef BOOST_INT128_HAS_INT128 -BOOST_INT128_EXPORT constexpr detail::builtin_u128 operator<<(const detail::builtin_u128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_u128 operator<<(const detail::builtin_u128 lhs, const int128_t rhs) noexcept { constexpr auto bit_width {sizeof(detail::builtin_u128) * 8}; @@ -1792,7 +1792,7 @@ BOOST_INT128_EXPORT constexpr detail::builtin_u128 operator<<(const detail::buil return lhs << rhs.low; } -BOOST_INT128_EXPORT constexpr detail::builtin_i128 operator<<(const detail::builtin_i128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_i128 operator<<(const detail::builtin_i128 lhs, const int128_t rhs) noexcept { constexpr auto bit_width {sizeof(detail::builtin_i128) * 8}; @@ -1807,7 +1807,7 @@ BOOST_INT128_EXPORT constexpr detail::builtin_i128 operator<<(const detail::buil #endif BOOST_INT128_EXPORT template && (sizeof(SignedInteger) * 8 <= 16), bool> = true> -constexpr int operator<<(const SignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int operator<<(const SignedInteger lhs, const int128_t rhs) noexcept { constexpr auto bit_width {sizeof(SignedInteger) * 8}; @@ -1820,7 +1820,7 @@ constexpr int operator<<(const SignedInteger lhs, const int128_t rhs) noexcept } BOOST_INT128_EXPORT template && (sizeof(UnsignedInteger) * 8 <= 16), bool> = true> -constexpr unsigned operator<<(const UnsignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr unsigned operator<<(const UnsignedInteger lhs, const int128_t rhs) noexcept { constexpr auto bit_width {sizeof(UnsignedInteger) * 8}; @@ -1838,13 +1838,13 @@ constexpr unsigned 
operator<<(const UnsignedInteger lhs, const int128_t rhs) noe #endif // _MSC_VER template -constexpr int128_t& int128_t::operator<<=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator<<=(const Integer rhs) noexcept { *this = *this << rhs; return *this; } -constexpr int128_t& int128_t::operator<<=(const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator<<=(const int128_t rhs) noexcept { *this = *this << rhs; return *this; @@ -1853,7 +1853,7 @@ constexpr int128_t& int128_t::operator<<=(const int128_t rhs) noexcept #ifdef BOOST_INT128_HAS_MSVC_INT128 template -inline int128_t& int128_t::operator<<=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE inline int128_t& int128_t::operator<<=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(std::numeric_limits::is_signed, "Sign Conversion Error"); @@ -1876,7 +1876,7 @@ inline int128_t& int128_t::operator<<=(const Integer rhs) noexcept namespace detail { template -constexpr int128_t default_rs_impl(const int128_t lhs, const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t default_rs_impl(const int128_t lhs, const Integer rhs) noexcept { if (rhs >= 128 || rhs < 0 ) { @@ -1905,7 +1905,7 @@ constexpr int128_t default_rs_impl(const int128_t lhs, const Integer rhs) noexce } template -int128_t intrinsic_rs_impl(const int128_t lhs, const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE int128_t intrinsic_rs_impl(const int128_t lhs, const Integer rhs) noexcept { if (BOOST_INT128_UNLIKELY(rhs >= 128 || rhs < 0)) { @@ -1982,7 +1982,7 @@ int128_t intrinsic_rs_impl(const int128_t lhs, const Integer rhs) noexcept } // namespace detail BOOST_INT128_EXPORT template -constexpr int128_t operator>>(const int128_t lhs, const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator>>(const int128_t lhs, const Integer rhs) noexcept { #ifndef BOOST_INT128_NO_CONSTEVAL_DETECTION @@ -2002,7 
+2002,7 @@ constexpr int128_t operator>>(const int128_t lhs, const Integer rhs) noexcept #endif } -BOOST_INT128_EXPORT constexpr int128_t operator>>(const int128_t lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator>>(const int128_t lhs, const int128_t rhs) noexcept { if (rhs.high != 0 || rhs.low >= 128) { @@ -2014,7 +2014,7 @@ BOOST_INT128_EXPORT constexpr int128_t operator>>(const int128_t lhs, const int1 #ifdef BOOST_INT128_HAS_INT128 -BOOST_INT128_EXPORT constexpr detail::builtin_u128 operator>>(const detail::builtin_u128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_u128 operator>>(const detail::builtin_u128 lhs, const int128_t rhs) noexcept { constexpr auto bit_width {sizeof(detail::builtin_u128) * 8}; @@ -2026,7 +2026,7 @@ BOOST_INT128_EXPORT constexpr detail::builtin_u128 operator>>(const detail::buil return lhs << rhs.low; } -BOOST_INT128_EXPORT constexpr detail::builtin_i128 operator>>(const detail::builtin_i128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_i128 operator>>(const detail::builtin_i128 lhs, const int128_t rhs) noexcept { constexpr auto bit_width {sizeof(detail::builtin_i128) * 8}; @@ -2041,7 +2041,7 @@ BOOST_INT128_EXPORT constexpr detail::builtin_i128 operator>>(const detail::buil #endif BOOST_INT128_EXPORT template && (sizeof(SignedInteger) * 8 <= 16), bool> = true> -constexpr int operator>>(const SignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int operator>>(const SignedInteger lhs, const int128_t rhs) noexcept { constexpr auto bit_width {sizeof(SignedInteger) * 8}; @@ -2054,7 +2054,7 @@ constexpr int operator>>(const SignedInteger lhs, const int128_t rhs) noexcept } BOOST_INT128_EXPORT template && (sizeof(UnsignedInteger) * 8 <= 16), bool> = true> -constexpr unsigned operator>>(const UnsignedInteger lhs, const int128_t rhs) 
noexcept +BOOST_INT128_HOST_DEVICE constexpr unsigned operator>>(const UnsignedInteger lhs, const int128_t rhs) noexcept { constexpr auto bit_width {sizeof(UnsignedInteger) * 8}; @@ -2072,13 +2072,13 @@ constexpr unsigned operator>>(const UnsignedInteger lhs, const int128_t rhs) noe #endif // _MSC_VER template -constexpr int128_t& int128_t::operator>>=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator>>=(const Integer rhs) noexcept { *this = *this >> rhs; return *this; } -constexpr int128_t& int128_t::operator>>=(const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator>>=(const int128_t rhs) noexcept { *this = *this >> rhs; return *this; @@ -2087,7 +2087,7 @@ constexpr int128_t& int128_t::operator>>=(const int128_t rhs) noexcept #ifdef BOOST_INT128_HAS_MSVC_INT128 template -inline int128_t& int128_t::operator>>=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE inline int128_t& int128_t::operator>>=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(std::numeric_limits::is_signed, "Sign Conversion Error"); @@ -2107,7 +2107,7 @@ inline int128_t& int128_t::operator>>=(const Integer rhs) noexcept // Increment Operators //===================================== -constexpr int128_t& int128_t::operator++() noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator++() noexcept { if (++low == UINT64_C(0)) { @@ -2117,7 +2117,7 @@ constexpr int128_t& int128_t::operator++() noexcept return *this; } -constexpr int128_t int128_t::operator++(int) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t int128_t::operator++(int) noexcept { const auto temp {*this}; ++(*this); @@ -2128,7 +2128,7 @@ constexpr int128_t int128_t::operator++(int) noexcept // Decrement Operators //===================================== -constexpr int128_t& int128_t::operator--() noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator--() noexcept { if (low-- 
== UINT64_C(0)) { @@ -2138,7 +2138,7 @@ constexpr int128_t& int128_t::operator--() noexcept return *this; } -constexpr int128_t int128_t::operator--(int) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t int128_t::operator--(int) noexcept { const auto temp {*this}; --(*this); @@ -2151,7 +2151,7 @@ constexpr int128_t int128_t::operator--(int) noexcept namespace detail { -BOOST_INT128_FORCE_INLINE constexpr int128_t library_add(const int128_t lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr int128_t library_add(const int128_t lhs, const int128_t rhs) noexcept { const auto new_low {lhs.low + rhs.low}; const auto new_high {static_cast(lhs.high) + @@ -2161,7 +2161,7 @@ BOOST_INT128_FORCE_INLINE constexpr int128_t library_add(const int128_t lhs, con return int128_t{static_cast(new_high), new_low}; } -BOOST_INT128_FORCE_INLINE constexpr int128_t default_add(const int128_t lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr int128_t default_add(const int128_t lhs, const int128_t rhs) noexcept { #if (defined(__x86_64__) || (defined(__aarch64__) && !defined(__APPLE__))) && !defined(_WIN32) && defined(BOOST_INT128_HAS_INT128) @@ -2199,7 +2199,7 @@ BOOST_INT128_FORCE_INLINE constexpr int128_t default_add(const int128_t lhs, con } template -BOOST_INT128_FORCE_INLINE constexpr int128_t default_add(const int128_t lhs, const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr int128_t default_add(const int128_t lhs, const Integer rhs) noexcept { const auto new_low {lhs.low + rhs}; const auto new_high {static_cast(lhs.high) + static_cast(new_low < lhs.low)}; @@ -2207,7 +2207,7 @@ BOOST_INT128_FORCE_INLINE constexpr int128_t default_add(const int128_t lhs, con return int128_t{static_cast(new_high), new_low}; } -BOOST_INT128_FORCE_INLINE constexpr int128_t library_sub(const int128_t lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE 
BOOST_INT128_FORCE_INLINE constexpr int128_t library_sub(const int128_t lhs, const int128_t rhs) noexcept { const auto new_low {lhs.low - rhs.low}; const auto new_high {static_cast(lhs.high) - static_cast(rhs.high) - static_cast(lhs.low < rhs.low)}; @@ -2215,7 +2215,7 @@ BOOST_INT128_FORCE_INLINE constexpr int128_t library_sub(const int128_t lhs, con return int128_t{static_cast(new_high), new_low}; } -BOOST_INT128_FORCE_INLINE constexpr int128_t default_sub(const int128_t lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr int128_t default_sub(const int128_t lhs, const int128_t rhs) noexcept { #if defined(BOOST_INT128_HAS_BUILTIN_SUB_OVERFLOW) && (!defined(__aarch64__) || defined(__APPLE__) || !defined(BOOST_INT128_HAS_INT128)) @@ -2252,7 +2252,7 @@ BOOST_INT128_FORCE_INLINE constexpr int128_t default_sub(const int128_t lhs, con } template -BOOST_INT128_FORCE_INLINE constexpr int128_t default_sub(const int128_t lhs, const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr int128_t default_sub(const int128_t lhs, const Integer rhs) noexcept { const auto new_low {lhs.low - rhs}; const auto new_high {static_cast(lhs.high) - static_cast(new_low > lhs.low)}; @@ -2265,14 +2265,14 @@ BOOST_INT128_FORCE_INLINE constexpr int128_t default_sub(const int128_t lhs, con // doing addition via subtraction is >10% faster in the benchmarks #if defined(__s390__) || defined(__s390x__) -constexpr int128_t operator+(const int128_t lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator+(const int128_t lhs, const int128_t rhs) noexcept { return detail::default_sub(lhs, -rhs); } #else -BOOST_INT128_EXPORT constexpr int128_t operator+(const int128_t lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator+(const int128_t lhs, const int128_t rhs) noexcept { return detail::default_add(lhs, rhs); } @@ -2280,7 +2280,7 @@ 
BOOST_INT128_EXPORT constexpr int128_t operator+(const int128_t lhs, const int12 #endif BOOST_INT128_EXPORT template -constexpr int128_t operator+(const int128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator+(const int128_t lhs, const UnsignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -2297,7 +2297,7 @@ constexpr int128_t operator+(const int128_t lhs, const UnsignedInteger rhs) noex } BOOST_INT128_EXPORT template -constexpr int128_t operator+(const UnsignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator+(const UnsignedInteger lhs, const int128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -2314,13 +2314,13 @@ constexpr int128_t operator+(const UnsignedInteger lhs, const int128_t rhs) noex } BOOST_INT128_EXPORT template -constexpr int128_t operator+(const int128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator+(const int128_t lhs, const SignedInteger rhs) noexcept { return rhs > 0 ? detail::default_add(lhs, rhs) : detail::default_sub(lhs, -rhs); } BOOST_INT128_EXPORT template -constexpr int128_t operator+(const SignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator+(const SignedInteger lhs, const int128_t rhs) noexcept { return lhs > 0 ? 
detail::default_add(rhs, lhs) : detail::default_sub(rhs, -lhs); } @@ -2329,12 +2329,12 @@ constexpr int128_t operator+(const SignedInteger lhs, const int128_t rhs) noexce #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator+(const int128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator+(const int128_t lhs, const detail::builtin_u128 rhs) noexcept { return detail::default_add(lhs, static_cast(rhs)); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator+(const detail::builtin_u128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator+(const detail::builtin_u128 lhs, const int128_t rhs) noexcept { return detail::default_add(rhs, static_cast(lhs)); } @@ -2342,14 +2342,14 @@ BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator+(const deta #else // BOOST_INT128_ALLOW_SIGN_CONVERSION BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator+(const int128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator+(const int128_t, const T) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return {0, 0}; } BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator+(const T, const int128_t) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator+(const T, const int128_t) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return {0, 0}; @@ -2357,12 +2357,12 @@ BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator+(const T, const int128_t) noexc #endif // BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator+(const int128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_HOST_DEVICE 
BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator+(const int128_t lhs, const detail::builtin_i128 rhs) noexcept { return detail::default_add(lhs, static_cast(rhs)); } -BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator+(const detail::builtin_i128 lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator+(const detail::builtin_i128 lhs, const int128_t rhs) noexcept { return detail::default_add(rhs, static_cast(lhs)); } @@ -2370,7 +2370,7 @@ BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator+(const detail::builtin_i128 lhs #endif // BOOST_INT128_HAS_INT128 template -constexpr int128_t& int128_t::operator+=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator+=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(detail::is_signed_integer_v, "Sign Conversion Error"); @@ -2380,7 +2380,7 @@ constexpr int128_t& int128_t::operator+=(const Integer rhs) noexcept return *this; } -constexpr int128_t& int128_t::operator+=(const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator+=(const int128_t rhs) noexcept { *this = *this + rhs; return *this; @@ -2389,7 +2389,7 @@ constexpr int128_t& int128_t::operator+=(const int128_t rhs) noexcept #ifdef BOOST_INT128_HAS_MSVC_INT128 template -inline int128_t& int128_t::operator+=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE inline int128_t& int128_t::operator+=(const Integer rhs) noexcept { *this = *this + rhs; return *this; @@ -2401,13 +2401,13 @@ inline int128_t& int128_t::operator+=(const Integer rhs) noexcept // Subtraction Operators //===================================== -BOOST_INT128_EXPORT constexpr int128_t operator-(const int128_t lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator-(const int128_t lhs, const int128_t rhs) noexcept { return detail::default_sub(lhs, rhs); } BOOST_INT128_EXPORT template -constexpr 
int128_t operator-(const int128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator-(const int128_t lhs, const UnsignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -2424,7 +2424,7 @@ constexpr int128_t operator-(const int128_t lhs, const UnsignedInteger rhs) noex } BOOST_INT128_EXPORT template -constexpr int128_t operator-(const UnsignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator-(const UnsignedInteger lhs, const int128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -2441,13 +2441,13 @@ constexpr int128_t operator-(const UnsignedInteger lhs, const int128_t rhs) noex } BOOST_INT128_EXPORT template -constexpr int128_t operator-(const int128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator-(const int128_t lhs, const SignedInteger rhs) noexcept { return detail::default_sub(lhs, static_cast(rhs)); } BOOST_INT128_EXPORT template -constexpr int128_t operator-(const SignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator-(const SignedInteger lhs, const int128_t rhs) noexcept { return detail::default_sub(static_cast(lhs), rhs); } @@ -2456,12 +2456,12 @@ constexpr int128_t operator-(const SignedInteger lhs, const int128_t rhs) noexce #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator-(const int128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator-(const int128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs - static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator-(const detail::builtin_u128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator-(const detail::builtin_u128 lhs, 
const int128_t rhs) noexcept { return static_cast(lhs) - rhs; } @@ -2469,14 +2469,14 @@ BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator-(const deta #else // BOOST_INT128_ALLOW_SIGN_CONVERSION BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator-(const int128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator-(const int128_t, const T) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return {0, 0}; } BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator-(const T, const int128_t) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator-(const T, const int128_t) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return {0, 0}; @@ -2484,12 +2484,12 @@ BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator-(const T, const int128_t) noexc #endif // BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator-(const int128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator-(const int128_t lhs, const detail::builtin_i128 rhs) noexcept { return lhs - static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator-(const detail::builtin_i128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator-(const detail::builtin_i128 lhs, const int128_t rhs) noexcept { return static_cast(lhs) - rhs; } @@ -2497,7 +2497,7 @@ BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator-(const deta #endif template -constexpr int128_t& int128_t::operator-=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator-=(const Integer rhs) noexcept { #ifndef 
BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(detail::is_signed_integer_v, "Sign Conversion Error"); @@ -2507,7 +2507,7 @@ constexpr int128_t& int128_t::operator-=(const Integer rhs) noexcept return *this; } -constexpr int128_t& int128_t::operator-=(const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator-=(const int128_t rhs) noexcept { *this = *this - rhs; return *this; @@ -2516,7 +2516,7 @@ constexpr int128_t& int128_t::operator-=(const int128_t rhs) noexcept #ifdef BOOST_INT128_HAS_MSVC_INT128 template -inline int128_t& int128_t::operator-=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE inline int128_t& int128_t::operator-=(const Integer rhs) noexcept { *this = *this - rhs; return *this; @@ -2530,12 +2530,12 @@ inline int128_t& int128_t::operator-=(const Integer rhs) noexcept namespace detail { -BOOST_INT128_FORCE_INLINE constexpr int128_t signed_shift_left_32(const std::uint64_t low) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr int128_t signed_shift_left_32(const std::uint64_t low) noexcept { return {static_cast(low >> 32), low << 32}; } -BOOST_INT128_FORCE_INLINE constexpr int128_t library_mul(const int128_t lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr int128_t library_mul(const int128_t lhs, const int128_t rhs) noexcept { const auto a {lhs.low >> 32U}; const auto b {lhs.low & UINT32_MAX}; @@ -2548,7 +2548,7 @@ BOOST_INT128_FORCE_INLINE constexpr int128_t library_mul(const int128_t lhs, con return result; } -BOOST_INT128_FORCE_INLINE constexpr int128_t default_mul(const int128_t lhs, const std::uint64_t rhs) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr int128_t default_mul(const int128_t lhs, const std::uint64_t rhs) noexcept { const auto low_res{lhs.low * rhs}; @@ -2571,7 +2571,7 @@ BOOST_INT128_FORCE_INLINE constexpr int128_t default_mul(const int128_t lhs, con return {high_res, low_res}; } 
-BOOST_INT128_FORCE_INLINE constexpr int128_t default_mul(const int128_t lhs, const std::uint32_t rhs) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr int128_t default_mul(const int128_t lhs, const std::uint32_t rhs) noexcept { const auto low_res{lhs.low * rhs}; @@ -2585,7 +2585,7 @@ BOOST_INT128_FORCE_INLINE constexpr int128_t default_mul(const int128_t lhs, con #if defined(_M_AMD64) && !defined(__GNUC__) -BOOST_INT128_FORCE_INLINE int128_t msvc_amd64_mul(const int128_t lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE int128_t msvc_amd64_mul(const int128_t lhs, const int128_t rhs) noexcept { int128_t result {}; result.low = _umul128(lhs.low, rhs.low, reinterpret_cast(&result.high)); @@ -2597,7 +2597,7 @@ BOOST_INT128_FORCE_INLINE int128_t msvc_amd64_mul(const int128_t lhs, const int1 #endif -BOOST_INT128_FORCE_INLINE constexpr int128_t default_mul(const int128_t lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr int128_t default_mul(const int128_t lhs, const int128_t rhs) noexcept { #if ((defined(__aarch64__) && defined(__APPLE__)) || defined(__x86_64__) || defined(__PPC__) || defined(__powerpc__)) && defined(__GNUC__) && !defined(__clang__) && defined(BOOST_INT128_HAS_INT128) @@ -2680,13 +2680,13 @@ BOOST_INT128_FORCE_INLINE constexpr int128_t default_mul(const int128_t lhs, con } // namespace detail -BOOST_INT128_EXPORT constexpr int128_t operator*(const int128_t lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const int128_t lhs, const int128_t rhs) noexcept { return detail::default_mul(lhs, rhs); } BOOST_INT128_EXPORT template -constexpr int128_t operator*(const int128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const int128_t lhs, const UnsignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -2704,7 +2704,7 @@ constexpr 
int128_t operator*(const int128_t lhs, const UnsignedInteger rhs) noex } BOOST_INT128_EXPORT template -constexpr int128_t operator*(const UnsignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const UnsignedInteger lhs, const int128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -2727,14 +2727,14 @@ constexpr int128_t operator*(const UnsignedInteger lhs, const int128_t rhs) noex #endif BOOST_INT128_EXPORT template -constexpr int128_t operator*(const int128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const int128_t lhs, const SignedInteger rhs) noexcept { return rhs < 0 ? -detail::default_mul(lhs, -static_cast(rhs)) : detail::default_mul(lhs, static_cast(rhs)); } BOOST_INT128_EXPORT template -constexpr int128_t operator*(const SignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const SignedInteger lhs, const int128_t rhs) noexcept { return lhs < 0 ? 
-detail::default_mul(rhs, -static_cast(lhs)) : detail::default_mul(rhs, static_cast(lhs)); @@ -2748,12 +2748,12 @@ constexpr int128_t operator*(const SignedInteger lhs, const int128_t rhs) noexce #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT constexpr int128_t operator*(const int128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const int128_t lhs, const detail::builtin_u128 rhs) noexcept { return static_cast(static_cast(lhs) * rhs); } -BOOST_INT128_EXPORT constexpr int128_t operator*(const detail::builtin_u128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const detail::builtin_u128 lhs, const int128_t rhs) noexcept { return static_cast(static_cast(rhs) * lhs); } @@ -2761,14 +2761,14 @@ BOOST_INT128_EXPORT constexpr int128_t operator*(const detail::builtin_u128 lhs, #else // BOOST_INT128_ALLOW_SIGN_CONVERSION BOOST_INT128_EXPORT template ::value, bool> = true> -constexpr int128_t operator*(const int128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const int128_t, const T) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return {0, 0}; } BOOST_INT128_EXPORT template ::value, bool> = true> -constexpr int128_t operator*(const T, const int128_t) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const T, const int128_t) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return {0, 0}; @@ -2776,12 +2776,12 @@ constexpr int128_t operator*(const T, const int128_t) noexcept #endif // BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT constexpr int128_t operator*(const int128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const int128_t lhs, const detail::builtin_i128 rhs) noexcept { return detail::default_mul(lhs, static_cast(rhs)); } 
-BOOST_INT128_EXPORT constexpr int128_t operator*(const detail::builtin_i128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const detail::builtin_i128 lhs, const int128_t rhs) noexcept { return detail::default_mul(rhs, static_cast(lhs)); } @@ -2789,7 +2789,7 @@ BOOST_INT128_EXPORT constexpr int128_t operator*(const detail::builtin_i128 lhs, #endif // BOOST_INT128_HAS_INT128 template -constexpr int128_t& int128_t::operator*=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator*=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(detail::is_signed_integer_v, "Sign Conversion Error"); @@ -2799,7 +2799,7 @@ constexpr int128_t& int128_t::operator*=(const Integer rhs) noexcept return *this; } -constexpr int128_t& int128_t::operator*=(const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator*=(const int128_t rhs) noexcept { *this = *this * rhs; return *this; @@ -2808,7 +2808,7 @@ constexpr int128_t& int128_t::operator*=(const int128_t rhs) noexcept #ifdef BOOST_INT128_HAS_MSVC_INT128 template -inline int128_t& int128_t::operator*=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE inline int128_t& int128_t::operator*=(const Integer rhs) noexcept { *this = *this * rhs; return *this; @@ -2825,7 +2825,7 @@ inline int128_t& int128_t::operator*=(const Integer rhs) noexcept # pragma clang diagnostic ignored "-Wassume" #endif -BOOST_INT128_EXPORT constexpr int128_t operator/(const int128_t lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const int128_t lhs, const int128_t rhs) noexcept { if (BOOST_INT128_UNLIKELY(rhs == 0)) { @@ -2870,7 +2870,7 @@ BOOST_INT128_EXPORT constexpr int128_t operator/(const int128_t lhs, const int12 } BOOST_INT128_EXPORT template -constexpr int128_t operator/(const int128_t lhs, const UnsignedInteger rhs) noexcept 
+BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const int128_t lhs, const UnsignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -2898,7 +2898,7 @@ constexpr int128_t operator/(const int128_t lhs, const UnsignedInteger rhs) noex } BOOST_INT128_EXPORT template -constexpr int128_t operator/(const UnsignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const UnsignedInteger lhs, const int128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -2929,7 +2929,7 @@ constexpr int128_t operator/(const UnsignedInteger lhs, const int128_t rhs) noex } BOOST_INT128_EXPORT template -constexpr int128_t operator/(const int128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const int128_t lhs, const SignedInteger rhs) noexcept { using eval_type = detail::evaluation_type_t; @@ -2955,7 +2955,7 @@ constexpr int128_t operator/(const int128_t lhs, const SignedInteger rhs) noexce } BOOST_INT128_EXPORT template -constexpr int128_t operator/(const SignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const SignedInteger lhs, const int128_t rhs) noexcept { if (BOOST_INT128_UNLIKELY(rhs == 0)) { @@ -2981,12 +2981,12 @@ constexpr int128_t operator/(const SignedInteger lhs, const int128_t rhs) noexce #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT constexpr int128_t operator/(const int128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const int128_t lhs, const detail::builtin_u128 rhs) noexcept { return static_cast(static_cast(lhs) / rhs); } -BOOST_INT128_EXPORT constexpr int128_t operator/(const detail::builtin_u128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const detail::builtin_u128 lhs, const int128_t rhs) noexcept { return static_cast(lhs / 
static_cast(rhs)); } @@ -2994,14 +2994,14 @@ BOOST_INT128_EXPORT constexpr int128_t operator/(const detail::builtin_u128 lhs, #else // BOOST_INT128_ALLOW_SIGN_CONVERSION BOOST_INT128_EXPORT template ::value, bool> = true> -constexpr int128_t operator/(const int128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const int128_t, const T) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return {0, 0}; } BOOST_INT128_EXPORT template ::value, bool> = true> -constexpr int128_t operator/(const T, const int128_t) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const T, const int128_t) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return {0, 0}; @@ -3009,12 +3009,12 @@ constexpr int128_t operator/(const T, const int128_t) noexcept #endif // BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT constexpr int128_t operator/(const int128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const int128_t lhs, const detail::builtin_i128 rhs) noexcept { return static_cast(static_cast(lhs) / rhs); } -BOOST_INT128_EXPORT constexpr int128_t operator/(const detail::builtin_i128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const detail::builtin_i128 lhs, const int128_t rhs) noexcept { return static_cast(lhs / static_cast(rhs)); } @@ -3023,12 +3023,12 @@ BOOST_INT128_EXPORT constexpr int128_t operator/(const detail::builtin_i128 lhs, #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT inline int128_t operator/(const int128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE inline int128_t operator/(const int128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs / static_cast(rhs); } -BOOST_INT128_EXPORT inline int128_t operator/(const detail::builtin_u128 lhs, const int128_t rhs) 
noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE inline int128_t operator/(const detail::builtin_u128 lhs, const int128_t rhs) noexcept { return static_cast(lhs) / rhs; } @@ -3036,14 +3036,14 @@ BOOST_INT128_EXPORT inline int128_t operator/(const detail::builtin_u128 lhs, co #else // BOOST_INT128_ALLOW_SIGN_CONVERSION BOOST_INT128_EXPORT template ::value, bool> = true> -inline int128_t operator/(const int128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE inline int128_t operator/(const int128_t, const T) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return {0, 0}; } BOOST_INT128_EXPORT template ::value, bool> = true> -inline int128_t operator/(const T, const int128_t) noexcept +BOOST_INT128_HOST_DEVICE inline int128_t operator/(const T, const int128_t) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return {0, 0}; @@ -3051,12 +3051,12 @@ inline int128_t operator/(const T, const int128_t) noexcept #endif // BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT inline int128_t operator/(const int128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE inline int128_t operator/(const int128_t lhs, const detail::builtin_i128 rhs) noexcept { return lhs / static_cast(rhs); } -BOOST_INT128_EXPORT inline int128_t operator/(const detail::builtin_i128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE inline int128_t operator/(const detail::builtin_i128 lhs, const int128_t rhs) noexcept { return static_cast(lhs) / rhs; } @@ -3064,7 +3064,7 @@ BOOST_INT128_EXPORT inline int128_t operator/(const detail::builtin_i128 lhs, co #endif // BOOST_INT128_HAS_INT128 template -constexpr int128_t& int128_t::operator/=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator/=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(detail::is_signed_integer_v, "Sign Conversion Error"); 
@@ -3074,7 +3074,7 @@ constexpr int128_t& int128_t::operator/=(const Integer rhs) noexcept return *this; } -constexpr int128_t& int128_t::operator/=(const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator/=(const int128_t rhs) noexcept { *this = *this / rhs; return *this; @@ -3083,7 +3083,7 @@ constexpr int128_t& int128_t::operator/=(const int128_t rhs) noexcept #ifdef BOOST_INT128_HAS_MSVC_INT128 template -inline int128_t& int128_t::operator/=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE inline int128_t& int128_t::operator/=(const Integer rhs) noexcept { *this = *this / rhs; return *this; @@ -3102,21 +3102,21 @@ inline int128_t& int128_t::operator/=(const Integer rhs) noexcept //===================================== BOOST_INT128_EXPORT template -constexpr int128_t operator%(int128_t lhs, UnsignedInteger rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(int128_t lhs, UnsignedInteger rhs) noexcept; BOOST_INT128_EXPORT template -constexpr int128_t operator%(UnsignedInteger lhs, int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(UnsignedInteger lhs, int128_t rhs) noexcept; BOOST_INT128_EXPORT template -constexpr int128_t operator%(int128_t lhs, SignedInteger rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(int128_t lhs, SignedInteger rhs) noexcept; BOOST_INT128_EXPORT template -constexpr int128_t operator%(SignedInteger lhs, int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(SignedInteger lhs, int128_t rhs) noexcept; -BOOST_INT128_EXPORT constexpr int128_t operator%(int128_t lhs, int128_t rhs) noexcept; +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(int128_t lhs, int128_t rhs) noexcept; template -constexpr int128_t operator%(const int128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const int128_t lhs, const UnsignedInteger rhs) noexcept { #ifdef 
BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -3147,7 +3147,7 @@ constexpr int128_t operator%(const int128_t lhs, const UnsignedInteger rhs) noex } template -constexpr int128_t operator%(const UnsignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const UnsignedInteger lhs, const int128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -3180,18 +3180,18 @@ constexpr int128_t operator%(const UnsignedInteger lhs, const int128_t rhs) noex } template -constexpr int128_t operator%(const int128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const int128_t lhs, const SignedInteger rhs) noexcept { return lhs % static_cast(rhs); } template -constexpr int128_t operator%(const SignedInteger lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const SignedInteger lhs, const int128_t rhs) noexcept { return static_cast(lhs) % rhs; } -constexpr int128_t operator%(const int128_t lhs, const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const int128_t lhs, const int128_t rhs) noexcept { if (rhs == 0) { @@ -3241,24 +3241,24 @@ constexpr int128_t operator%(const int128_t lhs, const int128_t rhs) noexcept #ifdef BOOST_INT128_HAS_INT128 -BOOST_INT128_EXPORT constexpr int128_t operator%(const int128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const int128_t lhs, const detail::builtin_i128 rhs) noexcept { return static_cast(lhs) % rhs; } -BOOST_INT128_EXPORT constexpr int128_t operator%(const detail::builtin_i128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const detail::builtin_i128 lhs, const int128_t rhs) noexcept { return lhs % static_cast(rhs); } #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT constexpr int128_t operator%(const int128_t lhs, const 
detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const int128_t lhs, const detail::builtin_u128 rhs) noexcept { return static_cast(static_cast(lhs) % rhs); } -BOOST_INT128_EXPORT constexpr int128_t operator%(const detail::builtin_u128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const detail::builtin_u128 lhs, const int128_t rhs) noexcept { return static_cast(lhs % static_cast(rhs)); } @@ -3266,14 +3266,14 @@ BOOST_INT128_EXPORT constexpr int128_t operator%(const detail::builtin_u128 lhs, #else // BOOST_INT128_ALLOW_SIGN_CONVERSION BOOST_INT128_EXPORT template ::value, bool> = true> -constexpr int128_t operator%(const int128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const int128_t, const T) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return {0, 0}; } BOOST_INT128_EXPORT template ::value, bool> = true> -constexpr int128_t operator%(const T, const int128_t) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const T, const int128_t) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return {0, 0}; @@ -3283,24 +3283,24 @@ constexpr int128_t operator%(const T, const int128_t) noexcept #elif defined(BOOST_INT128_HAS_MSVC_INT128) -BOOST_INT128_EXPORT inline int128_t operator%(const int128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE inline int128_t operator%(const int128_t lhs, const detail::builtin_i128 rhs) noexcept { return lhs % static_cast(rhs); } -BOOST_INT128_EXPORT inline int128_t operator%(const detail::builtin_i128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE inline int128_t operator%(const detail::builtin_i128 lhs, const int128_t rhs) noexcept { return static_cast(lhs) % rhs; } #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT inline 
int128_t operator%(const int128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE inline int128_t operator%(const int128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs % static_cast(rhs); } -BOOST_INT128_EXPORT inline int128_t operator%(const detail::builtin_u128 lhs, const int128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE inline int128_t operator%(const detail::builtin_u128 lhs, const int128_t rhs) noexcept { return static_cast(lhs) % rhs; } @@ -3308,14 +3308,14 @@ BOOST_INT128_EXPORT inline int128_t operator%(const detail::builtin_u128 lhs, co #else // BOOST_INT128_ALLOW_SIGN_CONVERSION BOOST_INT128_EXPORT template ::value, bool> = true> -inline int128_t operator%(const int128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE inline int128_t operator%(const int128_t, const T) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return {0, 0}; } BOOST_INT128_EXPORT template ::value, bool> = true> -inline int128_t operator%(const T, const int128_t) noexcept +BOOST_INT128_HOST_DEVICE inline int128_t operator%(const T, const int128_t) noexcept { static_assert(detail::is_signed_integer_v, "Sign Compare Error"); return {0, 0}; @@ -3326,7 +3326,7 @@ inline int128_t operator%(const T, const int128_t) noexcept #endif // BOOST_INT128_HAS_INT128 template -constexpr int128_t& int128_t::operator%=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator%=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(detail::is_signed_integer_v, "Sign Conversion Error"); @@ -3336,7 +3336,7 @@ constexpr int128_t& int128_t::operator%=(const Integer rhs) noexcept return *this; } -constexpr int128_t& int128_t::operator%=(const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator%=(const int128_t rhs) noexcept { *this = *this % rhs; return *this; @@ -3345,7 +3345,7 @@ constexpr int128_t& 
int128_t::operator%=(const int128_t rhs) noexcept #ifdef BOOST_INT128_HAS_MSVC_INT128 template -inline int128_t& int128_t::operator%=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE inline int128_t& int128_t::operator%=(const Integer rhs) noexcept { *this = *this % rhs; return *this; diff --git a/include/boost/int128/detail/mini_from_chars.hpp b/include/boost/int128/detail/mini_from_chars.hpp index b6661b48..dfdf9b4f 100644 --- a/include/boost/int128/detail/mini_from_chars.hpp +++ b/include/boost/int128/detail/mini_from_chars.hpp @@ -43,13 +43,13 @@ BOOST_INT128_INLINE_CONSTEXPR unsigned char uchar_values[] = static_assert(sizeof(uchar_values) == 256, "uchar_values should represent all 256 values of unsigned char"); // Convert characters for 0-9, A-Z, a-z to 0-35. Anything else is 255 -BOOST_INT128_FORCE_INLINE constexpr auto digit_from_char(char val) noexcept -> unsigned char +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr auto digit_from_char(char val) noexcept -> unsigned char { return uchar_values[static_cast(val)]; } template -constexpr int from_chars_integer_impl(const char* first, const char* last, Integer& value, int base) noexcept +BOOST_INT128_HOST_DEVICE constexpr int from_chars_integer_impl(const char* first, const char* last, Integer& value, int base) noexcept { if (first >= last) { @@ -176,12 +176,12 @@ constexpr int from_chars_integer_impl(const char* first, const char* last, Integ } } // namespace impl -constexpr int from_chars(const char* first, const char* last, uint128_t& value, int base = 10) noexcept +BOOST_INT128_HOST_DEVICE constexpr int from_chars(const char* first, const char* last, uint128_t& value, int base = 10) noexcept { return impl::from_chars_integer_impl(first, last, value, base); } -constexpr int from_chars(const char* first, const char* last, int128_t& value, int base = 10) noexcept +BOOST_INT128_HOST_DEVICE constexpr int from_chars(const char* first, const char* last, int128_t& value, int base = 10) noexcept { 
return impl::from_chars_integer_impl(first, last, value, base); } diff --git a/include/boost/int128/detail/mini_to_chars.hpp b/include/boost/int128/detail/mini_to_chars.hpp index 0d491841..3355a369 100644 --- a/include/boost/int128/detail/mini_to_chars.hpp +++ b/include/boost/int128/detail/mini_to_chars.hpp @@ -26,7 +26,7 @@ BOOST_INT128_INLINE_CONSTEXPR char upper_case_digit_table[] = { static_assert(sizeof(upper_case_digit_table) == sizeof(char) * 16, "10 numbers, and 6 letters"); -constexpr char* mini_to_chars(char (&buffer)[64], uint128_t v, const int base, const bool uppercase) noexcept +BOOST_INT128_HOST_DEVICE constexpr char* mini_to_chars(char (&buffer)[64], uint128_t v, const int base, const bool uppercase) noexcept { char* last {buffer + 64U}; *--last = '\0'; @@ -81,7 +81,7 @@ constexpr char* mini_to_chars(char (&buffer)[64], uint128_t v, const int base, c return last; } -constexpr char* mini_to_chars(char (&buffer)[64], const int128_t v, const int base, const bool uppercase) noexcept +BOOST_INT128_HOST_DEVICE constexpr char* mini_to_chars(char (&buffer)[64], const int128_t v, const int base, const bool uppercase) noexcept { char* p {nullptr}; diff --git a/include/boost/int128/detail/uint128_imp.hpp b/include/boost/int128/detail/uint128_imp.hpp index eed003fb..21b4ebf5 100644 --- a/include/boost/int128/detail/uint128_imp.hpp +++ b/include/boost/int128/detail/uint128_imp.hpp @@ -50,33 +50,33 @@ uint128_t #endif // BOOST_INT128_ENDIAN_LITTLE_BYTE // Defaulted basic construction - constexpr uint128_t() noexcept = default; - constexpr uint128_t(const uint128_t&) noexcept = default; - constexpr uint128_t(uint128_t&&) noexcept = default; - constexpr uint128_t& operator=(const uint128_t&) noexcept = default; - constexpr uint128_t& operator=(uint128_t&&) noexcept = default; + BOOST_INT128_HOST_DEVICE constexpr uint128_t() noexcept = default; + BOOST_INT128_HOST_DEVICE constexpr uint128_t(const uint128_t&) noexcept = default; + BOOST_INT128_HOST_DEVICE constexpr 
uint128_t(uint128_t&&) noexcept = default; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator=(const uint128_t&) noexcept = default; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator=(uint128_t&&) noexcept = default; // Requires a conversion file to be implemented - explicit constexpr uint128_t(const int128_t& v) noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr uint128_t(const int128_t& v) noexcept; // Construct from integral types #if BOOST_INT128_ENDIAN_LITTLE_BYTE - constexpr uint128_t(const std::uint64_t hi, const std::uint64_t lo) noexcept : low {lo}, high {hi} {} + BOOST_INT128_HOST_DEVICE constexpr uint128_t(const std::uint64_t hi, const std::uint64_t lo) noexcept : low {lo}, high {hi} {} template - constexpr uint128_t(const SignedInteger v) noexcept : low {static_cast(v)}, high {v < 0 ? UINT64_MAX : UINT64_C(0)} {} + BOOST_INT128_HOST_DEVICE constexpr uint128_t(const SignedInteger v) noexcept : low {static_cast(v)}, high {v < 0 ? UINT64_MAX : UINT64_C(0)} {} template - constexpr uint128_t(const UnsignedInteger v) noexcept : low {static_cast(v)}, high {} {} + BOOST_INT128_HOST_DEVICE constexpr uint128_t(const UnsignedInteger v) noexcept : low {static_cast(v)}, high {} {} #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128) - BOOST_INT128_BUILTIN_CONSTEXPR uint128_t(const detail::builtin_i128 v) noexcept : + BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t(const detail::builtin_i128 v) noexcept : low {static_cast(v)}, high {static_cast(static_cast(v) >> static_cast(64U))} {} - BOOST_INT128_BUILTIN_CONSTEXPR uint128_t(const detail::builtin_u128 v) noexcept : + BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t(const detail::builtin_u128 v) noexcept : low {static_cast(v)}, high {static_cast(v >> static_cast(64U))} {} @@ -84,21 +84,21 @@ uint128_t #else // Big endian - constexpr uint128_t(const std::uint64_t hi, const std::uint64_t lo) noexcept : high {hi}, low {lo} {} + 
BOOST_INT128_HOST_DEVICE constexpr uint128_t(const std::uint64_t hi, const std::uint64_t lo) noexcept : high {hi}, low {lo} {} template - constexpr uint128_t(const SignedInteger v) noexcept : high {v < 0 ? UINT64_MAX : UINT64_C(0)}, low {static_cast(v)} {} + BOOST_INT128_HOST_DEVICE constexpr uint128_t(const SignedInteger v) noexcept : high {v < 0 ? UINT64_MAX : UINT64_C(0)}, low {static_cast(v)} {} template - constexpr uint128_t(const UnsignedInteger v) noexcept : high {}, low {static_cast(v)} {} + BOOST_INT128_HOST_DEVICE constexpr uint128_t(const UnsignedInteger v) noexcept : high {}, low {static_cast(v)} {} #ifdef BOOST_INT128_HAS_INT128 - constexpr uint128_t(const detail::builtin_i128 v) noexcept : + BOOST_INT128_HOST_DEVICE constexpr uint128_t(const detail::builtin_i128 v) noexcept : high {static_cast(static_cast(v) >> 64U)}, low {static_cast(v)} {} - constexpr uint128_t(const detail::builtin_u128 v) noexcept : + BOOST_INT128_HOST_DEVICE constexpr uint128_t(const detail::builtin_u128 v) noexcept : high {static_cast(v >> 64U)}, low {static_cast(v)} {} @@ -107,161 +107,161 @@ uint128_t #endif // BOOST_INT128_ENDIAN_LITTLE_BYTE // Integer conversion operators - explicit constexpr operator bool() const noexcept {return low || high; } + BOOST_INT128_HOST_DEVICE explicit constexpr operator bool() const noexcept {return low || high; } template - explicit constexpr operator SignedInteger() const noexcept { return static_cast(low); } + BOOST_INT128_HOST_DEVICE explicit constexpr operator SignedInteger() const noexcept { return static_cast(low); } template - explicit constexpr operator UnsignedInteger() const noexcept { return static_cast(low); } + BOOST_INT128_HOST_DEVICE explicit constexpr operator UnsignedInteger() const noexcept { return static_cast(low); } #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128) - explicit BOOST_INT128_BUILTIN_CONSTEXPR operator detail::builtin_i128() const noexcept { return static_cast(static_cast(high) << 
static_cast(64)) | static_cast(low); } + BOOST_INT128_HOST_DEVICE explicit BOOST_INT128_BUILTIN_CONSTEXPR operator detail::builtin_i128() const noexcept { return static_cast(static_cast(high) << static_cast(64)) | static_cast(low); } - explicit BOOST_INT128_BUILTIN_CONSTEXPR operator detail::builtin_u128() const noexcept { return (static_cast(high) << static_cast(64)) | static_cast(low); } + BOOST_INT128_HOST_DEVICE explicit BOOST_INT128_BUILTIN_CONSTEXPR operator detail::builtin_u128() const noexcept { return (static_cast(high) << static_cast(64)) | static_cast(low); } #endif // BOOST_INT128_HAS_INT128 // Conversion to float // This is basically the same as ldexp(static_cast(high), 64) + static_cast(low), // but can be constexpr at C++11 instead of C++26 - explicit constexpr operator float() const noexcept; - explicit constexpr operator double() const noexcept; - explicit constexpr operator long double() const noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr operator float() const noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr operator double() const noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr operator long double() const noexcept; // Compound OR template - constexpr uint128_t& operator|=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator|=(Integer rhs) noexcept; - constexpr uint128_t& operator|=(uint128_t rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator|=(uint128_t rhs) noexcept; #ifdef BOOST_INT128_HAS_MSVC_INT128 template - inline uint128_t& operator|=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE inline uint128_t& operator|=(Integer rhs) noexcept; #endif // BOOST_INT128_HAS_MSVC_INT128 // Compound AND template - constexpr uint128_t& operator&=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator&=(Integer rhs) noexcept; - constexpr uint128_t& operator&=(uint128_t rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator&=(uint128_t rhs) 
noexcept; #ifdef BOOST_INT128_HAS_MSVC_INT128 template - inline uint128_t& operator&=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE inline uint128_t& operator&=(Integer rhs) noexcept; #endif // BOOST_INT128_HAS_MSVC_INT128 // Compound XOR template - constexpr uint128_t& operator^=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator^=(Integer rhs) noexcept; - constexpr uint128_t& operator^=(uint128_t rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator^=(uint128_t rhs) noexcept; #ifdef BOOST_INT128_HAS_MSVC_INT128 template - inline uint128_t& operator^=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE inline uint128_t& operator^=(Integer rhs) noexcept; #endif // BOOST_INT128_HAS_MSVC_INT128 // Compound Left Shift template - constexpr uint128_t& operator<<=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator<<=(Integer rhs) noexcept; - constexpr uint128_t& operator<<=(uint128_t rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator<<=(uint128_t rhs) noexcept; #ifdef BOOST_INT128_HAS_MSVC_INT128 template - inline uint128_t& operator<<=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE inline uint128_t& operator<<=(Integer rhs) noexcept; #endif // BOOST_INT128_HAS_MSVC_INT128 // Compound Right Shift template - constexpr uint128_t& operator>>=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator>>=(Integer rhs) noexcept; - constexpr uint128_t& operator>>=(uint128_t rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator>>=(uint128_t rhs) noexcept; #ifdef BOOST_INT128_HAS_MSVC_INT128 template - inline uint128_t& operator>>=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE inline uint128_t& operator>>=(Integer rhs) noexcept; #endif // BOOST_INT128_HAS_MSVC_INT128 - constexpr uint128_t& operator++() noexcept; - constexpr uint128_t operator++(int) noexcept; - constexpr uint128_t& operator--() noexcept; - constexpr uint128_t 
operator--(int) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator++() noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t operator++(int) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator--() noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t operator--(int) noexcept; // Compound Addition template - constexpr uint128_t& operator+=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator+=(Integer rhs) noexcept; - constexpr uint128_t& operator+=(uint128_t rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator+=(uint128_t rhs) noexcept; #ifdef BOOST_INT128_HAS_MSVC_INT128 template - inline uint128_t& operator+=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE inline uint128_t& operator+=(Integer rhs) noexcept; #endif // BOOST_INT128_HAS_MSVC_INT128 // Compound Subtraction template - constexpr uint128_t& operator-=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator-=(Integer rhs) noexcept; - constexpr uint128_t& operator-=(uint128_t rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator-=(uint128_t rhs) noexcept; #ifdef BOOST_INT128_HAS_MSVC_INT128 template - inline uint128_t& operator-=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE inline uint128_t& operator-=(Integer rhs) noexcept; #endif // BOOST_INT128_HAS_MSVC_INT128 // Compound Multiplication template - constexpr uint128_t& operator*=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator*=(Integer rhs) noexcept; - constexpr uint128_t& operator*=(uint128_t rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator*=(uint128_t rhs) noexcept; #ifdef BOOST_INT128_HAS_MSVC_INT128 template - inline uint128_t& operator*=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE inline uint128_t& operator*=(Integer rhs) noexcept; #endif // BOOST_INT128_HAS_MSVC_INT128 // Compound Division template - constexpr uint128_t& operator/=(Integer rhs) noexcept; + 
BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator/=(Integer rhs) noexcept; - constexpr uint128_t& operator/=(uint128_t rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator/=(uint128_t rhs) noexcept; #ifdef BOOST_INT128_HAS_MSVC_INT128 template - inline uint128_t& operator/=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE inline uint128_t& operator/=(Integer rhs) noexcept; #endif // BOOST_INT128_HAS_MSVC_INT128 // Compound modulo template - constexpr uint128_t& operator%=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator%=(Integer rhs) noexcept; - constexpr uint128_t& operator%=(uint128_t rhs) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator%=(uint128_t rhs) noexcept; #ifdef BOOST_INT128_HAS_MSVC_INT128 template - inline uint128_t& operator%=(Integer rhs) noexcept; + BOOST_INT128_HOST_DEVICE inline uint128_t& operator%=(Integer rhs) noexcept; #endif // BOOST_INT128_HAS_MSVC_INT128 }; @@ -270,7 +270,7 @@ uint128_t // Absolute Value function //===================================== -BOOST_INT128_EXPORT constexpr uint128_t abs(const uint128_t value) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t abs(const uint128_t value) noexcept { return value; } @@ -284,17 +284,17 @@ BOOST_INT128_EXPORT constexpr uint128_t abs(const uint128_t value) noexcept // by 0xFFFFFFFF in order to generally replicate what ldexp is doing in the constexpr context. 
// We also avoid pulling in for the __float128 case where we would need ldexpq -constexpr uint128_t::operator float() const noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t::operator float() const noexcept { return static_cast(high) * detail::offset_value_v + static_cast(low); } -constexpr uint128_t::operator double() const noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t::operator double() const noexcept { return static_cast(high) * detail::offset_value_v + static_cast(low); } -constexpr uint128_t::operator long double() const noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t::operator long double() const noexcept { return static_cast(high) * detail::offset_value_v + static_cast(low); } @@ -303,12 +303,12 @@ constexpr uint128_t::operator long double() const noexcept // Unary Operators //===================================== -BOOST_INT128_EXPORT constexpr uint128_t operator+(const uint128_t value) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator+(const uint128_t value) noexcept { return value; } -BOOST_INT128_EXPORT constexpr uint128_t operator-(const uint128_t value) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator-(const uint128_t value) noexcept { return {~value.high + static_cast(value.low == UINT64_C(0)), ~value.low + UINT64_C(1)}; } @@ -317,12 +317,12 @@ BOOST_INT128_EXPORT constexpr uint128_t operator-(const uint128_t value) noexcep // Equality Operators //===================================== -BOOST_INT128_EXPORT constexpr bool operator==(const uint128_t lhs, const bool rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator==(const uint128_t lhs, const bool rhs) noexcept { return lhs.high == UINT64_C(0) && lhs.low == static_cast(rhs); } -BOOST_INT128_EXPORT constexpr bool operator==(const bool lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator==(const bool lhs, const uint128_t rhs) noexcept 
{ return rhs.high == UINT64_C(0) && rhs.low == static_cast(lhs); } @@ -336,7 +336,7 @@ BOOST_INT128_EXPORT constexpr bool operator==(const bool lhs, const uint128_t rh #endif BOOST_INT128_EXPORT template -constexpr bool operator==(const uint128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator==(const uint128_t lhs, const SignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -353,7 +353,7 @@ constexpr bool operator==(const uint128_t lhs, const SignedInteger rhs) noexcept } BOOST_INT128_EXPORT template -constexpr bool operator==(const SignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator==(const SignedInteger lhs, const uint128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -370,18 +370,18 @@ constexpr bool operator==(const SignedInteger lhs, const uint128_t rhs) noexcept } BOOST_INT128_EXPORT template -constexpr bool operator==(const uint128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator==(const uint128_t lhs, const UnsignedInteger rhs) noexcept { return lhs.high == UINT64_C(0) && lhs.low == static_cast(rhs); } BOOST_INT128_EXPORT template -constexpr bool operator==(const UnsignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator==(const UnsignedInteger lhs, const uint128_t rhs) noexcept { return rhs.high == UINT64_C(0) && rhs.low == static_cast(lhs); } -BOOST_INT128_EXPORT constexpr bool operator==(const uint128_t lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator==(const uint128_t lhs, const uint128_t rhs) noexcept { #if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_AMD64) @@ -417,12 +417,12 @@ BOOST_INT128_EXPORT constexpr bool operator==(const uint128_t lhs, const uint128 #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const uint128_t lhs, 
const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept { return lhs == static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept { return static_cast(lhs) == rhs; } @@ -430,14 +430,14 @@ BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const detail: #else BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const uint128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const uint128_t, const T) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Compare Error"); return true; } BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const T, const uint128_t) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const T, const uint128_t) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Compare Error"); return true; @@ -445,12 +445,12 @@ BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const T, const uint128_t) noexcep #endif // BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs == static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool 
operator==(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept { return static_cast(lhs) == rhs; } @@ -461,18 +461,18 @@ BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const detail: // Inequality Operators //===================================== -BOOST_INT128_EXPORT constexpr bool operator!=(const uint128_t lhs, const bool rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const uint128_t lhs, const bool rhs) noexcept { return lhs.high != UINT64_C(0) || lhs.low != static_cast(rhs); } -BOOST_INT128_EXPORT constexpr bool operator!=(const bool lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const bool lhs, const uint128_t rhs) noexcept { return rhs.high != UINT64_C(0) || rhs.low != static_cast(lhs); } BOOST_INT128_EXPORT template -constexpr bool operator!=(const uint128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const uint128_t lhs, const SignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -489,7 +489,7 @@ constexpr bool operator!=(const uint128_t lhs, const SignedInteger rhs) noexcept } BOOST_INT128_EXPORT template -constexpr bool operator!=(const SignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const SignedInteger lhs, const uint128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -506,18 +506,18 @@ constexpr bool operator!=(const SignedInteger lhs, const uint128_t rhs) noexcept } BOOST_INT128_EXPORT template -constexpr bool operator!=(const uint128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const uint128_t lhs, const UnsignedInteger rhs) noexcept { return lhs.high != UINT64_C(0) || lhs.low != static_cast(rhs); } BOOST_INT128_EXPORT template -constexpr bool operator!=(const UnsignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr 
bool operator!=(const UnsignedInteger lhs, const uint128_t rhs) noexcept { return rhs.high != UINT64_C(0) || rhs.low != static_cast(lhs); } -BOOST_INT128_EXPORT constexpr bool operator!=(const uint128_t lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const uint128_t lhs, const uint128_t rhs) noexcept { #if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_AMD64) @@ -553,12 +553,12 @@ BOOST_INT128_EXPORT constexpr bool operator!=(const uint128_t lhs, const uint128 #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept { return lhs != static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept { return static_cast(lhs) != rhs; } @@ -566,14 +566,14 @@ BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const detail: #else BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const uint128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const uint128_t, const T) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Compare Error"); return true; } BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const T, const uint128_t) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const T, const uint128_t) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Compare Error"); return true; @@ -581,12 +581,12 @@ 
BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const T, const uint128_t) noexcep #endif // BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs != static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept { return static_cast(lhs) != rhs; } @@ -598,7 +598,7 @@ BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const detail: //===================================== BOOST_INT128_EXPORT template -constexpr bool operator<(const uint128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator<(const uint128_t lhs, const SignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -615,7 +615,7 @@ constexpr bool operator<(const uint128_t lhs, const SignedInteger rhs) noexcept } BOOST_INT128_EXPORT template -constexpr bool operator<(const SignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator<(const SignedInteger lhs, const uint128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -632,18 +632,18 @@ constexpr bool operator<(const SignedInteger lhs, const uint128_t rhs) noexcept } BOOST_INT128_EXPORT template -constexpr bool operator<(const uint128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator<(const uint128_t lhs, const UnsignedInteger rhs) noexcept { return lhs.high == UINT64_C(0) && lhs.low < static_cast(rhs); } BOOST_INT128_EXPORT template -constexpr bool operator<(const UnsignedInteger lhs, 
const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator<(const UnsignedInteger lhs, const uint128_t rhs) noexcept { return rhs.high > UINT64_C(0) || static_cast(lhs) < rhs.low; } -BOOST_INT128_EXPORT constexpr bool operator<(const uint128_t lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator<(const uint128_t lhs, const uint128_t rhs) noexcept { // On ARM macs only with the clang compiler is casting to unsigned __int128 uniformly better (and seemingly cost free) #if defined(__clang__) && defined(BOOST_INT128_HAS_INT128) @@ -709,12 +709,12 @@ BOOST_INT128_EXPORT constexpr bool operator<(const uint128_t lhs, const uint128_ #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept { return lhs < static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept { return static_cast(lhs) < rhs; } @@ -722,14 +722,14 @@ BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const detail:: #else BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const uint128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const uint128_t, const T) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Compare Error"); return true; } BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const T, const uint128_t) noexcept +BOOST_INT128_HOST_DEVICE 
BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const T, const uint128_t) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Compare Error"); return true; @@ -737,12 +737,12 @@ BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const T, const uint128_t) noexcept #endif // BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs < static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept { return static_cast(lhs) < rhs; } @@ -754,7 +754,7 @@ BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const detail:: //===================================== BOOST_INT128_EXPORT template -constexpr bool operator<=(const uint128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator<=(const uint128_t lhs, const SignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -771,7 +771,7 @@ constexpr bool operator<=(const uint128_t lhs, const SignedInteger rhs) noexcept } BOOST_INT128_EXPORT template -constexpr bool operator<=(const SignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator<=(const SignedInteger lhs, const uint128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -788,18 +788,18 @@ constexpr bool operator<=(const SignedInteger lhs, const uint128_t rhs) noexcept } BOOST_INT128_EXPORT template -constexpr bool operator<=(const uint128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator<=(const uint128_t 
lhs, const UnsignedInteger rhs) noexcept { return lhs.high == UINT64_C(0) && lhs.low <= static_cast(rhs); } BOOST_INT128_EXPORT template -constexpr bool operator<=(const UnsignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator<=(const UnsignedInteger lhs, const uint128_t rhs) noexcept { return rhs.high > UINT64_C(0) || static_cast(lhs) <= rhs.low; } -BOOST_INT128_EXPORT constexpr bool operator<=(const uint128_t lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator<=(const uint128_t lhs, const uint128_t rhs) noexcept { #if defined(__clang__) && defined(BOOST_INT128_HAS_INT128) @@ -864,22 +864,22 @@ BOOST_INT128_EXPORT constexpr bool operator<=(const uint128_t lhs, const uint128 #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept { return lhs <= static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept { return static_cast(lhs) <= rhs; } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs <= static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool 
operator<=(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept { return static_cast(lhs) <= rhs; } @@ -887,14 +887,14 @@ BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const detail: #else BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const uint128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const uint128_t, const T) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Compare Error"); return true; } BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const T, const uint128_t) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const T, const uint128_t) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Compare Error"); return true; @@ -909,7 +909,7 @@ BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const T, const uint128_t) noexcep //===================================== BOOST_INT128_EXPORT template -constexpr bool operator>(const uint128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator>(const uint128_t lhs, const SignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -926,7 +926,7 @@ constexpr bool operator>(const uint128_t lhs, const SignedInteger rhs) noexcept } BOOST_INT128_EXPORT template -constexpr bool operator>(const SignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator>(const SignedInteger lhs, const uint128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -943,18 +943,18 @@ constexpr bool operator>(const SignedInteger lhs, const uint128_t rhs) noexcept } BOOST_INT128_EXPORT template -constexpr bool operator>(const uint128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator>(const uint128_t lhs, const UnsignedInteger rhs) noexcept { return lhs.high > UINT64_C(0) || lhs.low > 
static_cast(rhs); } BOOST_INT128_EXPORT template -constexpr bool operator>(const UnsignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator>(const UnsignedInteger lhs, const uint128_t rhs) noexcept { return rhs.high == UINT64_C(0) && static_cast(lhs) > rhs.low; } -BOOST_INT128_EXPORT constexpr bool operator>(const uint128_t lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator>(const uint128_t lhs, const uint128_t rhs) noexcept { #if defined(__clang__) && defined(BOOST_INT128_HAS_INT128) @@ -1019,22 +1019,22 @@ BOOST_INT128_EXPORT constexpr bool operator>(const uint128_t lhs, const uint128_ #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept { return lhs > static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept { return static_cast(lhs) > rhs; } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs > static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept { return static_cast(lhs) > rhs; 
} @@ -1042,14 +1042,14 @@ BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const detail:: #else BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const uint128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const uint128_t, const T) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Compare Error"); return true; } BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const T, const uint128_t) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const T, const uint128_t) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Compare Error"); return true; @@ -1064,7 +1064,7 @@ BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const T, const uint128_t) noexcept //===================================== BOOST_INT128_EXPORT template -constexpr bool operator>=(const uint128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator>=(const uint128_t lhs, const SignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -1081,7 +1081,7 @@ constexpr bool operator>=(const uint128_t lhs, const SignedInteger rhs) noexcept } BOOST_INT128_EXPORT template -constexpr bool operator>=(const SignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator>=(const SignedInteger lhs, const uint128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -1098,18 +1098,18 @@ constexpr bool operator>=(const SignedInteger lhs, const uint128_t rhs) noexcept } BOOST_INT128_EXPORT template -constexpr bool operator>=(const uint128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator>=(const uint128_t lhs, const UnsignedInteger rhs) noexcept { return lhs.high > UINT64_C(0) || lhs.low >= static_cast(rhs); } BOOST_INT128_EXPORT template -constexpr bool operator>=(const 
UnsignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr bool operator>=(const UnsignedInteger lhs, const uint128_t rhs) noexcept { return rhs.high == UINT64_C(0) && static_cast(lhs) >= rhs.low; } -BOOST_INT128_EXPORT constexpr bool operator>=(const uint128_t lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator>=(const uint128_t lhs, const uint128_t rhs) noexcept { #if defined(__clang__) && defined(BOOST_INT128_HAS_INT128) @@ -1172,24 +1172,24 @@ BOOST_INT128_EXPORT constexpr bool operator>=(const uint128_t lhs, const uint128 #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128) -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept { return lhs >= static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept { return static_cast(lhs) >= rhs; } #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs >= static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept { return 
static_cast(lhs) >= rhs; } @@ -1197,14 +1197,14 @@ BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const detail: #else BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const uint128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const uint128_t, const T) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Compare Error"); return true; } BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const T, const uint128_t) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const T, const uint128_t) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Compare Error"); return true; @@ -1220,7 +1220,7 @@ BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const T, const uint128_t) noexcep #ifdef BOOST_INT128_HAS_SPACESHIP_OPERATOR -BOOST_INT128_EXPORT constexpr std::strong_ordering operator<=>(const uint128_t lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const uint128_t lhs, const uint128_t rhs) noexcept { if (lhs < rhs) { @@ -1237,7 +1237,7 @@ BOOST_INT128_EXPORT constexpr std::strong_ordering operator<=>(const uint128_t l } BOOST_INT128_EXPORT template -constexpr std::strong_ordering operator<=>(const uint128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const uint128_t lhs, const UnsignedInteger rhs) noexcept { if (lhs < rhs) { @@ -1254,7 +1254,7 @@ constexpr std::strong_ordering operator<=>(const uint128_t lhs, const UnsignedIn } BOOST_INT128_EXPORT template -constexpr std::strong_ordering operator<=>(const UnsignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const UnsignedInteger lhs, const uint128_t rhs) noexcept { if (lhs < rhs) { @@ -1271,7 +1271,7 @@ 
constexpr std::strong_ordering operator<=>(const UnsignedInteger lhs, const uint } BOOST_INT128_EXPORT template -constexpr std::strong_ordering operator<=>(const SignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const SignedInteger lhs, const uint128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -1299,7 +1299,7 @@ constexpr std::strong_ordering operator<=>(const SignedInteger lhs, const uint12 } BOOST_INT128_EXPORT template -constexpr std::strong_ordering operator<=>(const uint128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const uint128_t lhs, const SignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE @@ -1332,7 +1332,7 @@ constexpr std::strong_ordering operator<=>(const uint128_t lhs, const SignedInte // Not Operator //===================================== -BOOST_INT128_EXPORT constexpr uint128_t operator~(const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator~(const uint128_t rhs) noexcept { return {~rhs.high, ~rhs.low}; } @@ -1342,7 +1342,7 @@ BOOST_INT128_EXPORT constexpr uint128_t operator~(const uint128_t rhs) noexcept //===================================== BOOST_INT128_EXPORT template -constexpr uint128_t operator|(const uint128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const uint128_t lhs, const SignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -1359,7 +1359,7 @@ constexpr uint128_t operator|(const uint128_t lhs, const SignedInteger rhs) noex } BOOST_INT128_EXPORT template -constexpr uint128_t operator|(const SignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const SignedInteger lhs, const uint128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -1376,18 +1376,18 @@ constexpr uint128_t 
operator|(const SignedInteger lhs, const uint128_t rhs) noex } BOOST_INT128_EXPORT template -constexpr uint128_t operator|(const uint128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const uint128_t lhs, const UnsignedInteger rhs) noexcept { return {lhs.high, lhs.low | static_cast(rhs)}; } BOOST_INT128_EXPORT template -constexpr uint128_t operator|(const UnsignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const UnsignedInteger lhs, const uint128_t rhs) noexcept { return {rhs.high, rhs.low | static_cast(lhs)}; } -BOOST_INT128_EXPORT constexpr uint128_t operator|(const uint128_t lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const uint128_t lhs, const uint128_t rhs) noexcept { return {lhs.high | rhs.high, lhs.low | rhs.low}; } @@ -1396,12 +1396,12 @@ BOOST_INT128_EXPORT constexpr uint128_t operator|(const uint128_t lhs, const uin #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT constexpr uint128_t operator|(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept { return lhs | static_cast(rhs); } -BOOST_INT128_EXPORT constexpr uint128_t operator|(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept { return static_cast(lhs) | rhs; } @@ -1409,14 +1409,14 @@ BOOST_INT128_EXPORT constexpr uint128_t operator|(const detail::builtin_i128 lhs #else BOOST_INT128_EXPORT template ::value, bool> = true> -constexpr uint128_t operator|(const uint128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const uint128_t, const T) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign 
Conversion Error"); return {0, 0}; } BOOST_INT128_EXPORT template ::value, bool> = true> -constexpr uint128_t operator|(const T, const uint128_t) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const T, const uint128_t) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Conversion Error"); return {0, 0}; @@ -1424,12 +1424,12 @@ constexpr uint128_t operator|(const T, const uint128_t) noexcept #endif // BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT constexpr uint128_t operator|(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs | static_cast(rhs); } -BOOST_INT128_EXPORT constexpr uint128_t operator|(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept { return static_cast(lhs) | rhs; } @@ -1437,7 +1437,7 @@ BOOST_INT128_EXPORT constexpr uint128_t operator|(const detail::builtin_u128 lhs #endif // BOOST_INT128_HAS_INT128 template -constexpr uint128_t& uint128_t::operator|=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator|=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(detail::is_unsigned_integer_v, "Sign Conversion Error"); @@ -1446,7 +1446,7 @@ constexpr uint128_t& uint128_t::operator|=(const Integer rhs) noexcept *this = *this | rhs; return *this; } -constexpr uint128_t& uint128_t::operator|=(const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator|=(const uint128_t rhs) noexcept { *this = *this | rhs; return *this; @@ -1455,7 +1455,7 @@ constexpr uint128_t& uint128_t::operator|=(const uint128_t rhs) noexcept #ifdef BOOST_INT128_HAS_MSVC_INT128 template -inline uint128_t& uint128_t::operator|=(const 
Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE inline uint128_t& uint128_t::operator|=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(!std::numeric_limits::is_signed, "Sign Conversion Error"); @@ -1472,7 +1472,7 @@ inline uint128_t& uint128_t::operator|=(const Integer rhs) noexcept //===================================== BOOST_INT128_EXPORT template -constexpr uint128_t operator&(const uint128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const uint128_t lhs, const SignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -1489,7 +1489,7 @@ constexpr uint128_t operator&(const uint128_t lhs, const SignedInteger rhs) noex } template -constexpr uint128_t operator&(const SignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const SignedInteger lhs, const uint128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -1506,18 +1506,18 @@ constexpr uint128_t operator&(const SignedInteger lhs, const uint128_t rhs) noex } BOOST_INT128_EXPORT template -constexpr uint128_t operator&(const uint128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const uint128_t lhs, const UnsignedInteger rhs) noexcept { return {lhs.high, lhs.low & static_cast(rhs)}; } BOOST_INT128_EXPORT template -constexpr uint128_t operator&(const UnsignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const UnsignedInteger lhs, const uint128_t rhs) noexcept { return {rhs.high, rhs.low & static_cast(lhs)}; } -BOOST_INT128_EXPORT constexpr uint128_t operator&(const uint128_t lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const uint128_t lhs, const uint128_t rhs) noexcept { return {lhs.high & rhs.high, lhs.low & rhs.low}; } @@ -1526,12 +1526,12 @@ BOOST_INT128_EXPORT constexpr 
uint128_t operator&(const uint128_t lhs, const uin #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT constexpr uint128_t operator&(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept { return lhs & static_cast(rhs); } -BOOST_INT128_EXPORT constexpr uint128_t operator&(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept { return static_cast(lhs) & rhs; } @@ -1539,14 +1539,14 @@ BOOST_INT128_EXPORT constexpr uint128_t operator&(const detail::builtin_i128 lhs #else BOOST_INT128_EXPORT template ::value, bool> = true> -constexpr uint128_t operator&(const uint128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const uint128_t, const T) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Conversion Error"); return {0, 0}; } BOOST_INT128_EXPORT template ::value, bool> = true> -constexpr uint128_t operator&(const T, const uint128_t) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const T, const uint128_t) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Conversion Error"); return {0, 0}; @@ -1554,12 +1554,12 @@ constexpr uint128_t operator&(const T, const uint128_t) noexcept #endif // BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT constexpr uint128_t operator&(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs & static_cast(rhs); } -BOOST_INT128_EXPORT constexpr uint128_t operator&(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const 
detail::builtin_u128 lhs, const uint128_t rhs) noexcept { return static_cast(lhs) & rhs; } @@ -1567,7 +1567,7 @@ BOOST_INT128_EXPORT constexpr uint128_t operator&(const detail::builtin_u128 lhs #endif // BOOST_INT128_HAS_INT128 template -constexpr uint128_t& uint128_t::operator&=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator&=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(detail::is_unsigned_integer_v, "Sign Conversion Error"); @@ -1577,7 +1577,7 @@ constexpr uint128_t& uint128_t::operator&=(const Integer rhs) noexcept return *this; } -constexpr uint128_t& uint128_t::operator&=(const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator&=(const uint128_t rhs) noexcept { *this = *this & rhs; return *this; @@ -1586,7 +1586,7 @@ constexpr uint128_t& uint128_t::operator&=(const uint128_t rhs) noexcept #ifdef BOOST_INT128_HAS_MSVC_INT128 template -inline uint128_t& uint128_t::operator&=(Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE inline uint128_t& uint128_t::operator&=(Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(!std::numeric_limits::is_signed, "Sign Conversion Error"); @@ -1604,7 +1604,7 @@ inline uint128_t& uint128_t::operator&=(Integer rhs) noexcept //===================================== BOOST_INT128_EXPORT template -constexpr uint128_t operator^(const uint128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const uint128_t lhs, const SignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -1621,7 +1621,7 @@ constexpr uint128_t operator^(const uint128_t lhs, const SignedInteger rhs) noex } BOOST_INT128_EXPORT template -constexpr uint128_t operator^(const SignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const SignedInteger lhs, const uint128_t rhs) noexcept { #ifdef 
BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -1638,18 +1638,18 @@ constexpr uint128_t operator^(const SignedInteger lhs, const uint128_t rhs) noex } BOOST_INT128_EXPORT template -constexpr uint128_t operator^(const uint128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const uint128_t lhs, const UnsignedInteger rhs) noexcept { return {lhs.high, lhs.low ^ static_cast(rhs)}; } BOOST_INT128_EXPORT template -constexpr uint128_t operator^(const UnsignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const UnsignedInteger lhs, const uint128_t rhs) noexcept { return {rhs.high, rhs.low ^ static_cast(lhs)}; } -BOOST_INT128_EXPORT constexpr uint128_t operator^(const uint128_t lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const uint128_t lhs, const uint128_t rhs) noexcept { return {lhs.high ^ rhs.high, lhs.low ^ rhs.low}; } @@ -1658,12 +1658,12 @@ BOOST_INT128_EXPORT constexpr uint128_t operator^(const uint128_t lhs, const uin #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT constexpr uint128_t operator^(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept { return lhs ^ static_cast(rhs); } -BOOST_INT128_EXPORT constexpr uint128_t operator^(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept { return static_cast(lhs) ^ rhs; } @@ -1671,14 +1671,14 @@ BOOST_INT128_EXPORT constexpr uint128_t operator^(const detail::builtin_i128 lhs #else BOOST_INT128_EXPORT template ::value, bool> = true> -constexpr uint128_t operator^(const uint128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const 
uint128_t, const T) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Conversion Error"); return {0, 0}; } BOOST_INT128_EXPORT template ::value, bool> = true> -constexpr uint128_t operator^(const T, const uint128_t) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const T, const uint128_t) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Conversion Error"); return {0, 0}; @@ -1686,12 +1686,12 @@ constexpr uint128_t operator^(const T, const uint128_t) noexcept #endif // BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT constexpr uint128_t operator^(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs ^ static_cast(rhs); } -BOOST_INT128_EXPORT constexpr uint128_t operator^(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept { return static_cast(lhs) ^ rhs; } @@ -1699,7 +1699,7 @@ BOOST_INT128_EXPORT constexpr uint128_t operator^(const detail::builtin_u128 lhs #endif // BOOST_INT128_HAS_INT128 template -constexpr uint128_t& uint128_t::operator^=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator^=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(detail::is_unsigned_integer_v, "Sign Conversion Error"); @@ -1709,7 +1709,7 @@ constexpr uint128_t& uint128_t::operator^=(const Integer rhs) noexcept return *this; } -constexpr uint128_t& uint128_t::operator^=(const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator^=(const uint128_t rhs) noexcept { *this = *this ^ rhs; return *this; @@ -1718,7 +1718,7 @@ constexpr uint128_t& uint128_t::operator^=(const uint128_t rhs) noexcept #ifdef 
BOOST_INT128_HAS_MSVC_INT128 template -inline uint128_t& uint128_t::operator^=(Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE inline uint128_t& uint128_t::operator^=(Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(!std::numeric_limits::is_signed, "Sign Conversion Error"); @@ -1737,7 +1737,7 @@ inline uint128_t& uint128_t::operator^=(Integer rhs) noexcept namespace detail { template -constexpr uint128_t default_ls_impl(const uint128_t lhs, const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t default_ls_impl(const uint128_t lhs, const Integer rhs) noexcept { static_assert(std::is_integral::value, "Needs to be a builtin type"); @@ -1768,7 +1768,7 @@ constexpr uint128_t default_ls_impl(const uint128_t lhs, const Integer rhs) noex } template -uint128_t intrinsic_ls_impl(const uint128_t lhs, const T rhs) noexcept +BOOST_INT128_HOST_DEVICE uint128_t intrinsic_ls_impl(const uint128_t lhs, const T rhs) noexcept { if (BOOST_INT128_UNLIKELY(rhs >= 128 || rhs < 0)) { @@ -1829,7 +1829,7 @@ uint128_t intrinsic_ls_impl(const uint128_t lhs, const T rhs) noexcept } // namespace detail BOOST_INT128_EXPORT template -constexpr uint128_t operator<<(const uint128_t lhs, const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator<<(const uint128_t lhs, const Integer rhs) noexcept { #ifndef BOOST_INT128_NO_CONSTEVAL_DETECTION @@ -1851,7 +1851,7 @@ constexpr uint128_t operator<<(const uint128_t lhs, const Integer rhs) noexcept // A number of different overloads to ensure that we return the same type as the builtins would -BOOST_INT128_EXPORT constexpr uint128_t operator<<(const uint128_t lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator<<(const uint128_t lhs, const uint128_t rhs) noexcept { if (rhs.high > UINT64_C(0) || rhs.low >= UINT64_C(128)) { @@ -1863,7 +1863,7 @@ BOOST_INT128_EXPORT constexpr uint128_t operator<<(const uint128_t lhs, const ui 
#ifdef BOOST_INT128_HAS_INT128 -BOOST_INT128_EXPORT constexpr detail::builtin_u128 operator<<(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_u128 operator<<(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept { constexpr auto bit_width {sizeof(detail::builtin_u128 ) * 8}; @@ -1875,7 +1875,7 @@ BOOST_INT128_EXPORT constexpr detail::builtin_u128 operator<<(const detail::buil return lhs << rhs.low; } -BOOST_INT128_EXPORT constexpr detail::builtin_i128 operator<<(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_i128 operator<<(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept { constexpr auto bit_width {sizeof(detail::builtin_u128) * 8}; @@ -1890,7 +1890,7 @@ BOOST_INT128_EXPORT constexpr detail::builtin_i128 operator<<(const detail::buil #endif BOOST_INT128_EXPORT template && (sizeof(SignedInteger) * 8 <= 16), bool> = true> -constexpr int operator<<(const SignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int operator<<(const SignedInteger lhs, const uint128_t rhs) noexcept { constexpr auto bit_width {sizeof(SignedInteger) * 8}; @@ -1903,7 +1903,7 @@ constexpr int operator<<(const SignedInteger lhs, const uint128_t rhs) noexcept } BOOST_INT128_EXPORT template && (sizeof(UnsignedInteger) * 8 <= 16), bool> = true> -constexpr unsigned int operator<<(const UnsignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr unsigned int operator<<(const UnsignedInteger lhs, const uint128_t rhs) noexcept { constexpr auto bit_width {sizeof(UnsignedInteger) * 8}; @@ -1916,13 +1916,13 @@ constexpr unsigned int operator<<(const UnsignedInteger lhs, const uint128_t rhs } template -constexpr uint128_t& uint128_t::operator<<=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator<<=(const Integer 
rhs) noexcept { *this = *this << rhs; return *this; } -constexpr uint128_t& uint128_t::operator<<=(const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator<<=(const uint128_t rhs) noexcept { *this = *this << rhs; return *this; @@ -1931,7 +1931,7 @@ constexpr uint128_t& uint128_t::operator<<=(const uint128_t rhs) noexcept #ifdef BOOST_INT128_HAS_MSVC_INT128 template -inline uint128_t& uint128_t::operator<<=(Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE inline uint128_t& uint128_t::operator<<=(Integer rhs) noexcept { *this = *this << rhs; return *this; @@ -1946,7 +1946,7 @@ inline uint128_t& uint128_t::operator<<=(Integer rhs) noexcept namespace detail { template -constexpr uint128_t default_rs_impl(const uint128_t lhs, const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t default_rs_impl(const uint128_t lhs, const Integer rhs) noexcept { if (rhs < 0 || rhs >= 128) { @@ -1975,7 +1975,7 @@ constexpr uint128_t default_rs_impl(const uint128_t lhs, const Integer rhs) noex } template -uint128_t intrinsic_rs_impl(const uint128_t lhs, const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE uint128_t intrinsic_rs_impl(const uint128_t lhs, const Integer rhs) noexcept { if (BOOST_INT128_UNLIKELY(rhs >= 128 || rhs < 0)) { @@ -2033,7 +2033,7 @@ uint128_t intrinsic_rs_impl(const uint128_t lhs, const Integer rhs) noexcept } // namespace detail BOOST_INT128_EXPORT template ::value, bool> = true> -constexpr uint128_t operator>>(const uint128_t lhs, const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator>>(const uint128_t lhs, const Integer rhs) noexcept { #ifndef BOOST_INT128_NO_CONSTEVAL_DETECTION @@ -2053,7 +2053,7 @@ constexpr uint128_t operator>>(const uint128_t lhs, const Integer rhs) noexcept #endif } -BOOST_INT128_EXPORT constexpr uint128_t operator>>(const uint128_t lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator>>(const 
uint128_t lhs, const uint128_t rhs) noexcept { if (rhs.high > UINT64_C(0) || rhs.low >= UINT64_C(128)) { @@ -2065,7 +2065,7 @@ BOOST_INT128_EXPORT constexpr uint128_t operator>>(const uint128_t lhs, const ui #ifdef BOOST_INT128_HAS_INT128 -BOOST_INT128_EXPORT constexpr detail::builtin_u128 operator>>(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_u128 operator>>(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept { constexpr auto bit_width = sizeof(detail::builtin_u128) * 8; @@ -2077,7 +2077,7 @@ BOOST_INT128_EXPORT constexpr detail::builtin_u128 operator>>(const detail::buil return lhs >> rhs.low; } -BOOST_INT128_EXPORT constexpr detail::builtin_i128 operator>>(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_i128 operator>>(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept { constexpr auto bit_width = sizeof(detail::builtin_i128) * 8; @@ -2092,7 +2092,7 @@ BOOST_INT128_EXPORT constexpr detail::builtin_i128 operator>>(const detail::buil #endif BOOST_INT128_EXPORT template && (sizeof(SignedInteger) * 8 <= 16), bool> = true> -constexpr int operator>>(const SignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int operator>>(const SignedInteger lhs, const uint128_t rhs) noexcept { constexpr auto bit_width = sizeof(SignedInteger) * 8; @@ -2105,7 +2105,7 @@ constexpr int operator>>(const SignedInteger lhs, const uint128_t rhs) noexcept } BOOST_INT128_EXPORT template && (sizeof(UnsignedInteger) * 8 <= 16), bool> = true> -constexpr unsigned operator>>(UnsignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr unsigned operator>>(UnsignedInteger lhs, const uint128_t rhs) noexcept { constexpr auto bit_width = sizeof(UnsignedInteger) * 8; @@ -2118,13 +2118,13 @@ constexpr unsigned operator>>(UnsignedInteger lhs, const 
uint128_t rhs) noexcept } template -constexpr uint128_t& uint128_t::operator>>=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator>>=(const Integer rhs) noexcept { *this = *this >> rhs; return *this; } -constexpr uint128_t& uint128_t::operator>>=(const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator>>=(const uint128_t rhs) noexcept { *this = *this >> rhs; return *this; @@ -2133,7 +2133,7 @@ constexpr uint128_t& uint128_t::operator>>=(const uint128_t rhs) noexcept #ifdef BOOST_INT128_HAS_MSVC_INT128 template -inline uint128_t& uint128_t::operator>>=(Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE inline uint128_t& uint128_t::operator>>=(Integer rhs) noexcept { *this = *this >> rhs; return *this; @@ -2145,7 +2145,7 @@ inline uint128_t& uint128_t::operator>>=(Integer rhs) noexcept // Increment Operator //===================================== -constexpr uint128_t& uint128_t::operator++() noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator++() noexcept { if (++low == UINT64_C(0)) { @@ -2155,7 +2155,7 @@ constexpr uint128_t& uint128_t::operator++() noexcept return *this; } -constexpr uint128_t uint128_t::operator++(int) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t uint128_t::operator++(int) noexcept { const auto temp {*this}; ++(*this); @@ -2166,7 +2166,7 @@ constexpr uint128_t uint128_t::operator++(int) noexcept // Decrement Operator //===================================== -constexpr uint128_t& uint128_t::operator--() noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator--() noexcept { if (--low == UINT64_MAX) { @@ -2176,7 +2176,7 @@ constexpr uint128_t& uint128_t::operator--() noexcept return *this; } -constexpr uint128_t uint128_t::operator--(int) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t uint128_t::operator--(int) noexcept { const auto temp {*this}; --(*this); @@ -2189,7 +2189,7 @@ constexpr uint128_t 
uint128_t::operator--(int) noexcept namespace impl { -BOOST_INT128_FORCE_INLINE constexpr uint128_t default_add(const uint128_t lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr uint128_t default_add(const uint128_t lhs, const uint128_t rhs) noexcept { #if defined(BOOST_INT128_HAS_BUILTIN_ADD_OVERFLOW) && (defined(__i386__) || (defined(__aarch64__) && !defined(__APPLE__)) || defined(__arm__) || (defined(__s390__) || defined(__s390x__))) @@ -2212,7 +2212,7 @@ BOOST_INT128_FORCE_INLINE constexpr uint128_t default_add(const uint128_t lhs, c #endif } -BOOST_INT128_FORCE_INLINE constexpr uint128_t default_add(const uint128_t lhs, const std::uint64_t rhs) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr uint128_t default_add(const uint128_t lhs, const std::uint64_t rhs) noexcept { #if defined(BOOST_INT128_HAS_BUILTIN_ADD_OVERFLOW) && (defined(__i386__) || (defined(__aarch64__) && !defined(__APPLE__)) || defined(__arm__) || (defined(__s390__) || defined(__s390x__))) @@ -2235,7 +2235,7 @@ BOOST_INT128_FORCE_INLINE constexpr uint128_t default_add(const uint128_t lhs, c #endif } -BOOST_INT128_FORCE_INLINE constexpr uint128_t default_sub(const uint128_t lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr uint128_t default_sub(const uint128_t lhs, const uint128_t rhs) noexcept { #if defined(BOOST_INT128_HAS_BUILTIN_SUB_OVERFLOW) && (defined(__i386__) || defined(__arm__) || (defined(__s390__) || defined(__s390x__))) @@ -2263,7 +2263,7 @@ BOOST_INT128_FORCE_INLINE constexpr uint128_t default_sub(const uint128_t lhs, c #endif } -BOOST_INT128_FORCE_INLINE constexpr uint128_t default_sub(const uint128_t lhs, const std::uint64_t rhs) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr uint128_t default_sub(const uint128_t lhs, const std::uint64_t rhs) noexcept { #if defined(BOOST_INT128_HAS_BUILTIN_SUB_OVERFLOW) && (defined(__i386__) || 
(defined(__aarch64__) && !defined(__APPLE__)) || defined(__arm__) || (defined(__s390__) || defined(__s390x__))) @@ -2295,7 +2295,7 @@ BOOST_INT128_FORCE_INLINE constexpr uint128_t default_sub(const uint128_t lhs, c #endif BOOST_INT128_EXPORT template -constexpr uint128_t operator+(const uint128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator+(const uint128_t lhs, const SignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -2313,7 +2313,7 @@ constexpr uint128_t operator+(const uint128_t lhs, const SignedInteger rhs) noex } BOOST_INT128_EXPORT template -constexpr uint128_t operator+(const SignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator+(const SignedInteger lhs, const uint128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -2335,18 +2335,18 @@ constexpr uint128_t operator+(const SignedInteger lhs, const uint128_t rhs) noex #endif BOOST_INT128_EXPORT template -constexpr uint128_t operator+(const uint128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator+(const uint128_t lhs, const UnsignedInteger rhs) noexcept { return impl::default_add(lhs, static_cast(rhs)); } BOOST_INT128_EXPORT template -constexpr uint128_t operator+(const UnsignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator+(const UnsignedInteger lhs, const uint128_t rhs) noexcept { return impl::default_add(rhs, static_cast(lhs)); } -BOOST_INT128_EXPORT constexpr uint128_t operator+(const uint128_t lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator+(const uint128_t lhs, const uint128_t rhs) noexcept { return impl::default_add(lhs, rhs); } @@ -2355,12 +2355,12 @@ BOOST_INT128_EXPORT constexpr uint128_t operator+(const uint128_t lhs, const uin #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT 
BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator+(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator+(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept { return impl::default_add(lhs, static_cast(rhs)); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator+(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator+(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept { return impl::default_add(static_cast(lhs), rhs); } @@ -2368,14 +2368,14 @@ BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator+(const det #else BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator+(const uint128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator+(const uint128_t, const T) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Conversion Error"); return {0, 0}; } BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator+(const T, const uint128_t) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator+(const T, const uint128_t) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Conversion Error"); return {0, 0}; @@ -2383,12 +2383,12 @@ BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator+(const T, const uint128_t) noe #endif // BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator+(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator+(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept { return impl::default_add(lhs, static_cast(rhs)); } -BOOST_INT128_EXPORT 
BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator+(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator+(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept { return impl::default_add(static_cast(lhs), rhs); } @@ -2396,7 +2396,7 @@ BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator+(const det #endif // BOOST_INT128_HAS_INT128 template -constexpr uint128_t& uint128_t::operator+=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator+=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(detail::is_unsigned_integer_v, "Sign Conversion Error"); @@ -2406,7 +2406,7 @@ constexpr uint128_t& uint128_t::operator+=(const Integer rhs) noexcept return *this; } -constexpr uint128_t& uint128_t::operator+=(const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator+=(const uint128_t rhs) noexcept { *this = *this + rhs; return *this; @@ -2415,7 +2415,7 @@ constexpr uint128_t& uint128_t::operator+=(const uint128_t rhs) noexcept #ifdef BOOST_INT128_HAS_MSVC_INT128 template -inline uint128_t& uint128_t::operator+=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE inline uint128_t& uint128_t::operator+=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(!std::numeric_limits::is_signed, "Sign Conversion Error"); @@ -2438,7 +2438,7 @@ inline uint128_t& uint128_t::operator+=(const Integer rhs) noexcept #endif BOOST_INT128_EXPORT template -constexpr uint128_t operator-(const uint128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator-(const uint128_t lhs, const SignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -2456,7 +2456,7 @@ constexpr uint128_t operator-(const uint128_t lhs, const SignedInteger rhs) noex } BOOST_INT128_EXPORT template 
-constexpr uint128_t operator-(const SignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator-(const SignedInteger lhs, const uint128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -2478,18 +2478,18 @@ constexpr uint128_t operator-(const SignedInteger lhs, const uint128_t rhs) noex #endif BOOST_INT128_EXPORT template -constexpr uint128_t operator-(const uint128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator-(const uint128_t lhs, const UnsignedInteger rhs) noexcept { return impl::default_sub(lhs, static_cast(rhs)); } BOOST_INT128_EXPORT template -constexpr uint128_t operator-(const UnsignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator-(const UnsignedInteger lhs, const uint128_t rhs) noexcept { return impl::default_add(-rhs, static_cast(lhs)); } -BOOST_INT128_EXPORT constexpr uint128_t operator-(const uint128_t lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator-(const uint128_t lhs, const uint128_t rhs) noexcept { return impl::default_sub(lhs, rhs); } @@ -2498,12 +2498,12 @@ BOOST_INT128_EXPORT constexpr uint128_t operator-(const uint128_t lhs, const uin #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator-(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator-(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept { return lhs - static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator-(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator-(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept { return static_cast(lhs) - rhs; } @@ 
-2511,14 +2511,14 @@ BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator-(const det #else BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator-(const uint128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator-(const uint128_t, const T) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Conversion Error"); return {0, 0}; } BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator-(const T, const uint128_t) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator-(const T, const uint128_t) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Conversion Error"); return {0, 0}; @@ -2526,12 +2526,12 @@ BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator-(const T, const uint128_t) noe #endif // BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator-(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator-(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs - static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator-(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator-(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept { return static_cast(lhs) - rhs; } @@ -2539,7 +2539,7 @@ BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator-(const det #endif // BOOST_INT128_HAS_INT128 template -constexpr uint128_t& uint128_t::operator-=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator-=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(detail::is_unsigned_integer_v, 
"Sign Conversion Error"); @@ -2549,7 +2549,7 @@ constexpr uint128_t& uint128_t::operator-=(const Integer rhs) noexcept return *this; } -constexpr uint128_t& uint128_t::operator-=(const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator-=(const uint128_t rhs) noexcept { *this = *this - rhs; return *this; @@ -2558,7 +2558,7 @@ constexpr uint128_t& uint128_t::operator-=(const uint128_t rhs) noexcept #ifdef BOOST_INT128_HAS_MSVC_INT128 template -inline uint128_t& uint128_t::operator-=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE inline uint128_t& uint128_t::operator-=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(!std::numeric_limits::is_signed, "Sign Conversion Error"); @@ -2583,7 +2583,7 @@ namespace detail { #if defined(_M_AMD64) && !defined(__GNUC__) -BOOST_INT128_FORCE_INLINE uint128_t msvc_mul(const uint128_t lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE uint128_t msvc_mul(const uint128_t lhs, const uint128_t rhs) noexcept { uint128_t result {}; result.low = _umul128(lhs.low, rhs.low, &result.high); @@ -2593,7 +2593,7 @@ BOOST_INT128_FORCE_INLINE uint128_t msvc_mul(const uint128_t lhs, const uint128_ return result; } -BOOST_INT128_FORCE_INLINE uint128_t msvc_mul(const uint128_t lhs, const std::uint64_t rhs) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE uint128_t msvc_mul(const uint128_t lhs, const std::uint64_t rhs) noexcept { uint128_t result {}; result.low = _umul128(lhs.low, rhs, &result.high); @@ -2602,7 +2602,7 @@ BOOST_INT128_FORCE_INLINE uint128_t msvc_mul(const uint128_t lhs, const std::uin return result; } -BOOST_INT128_FORCE_INLINE uint128_t msvc_mul(const uint128_t lhs, const std::uint32_t rhs) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE uint128_t msvc_mul(const uint128_t lhs, const std::uint32_t rhs) noexcept { uint128_t result {}; result.low = _umul128(lhs.low, static_cast(rhs), 
&result.high); @@ -2613,7 +2613,7 @@ BOOST_INT128_FORCE_INLINE uint128_t msvc_mul(const uint128_t lhs, const std::uin #elif defined(_M_ARM64) -BOOST_INT128_FORCE_INLINE uint128_t msvc_mul(const uint128_t lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE uint128_t msvc_mul(const uint128_t lhs, const uint128_t rhs) noexcept { const auto low_low{lhs.low * rhs.low}; const auto high_low_low{__umulh(lhs.low, rhs.low)}; @@ -2626,7 +2626,7 @@ BOOST_INT128_FORCE_INLINE uint128_t msvc_mul(const uint128_t lhs, const uint128_ return {high, low_low}; } -BOOST_INT128_FORCE_INLINE uint128_t msvc_mul(const uint128_t lhs, const std::uint64_t rhs) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE uint128_t msvc_mul(const uint128_t lhs, const std::uint64_t rhs) noexcept { const auto low{lhs.low * rhs}; const auto high{__umulh(lhs.low, rhs) + (lhs.high * rhs)}; @@ -2634,7 +2634,7 @@ BOOST_INT128_FORCE_INLINE uint128_t msvc_mul(const uint128_t lhs, const std::uin return {high, low}; } -BOOST_INT128_FORCE_INLINE uint128_t msvc_mul(const uint128_t lhs, const std::uint32_t rhs) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE uint128_t msvc_mul(const uint128_t lhs, const std::uint32_t rhs) noexcept { const auto low{lhs.low * rhs}; const auto high{__umulh(lhs.low, static_cast(rhs)) + (lhs.high * rhs)}; @@ -2645,7 +2645,7 @@ BOOST_INT128_FORCE_INLINE uint128_t msvc_mul(const uint128_t lhs, const std::uin #endif // MSVC implementations template -BOOST_INT128_FORCE_INLINE constexpr uint128_t default_mul(const uint128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr uint128_t default_mul(const uint128_t lhs, const UnsignedInteger rhs) noexcept { #if (defined(__aarch64__) || defined(__x86_64__) || defined(__PPC__) || defined(__powerpc__)) && defined(__GNUC__) && defined(BOOST_INT128_HAS_INT128) @@ -2722,7 +2722,7 @@ BOOST_INT128_FORCE_INLINE constexpr uint128_t default_mul(const 
uint128_t lhs, c #endif BOOST_INT128_EXPORT template -constexpr uint128_t operator*(const uint128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const uint128_t lhs, const SignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -2744,7 +2744,7 @@ constexpr uint128_t operator*(const uint128_t lhs, const SignedInteger rhs) noex } BOOST_INT128_EXPORT template -constexpr uint128_t operator*(const SignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const SignedInteger lhs, const uint128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -2766,7 +2766,7 @@ constexpr uint128_t operator*(const SignedInteger lhs, const uint128_t rhs) noex } BOOST_INT128_EXPORT template -constexpr uint128_t operator*(const uint128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const uint128_t lhs, const UnsignedInteger rhs) noexcept { return detail::default_mul(lhs, static_cast(rhs)); } @@ -2776,12 +2776,12 @@ constexpr uint128_t operator*(const uint128_t lhs, const UnsignedInteger rhs) no #endif BOOST_INT128_EXPORT template -constexpr uint128_t operator*(const UnsignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const UnsignedInteger lhs, const uint128_t rhs) noexcept { return detail::default_mul(rhs, static_cast(lhs)); } -BOOST_INT128_EXPORT constexpr uint128_t operator*(const uint128_t lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const uint128_t lhs, const uint128_t rhs) noexcept { return detail::default_mul(lhs, rhs); } @@ -2790,7 +2790,7 @@ BOOST_INT128_EXPORT constexpr uint128_t operator*(const uint128_t lhs, const uin #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT constexpr uint128_t operator*(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept 
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept { const auto abs_rhs {rhs < 0 ? -static_cast(rhs) : static_cast(rhs)}; const auto res {lhs * abs_rhs}; @@ -2798,7 +2798,7 @@ BOOST_INT128_EXPORT constexpr uint128_t operator*(const uint128_t lhs, const det return rhs < 0 ? -res : res; } -BOOST_INT128_EXPORT constexpr uint128_t operator*(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept { const auto abs_lhs {lhs < 0 ? -static_cast(lhs) : static_cast(lhs)}; const auto res {abs_lhs * rhs}; @@ -2809,14 +2809,14 @@ BOOST_INT128_EXPORT constexpr uint128_t operator*(const detail::builtin_i128 lhs #else BOOST_INT128_EXPORT template ::value, bool> = true> -constexpr uint128_t operator*(const uint128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const uint128_t, const T) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Conversion Error"); return {0, 0}; } BOOST_INT128_EXPORT template ::value, bool> = true> -constexpr uint128_t operator*(const T, const uint128_t) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const T, const uint128_t) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Conversion Error"); return {0, 0}; @@ -2824,12 +2824,12 @@ constexpr uint128_t operator*(const T, const uint128_t) noexcept #endif // BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT constexpr uint128_t operator*(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs * static_cast(rhs); } -BOOST_INT128_EXPORT constexpr uint128_t operator*(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT 
BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept { return static_cast(lhs) * rhs; } @@ -2837,7 +2837,7 @@ BOOST_INT128_EXPORT constexpr uint128_t operator*(const detail::builtin_u128 lhs #endif // BOOST_INT128_HAS_INT128 template -constexpr uint128_t& uint128_t::operator*=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator*=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(detail::is_unsigned_integer_v, "Sign Conversion Error"); @@ -2847,7 +2847,7 @@ constexpr uint128_t& uint128_t::operator*=(const Integer rhs) noexcept return *this; } -constexpr uint128_t& uint128_t::operator*=(const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator*=(const uint128_t rhs) noexcept { *this = *this * rhs; return *this; @@ -2856,7 +2856,7 @@ constexpr uint128_t& uint128_t::operator*=(const uint128_t rhs) noexcept #ifdef BOOST_INT128_HAS_MSVC_INT128 template -inline uint128_t& uint128_t::operator*=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE inline uint128_t& uint128_t::operator*=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(!std::numeric_limits::is_signed, "Sign Conversion Error"); @@ -2874,21 +2874,21 @@ inline uint128_t& uint128_t::operator*=(const Integer rhs) noexcept // For div we need forward declarations since we mix and match the arguments BOOST_INT128_EXPORT template -constexpr uint128_t operator/(uint128_t lhs, SignedInteger rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(uint128_t lhs, SignedInteger rhs) noexcept; BOOST_INT128_EXPORT template -constexpr uint128_t operator/(SignedInteger lhs, uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(SignedInteger lhs, uint128_t rhs) noexcept; BOOST_INT128_EXPORT template -constexpr uint128_t operator/(uint128_t lhs, UnsignedInteger rhs) 
noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(uint128_t lhs, UnsignedInteger rhs) noexcept; BOOST_INT128_EXPORT template -constexpr uint128_t operator/(UnsignedInteger lhs, uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(UnsignedInteger lhs, uint128_t rhs) noexcept; -BOOST_INT128_EXPORT constexpr uint128_t operator/(uint128_t lhs, uint128_t rhs) noexcept; +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(uint128_t lhs, uint128_t rhs) noexcept; template -constexpr uint128_t operator/(const uint128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(const uint128_t lhs, const SignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -2906,7 +2906,7 @@ constexpr uint128_t operator/(const uint128_t lhs, const SignedInteger rhs) noex } template -constexpr uint128_t operator/(const SignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(const SignedInteger lhs, const uint128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -2924,7 +2924,7 @@ constexpr uint128_t operator/(const SignedInteger lhs, const uint128_t rhs) noex } template -constexpr uint128_t operator/(const uint128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(const uint128_t lhs, const UnsignedInteger rhs) noexcept { using eval_type = detail::evaluation_type_t; @@ -2946,7 +2946,7 @@ constexpr uint128_t operator/(const uint128_t lhs, const UnsignedInteger rhs) no } template -constexpr uint128_t operator/(const UnsignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(const UnsignedInteger lhs, const uint128_t rhs) noexcept { using eval_type = detail::evaluation_type_t; @@ -2963,7 +2963,7 @@ constexpr uint128_t operator/(const UnsignedInteger lhs, const uint128_t rhs) no return {0, static_cast(lhs) / rhs.low}; 
} -constexpr uint128_t operator/(const uint128_t lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(const uint128_t lhs, const uint128_t rhs) noexcept { if (BOOST_INT128_UNLIKELY(rhs == 0U)) { @@ -3004,24 +3004,24 @@ constexpr uint128_t operator/(const uint128_t lhs, const uint128_t rhs) noexcept #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128) -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator/(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator/(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs / static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator/(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator/(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept { return static_cast(lhs) / rhs; } #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator/(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator/(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept { return lhs / static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator/(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator/(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept { return static_cast(lhs) / rhs; } @@ -3029,14 +3029,14 @@ BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator/(const det #else BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator/(const 
uint128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator/(const uint128_t, const T) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Conversion Error"); return {0, 0}; } BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator/(const T, const uint128_t) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator/(const T, const uint128_t) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Conversion Error"); return {0, 0}; @@ -3047,7 +3047,7 @@ BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator/(const T, const uint128_t) noe #endif // BOOST_INT128_HAS_INT128 template -constexpr uint128_t& uint128_t::operator/=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator/=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(detail::is_unsigned_integer_v, "Sign Conversion Error"); @@ -3057,7 +3057,7 @@ constexpr uint128_t& uint128_t::operator/=(const Integer rhs) noexcept return *this; } -constexpr uint128_t& uint128_t::operator/=(const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator/=(const uint128_t rhs) noexcept { *this = *this / rhs; return *this; @@ -3066,7 +3066,7 @@ constexpr uint128_t& uint128_t::operator/=(const uint128_t rhs) noexcept #ifdef BOOST_INT128_HAS_MSVC_INT128 template -inline uint128_t& uint128_t::operator/=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE inline uint128_t& uint128_t::operator/=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(!std::numeric_limits::is_signed, "Sign Conversion Error"); @@ -3084,21 +3084,21 @@ inline uint128_t& uint128_t::operator/=(const Integer rhs) noexcept // For div we need forward declarations since we mix and match the arguments BOOST_INT128_EXPORT template -constexpr uint128_t operator%(uint128_t lhs, SignedInteger 
rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(uint128_t lhs, SignedInteger rhs) noexcept; BOOST_INT128_EXPORT template -constexpr uint128_t operator%(SignedInteger lhs, uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(SignedInteger lhs, uint128_t rhs) noexcept; BOOST_INT128_EXPORT template -constexpr uint128_t operator%(uint128_t lhs, UnsignedInteger rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(uint128_t lhs, UnsignedInteger rhs) noexcept; BOOST_INT128_EXPORT template -constexpr uint128_t operator%(UnsignedInteger lhs, uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(UnsignedInteger lhs, uint128_t rhs) noexcept; -BOOST_INT128_EXPORT constexpr uint128_t operator%(uint128_t lhs, uint128_t rhs) noexcept; +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(uint128_t lhs, uint128_t rhs) noexcept; template -constexpr uint128_t operator%(const uint128_t lhs, const SignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(const uint128_t lhs, const SignedInteger rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -3116,7 +3116,7 @@ constexpr uint128_t operator%(const uint128_t lhs, const SignedInteger rhs) noex } template -constexpr uint128_t operator%(const SignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(const SignedInteger lhs, const uint128_t rhs) noexcept { #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION @@ -3134,7 +3134,7 @@ constexpr uint128_t operator%(const SignedInteger lhs, const uint128_t rhs) noex } template -constexpr uint128_t operator%(const uint128_t lhs, const UnsignedInteger rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(const uint128_t lhs, const UnsignedInteger rhs) noexcept { using eval_type = detail::evaluation_type_t; @@ -3159,7 +3159,7 @@ constexpr uint128_t operator%(const uint128_t lhs, const 
UnsignedInteger rhs) no } template -constexpr uint128_t operator%(const UnsignedInteger lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(const UnsignedInteger lhs, const uint128_t rhs) noexcept { using eval_type = detail::evaluation_type_t; @@ -3175,7 +3175,7 @@ constexpr uint128_t operator%(const UnsignedInteger lhs, const uint128_t rhs) no return {0, static_cast(lhs) % rhs.low}; } -constexpr uint128_t operator%(const uint128_t lhs, const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(const uint128_t lhs, const uint128_t rhs) noexcept { if (BOOST_INT128_UNLIKELY(rhs == 0U)) { @@ -3218,24 +3218,24 @@ constexpr uint128_t operator%(const uint128_t lhs, const uint128_t rhs) noexcept #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128) -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator%(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator%(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept { return lhs % static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator%(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator%(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept { return static_cast(lhs) % rhs; } #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator%(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator%(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept { return lhs % static_cast(rhs); } -BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator%(const detail::builtin_i128 lhs, const uint128_t rhs) 
noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator%(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept { return static_cast(lhs) % rhs; } @@ -3243,14 +3243,14 @@ BOOST_INT128_EXPORT BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator%(const det #else BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator%(const uint128_t, const T) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator%(const uint128_t, const T) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Conversion Error"); return {0, 0}; } BOOST_INT128_EXPORT template ::value, bool> = true> -BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator%(const T, const uint128_t) noexcept +BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator%(const T, const uint128_t) noexcept { static_assert(detail::is_unsigned_integer_v, "Sign Conversion Error"); return {0, 0}; @@ -3261,7 +3261,7 @@ BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator%(const T, const uint128_t) noe #endif // BOOST_INT128_HAS_INT128 template -constexpr uint128_t& uint128_t::operator%=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator%=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(detail::is_unsigned_integer_v, "Sign Conversion Error"); @@ -3271,7 +3271,7 @@ constexpr uint128_t& uint128_t::operator%=(const Integer rhs) noexcept return *this; } -constexpr uint128_t& uint128_t::operator%=(const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator%=(const uint128_t rhs) noexcept { *this = *this % rhs; return *this; @@ -3280,7 +3280,7 @@ constexpr uint128_t& uint128_t::operator%=(const uint128_t rhs) noexcept #ifdef BOOST_INT128_HAS_MSVC_INT128 template -inline uint128_t& uint128_t::operator%=(const Integer rhs) noexcept +BOOST_INT128_HOST_DEVICE inline uint128_t& 
uint128_t::operator%=(const Integer rhs) noexcept { #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION static_assert(!std::numeric_limits::is_signed, "Sign Conversion Error"); diff --git a/include/boost/int128/detail/utilities.hpp b/include/boost/int128/detail/utilities.hpp index 019d76c0..cc3ed445 100644 --- a/include/boost/int128/detail/utilities.hpp +++ b/include/boost/int128/detail/utilities.hpp @@ -5,6 +5,8 @@ #ifndef BOOST_INT128_DETAIL_UTILITIES_HPP #define BOOST_INT128_DETAIL_UTILITIES_HPP +#include + #ifndef BOOST_INT128_BUILD_MODULE #include @@ -16,7 +18,7 @@ namespace int128 { namespace detail { template -constexpr std::size_t strlen(const T* str) noexcept +BOOST_INT128_HOST_DEVICE constexpr std::size_t strlen(const T* str) noexcept { std::size_t i {}; while (*str != '\0') diff --git a/include/boost/int128/literals.hpp b/include/boost/int128/literals.hpp index a8c99e5e..9497f20b 100644 --- a/include/boost/int128/literals.hpp +++ b/include/boost/int128/literals.hpp @@ -15,76 +15,76 @@ namespace boost { namespace int128 { namespace literals { -BOOST_INT128_EXPORT constexpr uint128_t operator ""_u128(const char* str) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator ""_u128(const char* str) noexcept { uint128_t result {}; detail::from_chars(str, str + detail::strlen(str), result); return result; } -BOOST_INT128_EXPORT constexpr uint128_t operator ""_U128(const char* str) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator ""_U128(const char* str) noexcept { uint128_t result {}; detail::from_chars(str, str + detail::strlen(str), result); return result; } -BOOST_INT128_EXPORT constexpr uint128_t operator ""_u128(const char* str, std::size_t len) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator ""_u128(const char* str, std::size_t len) noexcept { uint128_t result {}; detail::from_chars(str, str + len, result); return result; } -BOOST_INT128_EXPORT constexpr uint128_t 
operator ""_U128(const char* str, std::size_t len) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator ""_U128(const char* str, std::size_t len) noexcept { uint128_t result {}; detail::from_chars(str, str + len, result); return result; } -BOOST_INT128_EXPORT constexpr uint128_t operator ""_u128(unsigned long long v) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator ""_u128(unsigned long long v) noexcept { return uint128_t{v}; } -BOOST_INT128_EXPORT constexpr uint128_t operator ""_U128(unsigned long long v) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator ""_U128(unsigned long long v) noexcept { return uint128_t{v}; } -BOOST_INT128_EXPORT constexpr int128_t operator ""_i128(const char* str) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator ""_i128(const char* str) noexcept { int128_t result {}; detail::from_chars(str, str + detail::strlen(str), result); return result; } -constexpr int128_t operator ""_I128(const char* str) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator ""_I128(const char* str) noexcept { int128_t result {}; detail::from_chars(str, str + detail::strlen(str), result); return result; } -BOOST_INT128_EXPORT constexpr int128_t operator ""_i128(unsigned long long v) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator ""_i128(unsigned long long v) noexcept { return int128_t{v}; } -BOOST_INT128_EXPORT constexpr int128_t operator ""_I128(unsigned long long v) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator ""_I128(unsigned long long v) noexcept { return int128_t{v}; } -BOOST_INT128_EXPORT constexpr int128_t operator ""_i128(const char* str, std::size_t len) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator ""_i128(const char* str, std::size_t len) noexcept { int128_t result {}; detail::from_chars(str, str + len, 
result); return result; } -BOOST_INT128_EXPORT constexpr int128_t operator ""_I128(const char* str, std::size_t len) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator ""_I128(const char* str, std::size_t len) noexcept { int128_t result {}; detail::from_chars(str, str + len, result); diff --git a/include/boost/int128/numeric.hpp b/include/boost/int128/numeric.hpp index 6aab714a..2375154f 100644 --- a/include/boost/int128/numeric.hpp +++ b/include/boost/int128/numeric.hpp @@ -52,7 +52,7 @@ BOOST_INT128_INLINE_CONSTEXPR bool is_reduced_integer_v {reduced_integers= 0 && y >= 0) { @@ -113,7 +113,7 @@ constexpr int128_t add_sat(const int128_t x, const int128_t y) noexcept } } -constexpr int128_t sub_sat(const int128_t x, const int128_t y) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t sub_sat(const int128_t x, const int128_t y) noexcept { if (x <= 0 && y >= 0) { @@ -141,7 +141,7 @@ constexpr int128_t sub_sat(const int128_t x, const int128_t y) noexcept # pragma warning(pop) #endif -BOOST_INT128_EXPORT constexpr uint128_t mul_sat(const uint128_t x, const uint128_t y) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t mul_sat(const uint128_t x, const uint128_t y) noexcept { const auto x_bits {bit_width(x)}; const auto y_bits {bit_width(y)}; @@ -154,7 +154,7 @@ BOOST_INT128_EXPORT constexpr uint128_t mul_sat(const uint128_t x, const uint128 return x * y; } -BOOST_INT128_EXPORT constexpr int128_t mul_sat(const int128_t& x, const int128_t& y) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t mul_sat(const int128_t& x, const int128_t& y) noexcept { const auto x_bits {bit_width(static_cast(abs(x)))}; const auto y_bits {bit_width(static_cast(abs(y)))}; @@ -175,12 +175,12 @@ BOOST_INT128_EXPORT constexpr int128_t mul_sat(const int128_t& x, const int128_t return res; } -BOOST_INT128_EXPORT constexpr uint128_t div_sat(const uint128_t x, const uint128_t y) noexcept +BOOST_INT128_EXPORT 
BOOST_INT128_HOST_DEVICE constexpr uint128_t div_sat(const uint128_t x, const uint128_t y) noexcept { return x / y; } -BOOST_INT128_EXPORT constexpr int128_t div_sat(const int128_t x, const int128_t y) noexcept +BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t div_sat(const int128_t x, const int128_t y) noexcept { if (BOOST_INT128_UNLIKELY(x == (std::numeric_limits::min)() && y == -1)) { @@ -197,7 +197,7 @@ BOOST_INT128_EXPORT constexpr int128_t div_sat(const int128_t x, const int128_t #endif BOOST_INT128_EXPORT template , bool> = true> -constexpr TargetType saturate_cast(const uint128_t value) noexcept +BOOST_INT128_HOST_DEVICE constexpr TargetType saturate_cast(const uint128_t value) noexcept { BOOST_INT128_IF_CONSTEXPR (std::is_same::value) { @@ -219,7 +219,7 @@ constexpr TargetType saturate_cast(const uint128_t value) noexcept #endif BOOST_INT128_EXPORT template , bool> = true> -constexpr TargetType saturate_cast(const int128_t value) noexcept +BOOST_INT128_HOST_DEVICE constexpr TargetType saturate_cast(const int128_t value) noexcept { BOOST_INT128_IF_CONSTEXPR (std::is_same::value) { @@ -251,7 +251,7 @@ constexpr TargetType saturate_cast(const int128_t value) noexcept namespace detail { -constexpr std::uint64_t gcd64(std::uint64_t x, std::uint64_t y) noexcept +BOOST_INT128_HOST_DEVICE constexpr std::uint64_t gcd64(std::uint64_t x, std::uint64_t y) noexcept { if (x == 0) { @@ -283,7 +283,7 @@ constexpr std::uint64_t gcd64(std::uint64_t x, std::uint64_t y) noexcept } // namespace detail -constexpr uint128_t gcd(uint128_t a, uint128_t b) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t gcd(uint128_t a, uint128_t b) noexcept { // Base case if (a == 0U) @@ -320,7 +320,7 @@ constexpr uint128_t gcd(uint128_t a, uint128_t b) noexcept return uint128_t{0, g} << shift; } -constexpr int128_t gcd(const int128_t a, const int128_t b) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t gcd(const int128_t a, const int128_t b) noexcept { return 
static_cast(gcd(static_cast(abs(a)), static_cast(abs(b)))); } @@ -330,7 +330,7 @@ constexpr int128_t gcd(const int128_t a, const int128_t b) noexcept // but very slow impl that we know works. #if !(defined(_M_IX86) && !defined(_NDEBUG)) -constexpr uint128_t lcm(const uint128_t a, const uint128_t b) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t lcm(const uint128_t a, const uint128_t b) noexcept { if (a == 0U || b == 0U) { @@ -346,7 +346,7 @@ constexpr uint128_t lcm(const uint128_t a, const uint128_t b) noexcept #else -constexpr uint128_t lcm(uint128_t a, uint128_t b) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t lcm(uint128_t a, uint128_t b) noexcept { if (a == 0U || b == 0U) { @@ -380,12 +380,12 @@ constexpr uint128_t lcm(uint128_t a, uint128_t b) noexcept #endif -constexpr int128_t lcm(const int128_t a, const int128_t b) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t lcm(const int128_t a, const int128_t b) noexcept { return static_cast(lcm(static_cast(abs(a)), static_cast(abs(b)))); } -constexpr uint128_t midpoint(const uint128_t a, const uint128_t b) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t midpoint(const uint128_t a, const uint128_t b) noexcept { // Bit manipulation formula works for unsigned integers auto mid {(a & b) + ((a ^ b) >> 1)}; @@ -399,7 +399,7 @@ constexpr uint128_t midpoint(const uint128_t a, const uint128_t b) noexcept return mid; } -constexpr int128_t midpoint(const int128_t a, const int128_t b) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t midpoint(const int128_t a, const int128_t b) noexcept { // For signed integers, we use a + (b - a) / 2 or a - (a - b) / 2 // The subtraction is done in unsigned arithmetic to handle overflow correctly From f931eaee3cc08738daf4483c8674d8e811259c96 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 9 Mar 2026 15:23:47 -0400 Subject: [PATCH 049/137] Add HOST_DEVICE macro to automatic configuration page --- doc/modules/ROOT/pages/api_reference.adoc | 3 +++ 
doc/modules/ROOT/pages/config.adoc | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/doc/modules/ROOT/pages/api_reference.adoc b/doc/modules/ROOT/pages/api_reference.adoc index b5a8bd4a..3ce99e76 100644 --- a/doc/modules/ROOT/pages/api_reference.adoc +++ b/doc/modules/ROOT/pages/api_reference.adoc @@ -323,6 +323,9 @@ Listed by analogous STL header. | xref:config.adoc#automatic_config[`BOOST_INT128_ENDIAN_BIG_BYTE`] | Defined on big-endian systems + +| xref:config.adoc#host_device[`BOOST_INT128_HOST_DEVICE`] +| Expands to `pass:[__host__ __device__]` under NVCC for CUDA support |=== [#api_headers] diff --git a/doc/modules/ROOT/pages/config.adoc b/doc/modules/ROOT/pages/config.adoc index 701e0a46..7d8458e1 100644 --- a/doc/modules/ROOT/pages/config.adoc +++ b/doc/modules/ROOT/pages/config.adoc @@ -42,3 +42,7 @@ This macro will automatically be defined in the presence of `-fno-exceptions` or - `BOOST_INT128_ENDIAN_LITTLE_BYTE`: This is defined to `1` when compiling on a little endian architecture, otherwise `0`. - `BOOST_INT128_ENDIAN_BIG_BYTE`: This is defined to `1` when compiling on a big endian architecture, otherwise `0`. + +[#host_device] +- `BOOST_INT128_HOST_DEVICE`: This is defined to `pass:[__host__ __device__]` when compiling with NVCC (`pass:[__NVCC__]` is defined), and to nothing otherwise. +All public functions, constructors, operators, and conversion operators in the library are annotated with this macro, allowing `int128_t` and `uint128_t` to be used in CUDA device code without modification. 
From ba811a7f2f6d9f087269ffed22a8d2a7ce707b05 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 9 Mar 2026 15:23:57 -0400 Subject: [PATCH 050/137] Decorate docs --- doc/modules/ROOT/pages/bit.adoc | 24 ++-- doc/modules/ROOT/pages/cstdlib.adoc | 4 +- doc/modules/ROOT/pages/int128_t.adoc | 150 ++++++++++----------- doc/modules/ROOT/pages/literals.adoc | 24 ++-- doc/modules/ROOT/pages/mixed_type_ops.adoc | 88 ++++++------ doc/modules/ROOT/pages/numeric.adoc | 30 ++--- doc/modules/ROOT/pages/uint128_t.adoc | 150 ++++++++++----------- 7 files changed, 235 insertions(+), 235 deletions(-) diff --git a/doc/modules/ROOT/pages/bit.adoc b/doc/modules/ROOT/pages/bit.adoc index f9bfe4eb..a25f94e9 100644 --- a/doc/modules/ROOT/pages/bit.adoc +++ b/doc/modules/ROOT/pages/bit.adoc @@ -28,7 +28,7 @@ Returns `true` if `x` is a power of two; otherwise `false` namespace boost { namespace int128 { -constexpr bool has_single_bit(uint128_t x) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool has_single_bit(uint128_t x) noexcept; } // namespace int128 } // namespace boost @@ -44,7 +44,7 @@ Returns the number of consecutive `0` bits in the value `x`, starting from the m namespace boost { namespace int128 { -constexpr int countl_zero(uint128_t x) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int countl_zero(uint128_t x) noexcept; } // namespace int128 } // namespace boost @@ -60,7 +60,7 @@ Returns the number of consecutive `1` bits in the value `x`, starting from the m namespace boost { namespace int128 { -constexpr int countl_one(uint128_t x) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int countl_one(uint128_t x) noexcept; } // namespace int128 } // namespace boost @@ -77,7 +77,7 @@ If `x` is zero, returns 0 namespace boost { namespace int128 { -constexpr int bit_width(uint128_t x) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int bit_width(uint128_t x) noexcept; } // namespace int128 } // namespace boost @@ -93,7 +93,7 @@ Returns the smallest integral power of two that is not 
smaller than `x`. namespace boost { namespace int128 { -constexpr uint128_t bit_ceil(uint128_t x) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t bit_ceil(uint128_t x) noexcept; } // namespace int128 } // namespace boost @@ -110,7 +110,7 @@ If `x` is 0 then returns 0. namespace boost { namespace int128 { -constexpr uint128_t bit_floor(uint128_t x) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t bit_floor(uint128_t x) noexcept; } // namespace int128 } // namespace boost @@ -126,7 +126,7 @@ Returns the number of consecutive `0` bits in the value `x`, starting from the l namespace boost { namespace int128 { -constexpr int countr_zero(uint128_t x) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int countr_zero(uint128_t x) noexcept; } // namespace int128 } // namespace boost @@ -142,7 +142,7 @@ Returns the number of consecutive `1` bits in the value `x`, starting from the l namespace boost { namespace int128 { -constexpr int countr_one(uint128_t x) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int countr_one(uint128_t x) noexcept; } // namespace int128 } // namespace boost @@ -159,7 +159,7 @@ This operation is also known as a left circular shift. namespace boost { namespace int128 { -constexpr uint128_t rotl(uint128_t x, int s) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t rotl(uint128_t x, int s) noexcept; } // namespace int128 } // namespace boost @@ -176,7 +176,7 @@ This operation is also known as a right circular shift. namespace boost { namespace int128 { -constexpr uint128_t rotr(uint128_t x, int s) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t rotr(uint128_t x, int s) noexcept; } // namespace int128 } // namespace boost @@ -192,7 +192,7 @@ Returns the number of `1` bits in `x`. 
namespace boost { namespace int128 { -constexpr int popcount(uint128_t x) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int popcount(uint128_t x) noexcept; } // namespace int128 } // namespace boost @@ -208,7 +208,7 @@ Reverses the bytes in the given integer value `x`. namespace boost { namespace int128 { -constexpr uint128_t byteswap(uint128_t x) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t byteswap(uint128_t x) noexcept; } // namespace int128 } // namespace boost diff --git a/doc/modules/ROOT/pages/cstdlib.adoc b/doc/modules/ROOT/pages/cstdlib.adoc index de43ed55..b3a6ea8f 100644 --- a/doc/modules/ROOT/pages/cstdlib.adoc +++ b/doc/modules/ROOT/pages/cstdlib.adoc @@ -51,9 +51,9 @@ Using the structures defined above, the `div` function computes both quotient an namespace boost { namespace int128 { -constexpr u128div_t div(uint128_t lhs, uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr u128div_t div(uint128_t lhs, uint128_t rhs) noexcept; -constexpr i128div_t div(int128_t lhs, int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr i128div_t div(int128_t lhs, int128_t rhs) noexcept; } // namespace int128 } // namespace boost diff --git a/doc/modules/ROOT/pages/int128_t.adoc b/doc/modules/ROOT/pages/int128_t.adoc index 1765099d..3989fe6a 100644 --- a/doc/modules/ROOT/pages/int128_t.adoc +++ b/doc/modules/ROOT/pages/int128_t.adoc @@ -81,30 +81,30 @@ struct int128_t ... 
// Defaulted basic construction - constexpr int128_t() noexcept = default; - constexpr int128_t(const int128_t&) noexcept = default; - constexpr int128_t(int128_t&&) noexcept = default; - constexpr int128_t& operator=(const int128_t&) noexcept = default; - constexpr int128_t& operator=(int128_t&&) noexcept = default; + BOOST_INT128_HOST_DEVICE constexpr int128_t() noexcept = default; + BOOST_INT128_HOST_DEVICE constexpr int128_t(const int128_t&) noexcept = default; + BOOST_INT128_HOST_DEVICE constexpr int128_t(int128_t&&) noexcept = default; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator=(const int128_t&) noexcept = default; + BOOST_INT128_HOST_DEVICE constexpr int128_t& operator=(int128_t&&) noexcept = default; - explicit constexpr int128_t(const uint128_t& v) noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr int128_t(const uint128_t& v) noexcept; // Construct from integral types - constexpr int128_t(const std::int64_t hi, const std::uint64_t lo) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t(const std::int64_t hi, const std::uint64_t lo) noexcept; template - constexpr int128_t(const SignedInteger v) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t(const SignedInteger v) noexcept; template - constexpr int128_t(const UnsignedInteger v) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t(const UnsignedInteger v) noexcept; #ifdef BOOST_INT128_HAS_INT128 // Typically a typedef from __int128 - constexpr int128_t(const detail::builtin_i128 v) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t(const detail::builtin_i128 v) noexcept; // Typically a typedef unsigned __int128 - constexpr int128_t(const detail::builtin_u128 v) noexcept; + BOOST_INT128_HOST_DEVICE constexpr int128_t(const detail::builtin_u128 v) noexcept; #endif // BOOST_INT128_HAS_INT128 }; @@ -129,26 +129,26 @@ struct int128_t ... 
// Integer conversion operators - constexpr operator bool() const noexcept; + BOOST_INT128_HOST_DEVICE constexpr operator bool() const noexcept; template - explicit constexpr operator SignedInteger() const noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr operator SignedInteger() const noexcept; template - explicit constexpr operator UnsignedInteger() const noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr operator UnsignedInteger() const noexcept; #ifdef BOOST_INT128_HAS_INT128 - explicit constexpr operator detail::builtin_i128() const noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr operator detail::builtin_i128() const noexcept; - explicit constexpr operator detail::builtin_u128() const noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr operator detail::builtin_u128() const noexcept; #endif // BOOST_INT128_HAS_INT128 // Conversion to float - explicit constexpr operator float() const noexcept; - explicit constexpr operator double() const noexcept; - explicit constexpr operator long double() const noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr operator float() const noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr operator double() const noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr operator long double() const noexcept; }; } // namespace int128 @@ -168,12 +168,12 @@ as the number of digits it represents can exceed the precision of the significan [source, c++] ---- template -constexpr bool operator<(const int128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator<(const int128_t lhs, const Integer rhs) noexcept; template -constexpr bool operator<(const Integer lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator<(const Integer lhs, const int128_t rhs) noexcept; -constexpr bool operator<(const int128_t lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator<(const int128_t lhs, const int128_t rhs) noexcept; ---- Returns if 
the `lhs` value is less than the `rhs` value without exception. @@ -184,12 +184,12 @@ This operation is only defined for integers and is subject to mixed sign limitat [source, c++] ---- template -constexpr bool operator<=(const int128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator<=(const int128_t lhs, const Integer rhs) noexcept; template -constexpr bool operator<=(const Integer lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator<=(const Integer lhs, const int128_t rhs) noexcept; -constexpr bool operator<=(const int128_t lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator<=(const int128_t lhs, const int128_t rhs) noexcept; ---- Returns if the `lhs` value is less than or equal to the `rhs` value without exception. @@ -200,12 +200,12 @@ This operation is only defined for integers and is subject to mixed sign limitat [source, c++] ---- template -constexpr bool operator>(const int128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator>(const int128_t lhs, const Integer rhs) noexcept; template -constexpr bool operator>(const Integer lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator>(const Integer lhs, const int128_t rhs) noexcept; -constexpr bool operator>(const int128_t lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator>(const int128_t lhs, const int128_t rhs) noexcept; ---- Returns if the `lhs` value is greater than the `rhs` value without exception. 
@@ -216,12 +216,12 @@ This operation is only defined for integers and is subject to mixed sign limitat [source, c++] ---- template -constexpr bool operator>=(const int128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator>=(const int128_t lhs, const Integer rhs) noexcept; template -constexpr bool operator>=(const Integer lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator>=(const Integer lhs, const int128_t rhs) noexcept; -constexpr bool operator>=(const int128_t lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator>=(const int128_t lhs, const int128_t rhs) noexcept; ---- Returns if the `lhs` value is greater than or equal to the `rhs` value without exception. @@ -232,12 +232,12 @@ This operation is only defined for integers and is subject to mixed sign limitat [source, c++] ---- template -constexpr bool operator==(const int128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator==(const int128_t lhs, const Integer rhs) noexcept; template -constexpr bool operator==(const Integer lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator==(const Integer lhs, const int128_t rhs) noexcept; -constexpr bool operator==(const int128_t lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator==(const int128_t lhs, const int128_t rhs) noexcept; ---- Returns if the `lhs` value is equal to the `rhs` value without exception. 
@@ -248,12 +248,12 @@ This operation is only defined for integers and is subject to mixed sign limitat [source, c++] ---- template -constexpr bool operator!=(const int128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const int128_t lhs, const Integer rhs) noexcept; template -constexpr bool operator!=(const Integer lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const Integer lhs, const int128_t rhs) noexcept; -constexpr bool operator!=(const int128_t lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const int128_t lhs, const int128_t rhs) noexcept; ---- Returns if the `lhs` value is not equal to the `rhs` value without exception. @@ -263,13 +263,13 @@ This operation is only defined for integers and is subject to mixed sign limitat [source, c++] ---- -constexpr std::strong_ordering operator<=>(const int128_t lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const int128_t lhs, const int128_t rhs) noexcept; template -constexpr std::strong_ordering operator<=>(const int128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const int128_t lhs, const Integer rhs) noexcept; template -constexpr std::strong_ordering operator<=>(const Integer lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const Integer lhs, const int128_t rhs) noexcept; ---- Returns one of the following without exception: @@ -285,7 +285,7 @@ Returns one of the following without exception: [source, c++] ---- -constexpr int128_t operator~(const int128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr int128_t operator~(const int128_t rhs) noexcept ---- Returns the bitwise negation of `rhs` without exception. @@ -295,12 +295,12 @@ Returns the bitwise negation of `rhs` without exception. 
[source, c++] ---- template -constexpr int128_t operator|(const int128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator|(const int128_t lhs, const Integer rhs) noexcept; template -constexpr int128_t operator|(const Integer lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator|(const Integer lhs, const int128_t rhs) noexcept; -constexpr int128_t operator|(const int128_t lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator|(const int128_t lhs, const int128_t rhs) noexcept; ---- Returns the bitwise or of `lhs` and `rhs` without exception. @@ -311,12 +311,12 @@ This operation is subject to mixed sign limitations discussed xref:int128_t.adoc [source, c++] ---- template -constexpr int128_t operator&(const int128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator&(const int128_t lhs, const Integer rhs) noexcept; template -constexpr int128_t operator&(const Integer lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator&(const Integer lhs, const int128_t rhs) noexcept; -constexpr int128_t operator&(const int128_t lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator&(const int128_t lhs, const int128_t rhs) noexcept; ---- Returns the bitwise and of `lhs` and `rhs` without exception. 
@@ -327,12 +327,12 @@ This operation is subject to mixed sign limitations discussed xref:int128_t.adoc [source, c++] ---- template -constexpr int128_t operator^(const int128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator^(const int128_t lhs, const Integer rhs) noexcept; template -constexpr int128_t operator^(const Integer lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator^(const Integer lhs, const int128_t rhs) noexcept; -constexpr int128_t operator^(const int128_t lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator^(const int128_t lhs, const int128_t rhs) noexcept; ---- Returns the bitwise xor of `lhs` and `rhs` without exception. @@ -343,18 +343,18 @@ This operation is subject to mixed sign limitations discussed xref:int128_t.adoc [source, c++] ---- template -constexpr int128_t operator<<(const int128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator<<(const int128_t lhs, const Integer rhs) noexcept; template ::value && (sizeof(Integer) * 8 > 16), bool> = true> -constexpr Integer operator<<(const Integer lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr Integer operator<<(const Integer lhs, const int128_t rhs) noexcept; template && (sizeof(SignedInteger) * 8 <= 16), bool> = true> -constexpr int operator<<(const SignedInteger lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int operator<<(const SignedInteger lhs, const int128_t rhs) noexcept; template && (sizeof(UnsignedInteger) * 8 <= 16), bool> = true> -constexpr unsigned int operator<<(const UnsignedInteger lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr unsigned int operator<<(const UnsignedInteger lhs, const int128_t rhs) noexcept; -constexpr int128_t operator<<(const int128_t lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator<<(const int128_t lhs, 
const int128_t rhs) noexcept; ---- Returns the bitwise left shift of `lhs` without exception. @@ -366,18 +366,18 @@ This operation is subject to mixed sign limitations discussed xref:int128_t.adoc [source, c++] ---- template -constexpr int128_t operator>>(const int128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator>>(const int128_t lhs, const Integer rhs) noexcept; template ::value && (sizeof(Integer) * 8 > 16), bool> = true> -constexpr Integer operator>>(const Integer lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr Integer operator>>(const Integer lhs, const int128_t rhs) noexcept; template && (sizeof(SignedInteger) * 8 <= 16), bool> = true> -constexpr int operator>>(const SignedInteger lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int operator>>(const SignedInteger lhs, const int128_t rhs) noexcept; template && (sizeof(UnsignedInteger) * 8 <= 16), bool> = true> -constexpr unsigned operator>>(UnsignedInteger lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr unsigned operator>>(UnsignedInteger lhs, const int128_t rhs) noexcept; -constexpr int128_t operator>>(const int128_t lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator>>(const int128_t lhs, const int128_t rhs) noexcept; ---- Returns the bitwise right shift of `lhs` without exception. 
@@ -393,12 +393,12 @@ This operation is subject to mixed sign limitations discussed xref:int128_t.adoc [source, c++] ---- template -constexpr int128_t operator+(const int128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator+(const int128_t lhs, const Integer rhs) noexcept; template -constexpr int128_t operator+(const Integer lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator+(const Integer lhs, const int128_t rhs) noexcept; -constexpr int128_t operator+(const int128_t lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator+(const int128_t lhs, const int128_t rhs) noexcept; ---- Returns as an `int128_t` the sum of `lhs` and `rhs`. @@ -410,12 +410,12 @@ This operation is only defined for integers and is subject to mixed sign limitat [source, c++] ---- template -constexpr int128_t operator-(const int128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator-(const int128_t lhs, const Integer rhs) noexcept; template -constexpr int128_t operator-(const Integer lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator-(const Integer lhs, const int128_t rhs) noexcept; -constexpr int128_t operator-(const int128_t lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator-(const int128_t lhs, const int128_t rhs) noexcept; ---- Returns as an `int128_t` the difference of `lhs` and `rhs`. 
@@ -427,12 +427,12 @@ This operation is only defined for integers and is subject to mixed sign limitat [source, c++] ---- template -constexpr int128_t operator*(const int128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const int128_t lhs, const Integer rhs) noexcept; template -constexpr int128_t operator*(const Integer lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const Integer lhs, const int128_t rhs) noexcept; -constexpr int128_t operator*(const int128_t lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const int128_t lhs, const int128_t rhs) noexcept; ---- Returns as an `int128_t` the product of `lhs` and `rhs`. @@ -444,12 +444,12 @@ This operation is only defined for integers and is subject to mixed sign limitat [source, c++] ---- template -constexpr int128_t operator/(const int128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const int128_t lhs, const Integer rhs) noexcept; template -constexpr int128_t operator/(const Integer lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const Integer lhs, const int128_t rhs) noexcept; -constexpr int128_t operator/(const int128_t lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const int128_t lhs, const int128_t rhs) noexcept; ---- Returns as an `int128_t` the quotient of `lhs` and `rhs` without exception. 
@@ -460,12 +460,12 @@ This operation is only defined for integers and is subject to mixed sign limitat [source, c++] ---- template -constexpr int128_t operator%(const int128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const int128_t lhs, const Integer rhs) noexcept; template -constexpr int128_t operator%(const Integer lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const Integer lhs, const int128_t rhs) noexcept; -constexpr int128_t operator%(const int128_t lhs, const int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const int128_t lhs, const int128_t rhs) noexcept; ---- Returns as an `int128_t` the remainder of `lhs` and `rhs` without exception. diff --git a/doc/modules/ROOT/pages/literals.adoc b/doc/modules/ROOT/pages/literals.adoc index 2b698704..bb4a5a0f 100644 --- a/doc/modules/ROOT/pages/literals.adoc +++ b/doc/modules/ROOT/pages/literals.adoc @@ -18,29 +18,29 @@ namespace boost { namespace int128 { namespace literals { -constexpr uint128_t operator ""_u128(const char* str) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator ""_u128(const char* str) noexcept; -constexpr uint128_t operator ""_U128(const char* str) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator ""_U128(const char* str) noexcept; -constexpr uint128_t operator ""_u128(const char* str, std::size_t len) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator ""_u128(const char* str, std::size_t len) noexcept; -constexpr uint128_t operator ""_U128(const char* str, std::size_t len) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator ""_U128(const char* str, std::size_t len) noexcept; -constexpr uint128_t operator ""_u128(unsigned long long v) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator ""_u128(unsigned long long v) noexcept; -constexpr uint128_t operator ""_U128(unsigned long long v) noexcept; 
+BOOST_INT128_HOST_DEVICE constexpr uint128_t operator ""_U128(unsigned long long v) noexcept; -constexpr int128_t operator ""_i128(const char* str) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator ""_i128(const char* str) noexcept; -constexpr int128_t operator ""_I128(const char* str) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator ""_I128(const char* str) noexcept; -constexpr int128_t operator ""_i128(const char* str, std::size_t len) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator ""_i128(const char* str, std::size_t len) noexcept; -constexpr int128_t operator ""_I128(const char* str, std::size_t len) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator ""_I128(const char* str, std::size_t len) noexcept; -constexpr int128_t operator ""_i128(unsigned long long v) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator ""_i128(unsigned long long v) noexcept; -constexpr int128_t operator ""_I128(unsigned long long v) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t operator ""_I128(unsigned long long v) noexcept; } // namespace literals } // namespace int128 diff --git a/doc/modules/ROOT/pages/mixed_type_ops.adoc b/doc/modules/ROOT/pages/mixed_type_ops.adoc index 5a02877d..961490c8 100644 --- a/doc/modules/ROOT/pages/mixed_type_ops.adoc +++ b/doc/modules/ROOT/pages/mixed_type_ops.adoc @@ -32,53 +32,53 @@ namespace int128 { // Comparison Operators //===================================== -constexpr bool operator==(uint128_t lhs, int128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr bool operator==(uint128_t lhs, int128_t rhs); -constexpr bool operator==(int128_t lhs, uint128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr bool operator==(int128_t lhs, uint128_t rhs); -constexpr bool operator!=(uint128_t lhs, int128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr bool operator!=(uint128_t lhs, int128_t rhs); -constexpr bool operator!=(int128_t lhs, uint128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr bool 
operator!=(int128_t lhs, uint128_t rhs); -constexpr bool operator<(uint128_t lhs, int128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr bool operator<(uint128_t lhs, int128_t rhs); -constexpr bool operator<(int128_t lhs, uint128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr bool operator<(int128_t lhs, uint128_t rhs); -constexpr bool operator<=(uint128_t lhs, int128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr bool operator<=(uint128_t lhs, int128_t rhs); -constexpr bool operator<=(int128_t lhs, uint128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr bool operator<=(int128_t lhs, uint128_t rhs); -constexpr bool operator>(uint128_t lhs, int128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr bool operator>(uint128_t lhs, int128_t rhs); -constexpr bool operator>(int128_t lhs, uint128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr bool operator>(int128_t lhs, uint128_t rhs); -constexpr bool operator>=(uint128_t lhs, int128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr bool operator>=(uint128_t lhs, int128_t rhs); -constexpr bool operator>=(int128_t lhs, uint128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr bool operator>=(int128_t lhs, uint128_t rhs); //===================================== // Arithmetic Operators //===================================== -constexpr uint128_t operator+(uint128_t lhs, int128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator+(uint128_t lhs, int128_t rhs); -constexpr uint128_t operator+(int128_t lhs, uint128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator+(int128_t lhs, uint128_t rhs); -constexpr uint128_t operator-(uint128_t lhs, int128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator-(uint128_t lhs, int128_t rhs); -constexpr uint128_t operator-(int128_t lhs, uint128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator-(int128_t lhs, uint128_t rhs); -constexpr uint128_t operator*(uint128_t lhs, int128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(uint128_t lhs, int128_t rhs); -constexpr 
uint128_t operator*(int128_t lhs, uint128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(int128_t lhs, uint128_t rhs); -constexpr uint128_t operator/(uint128_t lhs, int128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(uint128_t lhs, int128_t rhs); -constexpr uint128_t operator/(int128_t lhs, uint128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(int128_t lhs, uint128_t rhs); -constexpr uint128_t operator%(uint128_t lhs, int128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(uint128_t lhs, int128_t rhs); -constexpr uint128_t operator%(int128_t lhs, uint128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(int128_t lhs, uint128_t rhs); } // namespace int128 } // namespace boost @@ -93,9 +93,9 @@ If you define xref:config.adoc#sign_compare[`BOOST_INT128_ALLOW_SIGN_COMPARE`], [source, c++] ---- -constexpr bool operator==(uint128_t lhs, int128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr bool operator==(uint128_t lhs, int128_t rhs); -constexpr bool operator==(int128_t lhs, uint128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr bool operator==(int128_t lhs, uint128_t rhs); ---- If the `int128_t` argument is less than 0 returns `false`. 
@@ -105,9 +105,9 @@ Otherwise, returns the same as `static_cast(lhs) == static_cast(lhs) != static_cast(lhs) < static_cast(lhs) pass:[<=] static_cas [source, c++] ---- -constexpr bool operator>(uint128_t lhs, int128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr bool operator>(uint128_t lhs, int128_t rhs); -constexpr bool operator>(int128_t lhs, uint128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr bool operator>(int128_t lhs, uint128_t rhs); ---- If `lhs` is type `int128_t` returns `false` if `lhs < 0` @@ -156,9 +156,9 @@ Otherwise, returns the same as `static_cast(lhs) > static_cast=(uint128_t lhs, int128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr bool operator>=(uint128_t lhs, int128_t rhs); -constexpr bool operator>=(int128_t lhs, uint128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr bool operator>=(int128_t lhs, uint128_t rhs); ---- If `lhs` is type `int128_t` returns `false` if `lhs < 0` @@ -173,9 +173,9 @@ If you define xref:config.adoc#sign_compare[`BOOST_INT128_ALLOW_SIGN_CONVERSION` [source, c++] ---- -constexpr uint128_t operator+(uint128_t lhs, int128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator+(uint128_t lhs, int128_t rhs); -constexpr uint128_t operator+(int128_t lhs, uint128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator+(int128_t lhs, uint128_t rhs); ---- Returns the same as `static_cast(lhs) + static_cast(rhs)` @@ -184,9 +184,9 @@ Returns the same as `static_cast(lhs) + static_cast(rhs)` [source, c++] ---- -constexpr uint128_t operator-(uint128_t lhs, int128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator-(uint128_t lhs, int128_t rhs); -constexpr uint128_t operator-(int128_t lhs, uint128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator-(int128_t lhs, uint128_t rhs); ---- Returns the same as `static_cast(lhs) - static_cast(rhs)` @@ -195,9 +195,9 @@ Returns the same as `static_cast(lhs) - static_cast(rhs)` [source, c++] ---- -constexpr uint128_t operator*(uint128_t lhs, int128_t rhs); 
+BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(uint128_t lhs, int128_t rhs); -constexpr uint128_t operator*(int128_t lhs, uint128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(int128_t lhs, uint128_t rhs); ---- Returns the same as `static_cast(lhs) * static_cast(rhs)` @@ -206,9 +206,9 @@ Returns the same as `static_cast(lhs) * static_cast(rhs)` [source, c++] ---- -constexpr uint128_t operator/(uint128_t lhs, int128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(uint128_t lhs, int128_t rhs); -constexpr uint128_t operator/(int128_t lhs, uint128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(int128_t lhs, uint128_t rhs); ---- Returns the same as `static_cast(lhs) / static_cast(rhs)` @@ -217,9 +217,9 @@ Returns the same as `static_cast(lhs) / static_cast(rhs)` [source, c++] ---- -constexpr uint128_t operator%(uint128_t lhs, int128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(uint128_t lhs, int128_t rhs); -constexpr uint128_t operator%(int128_t lhs, uint128_t rhs); +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(int128_t lhs, uint128_t rhs); ---- Returns the same as `static_cast(lhs) % static_cast(rhs)` diff --git a/doc/modules/ROOT/pages/numeric.adoc b/doc/modules/ROOT/pages/numeric.adoc index 904dc145..af0fb9b3 100644 --- a/doc/modules/ROOT/pages/numeric.adoc +++ b/doc/modules/ROOT/pages/numeric.adoc @@ -23,21 +23,21 @@ The following functions are provided for saturating arithmetic, and they *do not namespace boost { namespace int128 { -constexpr uint128_t add_sat(uint128_t lhs, uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t add_sat(uint128_t lhs, uint128_t rhs) noexcept; -constexpr int128_t add_sat(int128_t lhs, int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t add_sat(int128_t lhs, int128_t rhs) noexcept; -constexpr uint128_t sub_sat(uint128_t lhs, uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t sub_sat(uint128_t 
lhs, uint128_t rhs) noexcept; -constexpr int128_t sub_sat(int128_t lhs, int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t sub_sat(int128_t lhs, int128_t rhs) noexcept; -constexpr uint128_t mul_sat(uint128_t lhs, uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t mul_sat(uint128_t lhs, uint128_t rhs) noexcept; -constexpr int128_t mul_sat(int128_t lhs, int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t mul_sat(int128_t lhs, int128_t rhs) noexcept; -constexpr uint128_t div_sat(uint128_t lhs, uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t div_sat(uint128_t lhs, uint128_t rhs) noexcept; -constexpr int128_t div_sat(int128_t lhs, int128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t div_sat(int128_t lhs, int128_t rhs) noexcept; } // namespace int128 } // namespace boost @@ -58,7 +58,7 @@ namespace boost { namespace int128 { constexpr -constexpr TargetIntegerType saturate_cast(LibraryIntegerType x) noexcept; +BOOST_INT128_HOST_DEVICE constexpr TargetIntegerType saturate_cast(LibraryIntegerType x) noexcept; } // namespace int128 } // namespace boost @@ -76,9 +76,9 @@ Computes the greatest common divisor of `a` and `b`. namespace boost { namespace int128 { -constexpr uint128_t gcd(uint128_t a, uint128_t b) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t gcd(uint128_t a, uint128_t b) noexcept; -constexpr int128_t gcd(const int128_t a, const int128_t b) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t gcd(const int128_t a, const int128_t b) noexcept; } // namespace int128 } // namespace boost @@ -97,9 +97,9 @@ Computes the least common multiple of `a` and `b`. 
namespace boost { namespace int128 { -constexpr uint128_t lcm(uint128_t a, uint128_t b) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t lcm(uint128_t a, uint128_t b) noexcept; -constexpr int128_t lcm(const int128_t a, const int128_t b) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t lcm(const int128_t a, const int128_t b) noexcept; } // namespace int128 } // namespace boost @@ -118,9 +118,9 @@ Computes the midpoint of `a` and `b`, rounding towards `a`. namespace boost { namespace int128 { -constexpr uint128_t midpoint(uint128_t a, uint128_t b) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t midpoint(uint128_t a, uint128_t b) noexcept; -constexpr int128_t midpoint(const int128_t a, const int128_t b) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int128_t midpoint(const int128_t a, const int128_t b) noexcept; } // namespace int128 } // namespace boost diff --git a/doc/modules/ROOT/pages/uint128_t.adoc b/doc/modules/ROOT/pages/uint128_t.adoc index 06c2a59f..70c59fc3 100644 --- a/doc/modules/ROOT/pages/uint128_t.adoc +++ b/doc/modules/ROOT/pages/uint128_t.adoc @@ -120,30 +120,30 @@ struct uint128_t ... 
// Defaulted basic construction - constexpr uint128_t() noexcept = default; - constexpr uint128_t(const uint128_t&) noexcept = default; - constexpr uint128_t(uint128_t&&) noexcept = default; - constexpr uint128_t& operator=(const uint128_t&) noexcept = default; - constexpr uint128_t& operator=(uint128_t&&) noexcept = default; + BOOST_INT128_HOST_DEVICE constexpr uint128_t() noexcept = default; + BOOST_INT128_HOST_DEVICE constexpr uint128_t(const uint128_t&) noexcept = default; + BOOST_INT128_HOST_DEVICE constexpr uint128_t(uint128_t&&) noexcept = default; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator=(const uint128_t&) noexcept = default; + BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator=(uint128_t&&) noexcept = default; - explicit constexpr uint128_t(const int128_t& v) noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr uint128_t(const int128_t& v) noexcept; // Construct from integral types - constexpr uint128_t(const std::uint64_t hi, const std::uint64_t lo) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t(const std::uint64_t hi, const std::uint64_t lo) noexcept; template - constexpr uint128_t(const SignedInteger v) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t(const SignedInteger v) noexcept; template - constexpr uint128_t(const UnsignedInteger v) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t(const UnsignedInteger v) noexcept; #ifdef BOOST_INT128_HAS_INT128 // Typically a typedef from __int128 - constexpr uint128_t(const detail::builtin_i128 v) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t(const detail::builtin_i128 v) noexcept; // Typically a typedef unsigned __int128 - constexpr uint128_t(const detail::builtin_u128 v) noexcept; + BOOST_INT128_HOST_DEVICE constexpr uint128_t(const detail::builtin_u128 v) noexcept; #endif // BOOST_INT128_HAS_INT128 }; @@ -168,26 +168,26 @@ struct uint128_t ... 
// Integer conversion operators - constexpr operator bool() const noexcept; + BOOST_INT128_HOST_DEVICE constexpr operator bool() const noexcept; template - explicit constexpr operator SignedInteger() const noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr operator SignedInteger() const noexcept; template - explicit constexpr operator UnsignedInteger() const noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr operator UnsignedInteger() const noexcept; #ifdef BOOST_INT128_HAS_INT128 - explicit constexpr operator detail::builtin_i128() const noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr operator detail::builtin_i128() const noexcept; - explicit constexpr operator detail::builtin_u128() const noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr operator detail::builtin_u128() const noexcept; #endif // BOOST_INT128_HAS_INT128 // Conversion to float - explicit constexpr operator float() const noexcept; - explicit constexpr operator double() const noexcept; - explicit constexpr operator long double() const noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr operator float() const noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr operator double() const noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr operator long double() const noexcept; }; } // namespace int128 @@ -207,12 +207,12 @@ as the number of digits it represents can exceed the precision of the significan [source, c++] ---- template -constexpr bool operator<(const uint128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator<(const uint128_t lhs, const Integer rhs) noexcept; template -constexpr bool operator<(const Integer lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator<(const Integer lhs, const uint128_t rhs) noexcept; -constexpr bool operator<(const uint128_t lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator<(const uint128_t lhs, const uint128_t rhs) noexcept; ---- 
Returns if the `lhs` value is less than the `rhs` value without exception. @@ -223,12 +223,12 @@ This operation is only defined for integers and is subject to mixed sign limitat [source, c++] ---- template -constexpr bool operator<=(const uint128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator<=(const uint128_t lhs, const Integer rhs) noexcept; template -constexpr bool operator<=(const Integer lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator<=(const Integer lhs, const uint128_t rhs) noexcept; -constexpr bool operator<=(const uint128_t lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator<=(const uint128_t lhs, const uint128_t rhs) noexcept; ---- Returns if the `lhs` value is less than or equal to the `rhs` value without exception. @@ -239,12 +239,12 @@ This operation is only defined for integers and is subject to mixed sign limitat [source, c++] ---- template -constexpr bool operator>(const uint128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator>(const uint128_t lhs, const Integer rhs) noexcept; template -constexpr bool operator>(const Integer lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator>(const Integer lhs, const uint128_t rhs) noexcept; -constexpr bool operator>(const uint128_t lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator>(const uint128_t lhs, const uint128_t rhs) noexcept; ---- Returns if the `lhs` value is greater than the `rhs` value without exception. 
@@ -255,12 +255,12 @@ This operation is only defined for integers and is subject to mixed sign limitat [source, c++] ---- template -constexpr bool operator>=(const uint128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator>=(const uint128_t lhs, const Integer rhs) noexcept; template -constexpr bool operator>=(const Integer lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator>=(const Integer lhs, const uint128_t rhs) noexcept; -constexpr bool operator>=(const uint128_t lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator>=(const uint128_t lhs, const uint128_t rhs) noexcept; ---- Returns if the `lhs` value is greater than or equal to the `rhs` value without exception. @@ -271,12 +271,12 @@ This operation is only defined for integers and is subject to mixed sign limitat [source, c++] ---- template -constexpr bool operator==(const uint128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator==(const uint128_t lhs, const Integer rhs) noexcept; template -constexpr bool operator==(const Integer lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator==(const Integer lhs, const uint128_t rhs) noexcept; -constexpr bool operator==(const uint128_t lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator==(const uint128_t lhs, const uint128_t rhs) noexcept; ---- Returns if the `lhs` value is equal to the `rhs` value without exception. 
@@ -287,12 +287,12 @@ This operation is only defined for integers and is subject to mixed sign limitat [source, c++] ---- template -constexpr bool operator!=(const uint128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const uint128_t lhs, const Integer rhs) noexcept; template -constexpr bool operator!=(const Integer lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const Integer lhs, const uint128_t rhs) noexcept; -constexpr bool operator!=(const uint128_t lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const uint128_t lhs, const uint128_t rhs) noexcept; ---- Returns if the `lhs` value is not equal to the `rhs` value without exception. @@ -302,13 +302,13 @@ This operation is only defined for integers and is subject to mixed sign limitat [source, c++] ---- -constexpr std::strong_ordering operator<=>(const uint128_t lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const uint128_t lhs, const uint128_t rhs) noexcept; template -constexpr std::strong_ordering operator<=>(const uint128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const uint128_t lhs, const Integer rhs) noexcept; template -constexpr std::strong_ordering operator<=>(const Integer lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const Integer lhs, const uint128_t rhs) noexcept; ---- Returns one of the following without exception: @@ -324,7 +324,7 @@ Returns one of the following without exception: [source, c++] ---- -constexpr uint128_t operator~(const uint128_t rhs) noexcept +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator~(const uint128_t rhs) noexcept ---- Returns the bitwise negation of `rhs` without exception. @@ -334,12 +334,12 @@ Returns the bitwise negation of `rhs` without exception. 
[source, c++] ---- template -constexpr uint128_t operator|(const uint128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const uint128_t lhs, const Integer rhs) noexcept; template -constexpr uint128_t operator|(const Integer lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const Integer lhs, const uint128_t rhs) noexcept; -constexpr uint128_t operator|(const uint128_t lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const uint128_t lhs, const uint128_t rhs) noexcept; ---- Returns the bitwise or of `lhs` and `rhs` without exception. @@ -350,12 +350,12 @@ This operation is subject to mixed sign limitations discussed xref:uint128_t.ado [source, c++] ---- template -constexpr uint128_t operator&(const uint128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const uint128_t lhs, const Integer rhs) noexcept; template -constexpr uint128_t operator&(const Integer lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const Integer lhs, const uint128_t rhs) noexcept; -constexpr uint128_t operator&(const uint128_t lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const uint128_t lhs, const uint128_t rhs) noexcept; ---- Returns the bitwise and of `lhs` and `rhs` without exception. 
@@ -366,12 +366,12 @@ This operation is subject to mixed sign limitations discussed xref:uint128_t.ado [source, c++] ---- template -constexpr uint128_t operator^(const uint128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const uint128_t lhs, const Integer rhs) noexcept; template -constexpr uint128_t operator^(const Integer lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const Integer lhs, const uint128_t rhs) noexcept; -constexpr uint128_t operator^(const uint128_t lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const uint128_t lhs, const uint128_t rhs) noexcept; ---- Returns the bitwise xor of `lhs` and `rhs` without exception. @@ -382,18 +382,18 @@ This operation is subject to mixed sign limitations discussed xref:uint128_t.ado [source, c++] ---- template -constexpr uint128_t operator<<(const uint128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator<<(const uint128_t lhs, const Integer rhs) noexcept; template ::value && (sizeof(Integer) * 8 > 16), bool> = true> -constexpr Integer operator<<(const Integer lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr Integer operator<<(const Integer lhs, const uint128_t rhs) noexcept; template && (sizeof(SignedInteger) * 8 <= 16), bool> = true> -constexpr int operator<<(const SignedInteger lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int operator<<(const SignedInteger lhs, const uint128_t rhs) noexcept; template && (sizeof(UnsignedInteger) * 8 <= 16), bool> = true> -constexpr unsigned int operator<<(const UnsignedInteger lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr unsigned int operator<<(const UnsignedInteger lhs, const uint128_t rhs) noexcept; -constexpr uint128_t operator<<(const uint128_t lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t 
operator<<(const uint128_t lhs, const uint128_t rhs) noexcept; ---- Returns the bitwise left shift of `lhs` without exception. @@ -405,18 +405,18 @@ This operation is subject to mixed sign limitations discussed xref:uint128_t.ado [source, c++] ---- template -constexpr uint128_t operator<<(const uint128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator>>(const uint128_t lhs, const Integer rhs) noexcept; template ::value && (sizeof(Integer) * 8 > 16), bool> = true> -constexpr Integer operator<<(const Integer lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr Integer operator>>(const Integer lhs, const uint128_t rhs) noexcept; template && (sizeof(SignedInteger) * 8 <= 16), bool> = true> -constexpr int operator<<(const SignedInteger lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int operator>>(const SignedInteger lhs, const uint128_t rhs) noexcept; template && (sizeof(UnsignedInteger) * 8 <= 16), bool> = true> -constexpr unsigned int operator<<(const UnsignedInteger lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr unsigned int operator>>(const UnsignedInteger lhs, const uint128_t rhs) noexcept; -constexpr uint128_t operator<<(const uint128_t lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator>>(const uint128_t lhs, const uint128_t rhs) noexcept; ---- Returns the bitwise right shift of `lhs` without exception. 
@@ -432,12 +432,12 @@ This operation is subject to mixed sign limitations discussed xref:uint128_t.ado [source, c++] ---- template -constexpr uint128_t operator+(const uint128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator+(const uint128_t lhs, const Integer rhs) noexcept; template -constexpr uint128_t operator+(const Integer lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator+(const Integer lhs, const uint128_t rhs) noexcept; -constexpr uint128_t operator+(const uint128_t lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator+(const uint128_t lhs, const uint128_t rhs) noexcept; ---- Returns as a `uint128_t` the sum of `lhs` and `rhs`. @@ -449,12 +449,12 @@ This operation is only defined for integers and is subject to mixed sign limitat [source, c++] ---- template -constexpr uint128_t operator-(const uint128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator-(const uint128_t lhs, const Integer rhs) noexcept; template -constexpr uint128_t operator-(const Integer lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator-(const Integer lhs, const uint128_t rhs) noexcept; -constexpr uint128_t operator-(const uint128_t lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator-(const uint128_t lhs, const uint128_t rhs) noexcept; ---- Returns as a `uint128_t` the difference of `lhs` and `rhs`. 
@@ -466,12 +466,12 @@ This operation is only defined for integers and is subject to mixed sign limitat [source, c++] ---- template -constexpr uint128_t operator*(const uint128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const uint128_t lhs, const Integer rhs) noexcept; template -constexpr uint128_t operator*(const Integer lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const Integer lhs, const uint128_t rhs) noexcept; -constexpr uint128_t operator*(const uint128_t lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const uint128_t lhs, const uint128_t rhs) noexcept; ---- Returns as a `uint128_t` the product of `lhs` and `rhs`. @@ -483,12 +483,12 @@ This operation is only defined for integers and is subject to mixed sign limitat [source, c++] ---- template -constexpr uint128_t operator/(const uint128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(const uint128_t lhs, const Integer rhs) noexcept; template -constexpr uint128_t operator/(const Integer lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(const Integer lhs, const uint128_t rhs) noexcept; -constexpr uint128_t operator/(const uint128_t lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(const uint128_t lhs, const uint128_t rhs) noexcept; ---- Returns as a `uint128_t` the quotient of `lhs` and `rhs` without exception. 
@@ -499,12 +499,12 @@ This operation is only defined for integers and is subject to mixed sign limitat [source, c++] ---- template -constexpr uint128_t operator%(const uint128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(const uint128_t lhs, const Integer rhs) noexcept; template -constexpr uint128_t operator%(const Integer lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(const Integer lhs, const uint128_t rhs) noexcept; -constexpr uint128_t operator%(const uint128_t lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(const uint128_t lhs, const uint128_t rhs) noexcept; ---- Returns as a `uint128_t` the remainder of `lhs` and `rhs` without exception. From 3c034f292fecbf6ef70874aa8c29243e8c04c0b9 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 9 Mar 2026 15:34:09 -0400 Subject: [PATCH 051/137] Add CUDA testing path to test/CML --- test/CMakeLists.txt | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 637982d4..f2fbf41e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -7,6 +7,25 @@ include(BoostTestJamfile OPTIONAL RESULT_VARIABLE HAVE_BOOST_TEST) if(HAVE_BOOST_TEST) - boost_test_jamfile(FILE Jamfile LINK_LIBRARIES Boost::int128 Boost::core Boost::random Boost::multiprecision Boost::mp11 Boost::charconv) + enable_testing() + + if(BOOST_INT128_ENABLE_CUDA) + + message(STATUS "Building Boost.int128 with CUDA") + + find_package(CUDA REQUIRED) + enable_language(CUDA) + set(CMAKE_CUDA_EXTENSIONS OFF) + + enable_testing() + + boost_test_jamfile(FILE cuda_jamfile LINK_LIBRARIES Boost::int128 Boost::random ${CUDA_LIBRARIES} INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} ) + + + else() + + boost_test_jamfile(FILE Jamfile LINK_LIBRARIES Boost::int128 Boost::core Boost::random Boost::multiprecision Boost::mp11 Boost::charconv) + + endif() endif() From 
5fbe7c495d254dea3b6d742804a62250685c4c63 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 9 Mar 2026 15:45:52 -0400 Subject: [PATCH 052/137] Fix warning #20012-D --- include/boost/int128/detail/int128_imp.hpp | 10 +++++----- include/boost/int128/detail/uint128_imp.hpp | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/boost/int128/detail/int128_imp.hpp b/include/boost/int128/detail/int128_imp.hpp index 071fec63..d4889f0b 100644 --- a/include/boost/int128/detail/int128_imp.hpp +++ b/include/boost/int128/detail/int128_imp.hpp @@ -49,11 +49,11 @@ int128_t #endif // Defaulted basic construction - BOOST_INT128_HOST_DEVICE constexpr int128_t() noexcept = default; - BOOST_INT128_HOST_DEVICE constexpr int128_t(const int128_t&) noexcept = default; - BOOST_INT128_HOST_DEVICE constexpr int128_t(int128_t&&) noexcept = default; - BOOST_INT128_HOST_DEVICE constexpr int128_t& operator=(const int128_t&) noexcept = default; - BOOST_INT128_HOST_DEVICE constexpr int128_t& operator=(int128_t&&) noexcept = default; + constexpr int128_t() noexcept = default; + constexpr int128_t(const int128_t&) noexcept = default; + constexpr int128_t(int128_t&&) noexcept = default; + constexpr int128_t& operator=(const int128_t&) noexcept = default; + constexpr int128_t& operator=(int128_t&&) noexcept = default; // Requires a conversion file to be implemented BOOST_INT128_HOST_DEVICE explicit constexpr int128_t(const uint128_t& v) noexcept; diff --git a/include/boost/int128/detail/uint128_imp.hpp b/include/boost/int128/detail/uint128_imp.hpp index 21b4ebf5..5416bbad 100644 --- a/include/boost/int128/detail/uint128_imp.hpp +++ b/include/boost/int128/detail/uint128_imp.hpp @@ -50,11 +50,11 @@ uint128_t #endif // BOOST_INT128_ENDIAN_LITTLE_BYTE // Defaulted basic construction - BOOST_INT128_HOST_DEVICE constexpr uint128_t() noexcept = default; - BOOST_INT128_HOST_DEVICE constexpr uint128_t(const uint128_t&) noexcept = default; - BOOST_INT128_HOST_DEVICE 
constexpr uint128_t(uint128_t&&) noexcept = default; - BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator=(const uint128_t&) noexcept = default; - BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator=(uint128_t&&) noexcept = default; + constexpr uint128_t() noexcept = default; + constexpr uint128_t(const uint128_t&) noexcept = default; + constexpr uint128_t(uint128_t&&) noexcept = default; + constexpr uint128_t& operator=(const uint128_t&) noexcept = default; + constexpr uint128_t& operator=(uint128_t&&) noexcept = default; // Requires a conversion file to be implemented BOOST_INT128_HOST_DEVICE explicit constexpr uint128_t(const int128_t& v) noexcept; From 8b1f2f466e40e1354b5b422312210d656cf21078 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 9 Mar 2026 15:47:02 -0400 Subject: [PATCH 053/137] Fix test distribution type --- test/test_add.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_add.cu b/test/test_add.cu index e5bb6e02..11ec6404 100644 --- a/test/test_add.cu +++ b/test/test_add.cu @@ -54,7 +54,7 @@ int main(void) cuda_managed_ptr output_vector(numElements); // Initialize the input vectors - boost::random::uniform_int_distribution<> dist(std::numeric_limits::min() / test_type{2}, std::numeric_limits::max() / test_type{2}); + boost::random::uniform_int_distribution dist(std::numeric_limits::min() / test_type{2}, std::numeric_limits::max() / test_type{2}); for (int i = 0; i < numElements; ++i) { input_vector[i] = dist(rng); From 7d64bf9904f4079fd704738003d48af6caa1027f Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 9 Mar 2026 15:52:59 -0400 Subject: [PATCH 054/137] Fix sign conversion error --- test/test_add.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_add.cu b/test/test_add.cu index 11ec6404..969eae78 100644 --- a/test/test_add.cu +++ b/test/test_add.cu @@ -54,8 +54,8 @@ int main(void) cuda_managed_ptr output_vector(numElements); // Initialize the input vectors - 
boost::random::uniform_int_distribution dist(std::numeric_limits::min() / test_type{2}, std::numeric_limits::max() / test_type{2}); - for (int i = 0; i < numElements; ++i) + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)() / test_type{2}}; + for (std::size_t i = 0; i < numElements; ++i) { input_vector[i] = dist(rng); } From a0cc80d8723529ec46645486a6c6da0ca9db669a Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 9 Mar 2026 16:05:44 -0400 Subject: [PATCH 055/137] Allow sign conversion --- test/test_add.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_add.cu b/test/test_add.cu index 969eae78..16d87047 100644 --- a/test/test_add.cu +++ b/test/test_add.cu @@ -4,6 +4,8 @@ // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + #include #include #include From 39b0bf620a97ca8e917a273bf9c99e36160fb782 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 9 Mar 2026 16:08:39 -0400 Subject: [PATCH 056/137] Mark numeric limits functions as HD --- include/boost/int128/detail/uint128_imp.hpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/boost/int128/detail/uint128_imp.hpp b/include/boost/int128/detail/uint128_imp.hpp index 5416bbad..f3e08a8f 100644 --- a/include/boost/int128/detail/uint128_imp.hpp +++ b/include/boost/int128/detail/uint128_imp.hpp @@ -3342,15 +3342,15 @@ class numeric_limits_impl_u128 static constexpr bool tinyness_before = false; // Member functions - static constexpr auto (min) () -> boost::int128::uint128_t { return {0, 0}; } - static constexpr auto lowest () -> boost::int128::uint128_t { return {0, 0}; } - static constexpr auto (max) () -> boost::int128::uint128_t { return {UINT64_MAX, UINT64_MAX}; } - static constexpr auto epsilon () -> boost::int128::uint128_t { return {0, 0}; } - static constexpr auto round_error () -> 
boost::int128::uint128_t { return {0, 0}; } - static constexpr auto infinity () -> boost::int128::uint128_t { return {0, 0}; } - static constexpr auto quiet_NaN () -> boost::int128::uint128_t { return {0, 0}; } - static constexpr auto signaling_NaN() -> boost::int128::uint128_t { return {0, 0}; } - static constexpr auto denorm_min () -> boost::int128::uint128_t { return {0, 0}; } + BOOST_INT128_HOST_DEVICE static constexpr auto (min) () -> boost::int128::uint128_t { return {0, 0}; } + BOOST_INT128_HOST_DEVICE static constexpr auto lowest () -> boost::int128::uint128_t { return {0, 0}; } + BOOST_INT128_HOST_DEVICE static constexpr auto (max) () -> boost::int128::uint128_t { return {UINT64_MAX, UINT64_MAX}; } + BOOST_INT128_HOST_DEVICE static constexpr auto epsilon () -> boost::int128::uint128_t { return {0, 0}; } + BOOST_INT128_HOST_DEVICE static constexpr auto round_error () -> boost::int128::uint128_t { return {0, 0}; } + BOOST_INT128_HOST_DEVICE static constexpr auto infinity () -> boost::int128::uint128_t { return {0, 0}; } + BOOST_INT128_HOST_DEVICE static constexpr auto quiet_NaN () -> boost::int128::uint128_t { return {0, 0}; } + BOOST_INT128_HOST_DEVICE static constexpr auto signaling_NaN() -> boost::int128::uint128_t { return {0, 0}; } + BOOST_INT128_HOST_DEVICE static constexpr auto denorm_min () -> boost::int128::uint128_t { return {0, 0}; } }; #if !defined(__cpp_inline_variables) || __cpp_inline_variables < 201606L From e5eb38607bb4e483f3bb4c3566c5ce5d74e37774 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 9 Mar 2026 16:12:26 -0400 Subject: [PATCH 057/137] Change supported architectures --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2e6bc882..08b59633 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1195,7 +1195,7 @@ jobs: run: | cd ../boost-root mkdir __build__ && cd __build__ - cmake 
-DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_INT128_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES=86 -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.8 .. + cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_INT128_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="75;86" -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.8 .. - name: Build tests run: | cd ../boost-root/__build__ From dfc5371931f226220015a995600e5f27a7306bee Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Mar 2026 11:23:09 -0400 Subject: [PATCH 058/137] Add signed addition cuda test --- test/cuda_jamfile | 3 +- test/test_signed_add.cu | 108 +++++++++++++++++++++ test/{test_add.cu => test_unsigned_add.cu} | 0 3 files changed, 110 insertions(+), 1 deletion(-) create mode 100644 test/test_signed_add.cu rename test/{test_add.cu => test_unsigned_add.cu} (100%) diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 05e22d5b..6a95276c 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -9,4 +9,5 @@ project : requirements [ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ] ; -run test_add.cu ; \ No newline at end of file +run test_unsigned_add.cu ; +run test_signed_add.cu ; diff --git a/test/test_signed_add.cu b/test/test_signed_add.cu new file mode 100644 index 00000000..6b1ad9bc --- /dev/null +++ b/test/test_signed_add.cu @@ -0,0 +1,108 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] + in[i]; + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)() / test_type{2}, (std::numeric_limits::max)() / test_type{2}}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] + input_vector[i]); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_add.cu b/test/test_unsigned_add.cu similarity index 100% rename from test/test_add.cu rename to test/test_unsigned_add.cu From f4943964475f425ff2da5f8eede26db0c14314fe Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Mar 2026 11:24:43 -0400 Subject: [PATCH 059/137] Add subtraction testing --- test/cuda_jamfile | 2 + test/test_signed_sub.cu | 108 ++++++++++++++++++++++++++++++++++++++ test/test_unsigned_sub.cu | 108 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 218 insertions(+) create mode 100644 test/test_signed_sub.cu create mode 100644 test/test_unsigned_sub.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 6a95276c..e339968f 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -11,3 +11,5 @@ project : requirements run test_unsigned_add.cu ; run test_signed_add.cu ; +run test_unsigned_sub.cu ; +run test_signed_sub.cu ; diff --git a/test/test_signed_sub.cu b/test/test_signed_sub.cu new file mode 100644 index 00000000..11910db0 --- /dev/null +++ b/test/test_signed_sub.cu @@ -0,0 +1,108 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] - in[i]; + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)() / test_type{2}, (std::numeric_limits::max)() / test_type{2}}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] - input_vector[i]); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_sub.cu b/test/test_unsigned_sub.cu new file mode 100644 index 00000000..d1680615 --- /dev/null +++ b/test/test_unsigned_sub.cu @@ -0,0 +1,108 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] - in[i]; + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)() / test_type{2}}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << 
")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] - input_vector[i]); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} From 8a71afd5b9daaebae1fc5bbeb48ac11bd8d6ae6d Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Mar 2026 11:45:33 -0400 Subject: [PATCH 060/137] Expand overview section --- doc/modules/ROOT/pages/overview.adoc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/modules/ROOT/pages/overview.adoc b/doc/modules/ROOT/pages/overview.adoc index 01fb1a36..42d208ce 100644 --- a/doc/modules/ROOT/pages/overview.adoc +++ b/doc/modules/ROOT/pages/overview.adoc @@ -22,9 +22,10 @@ When using pass:[C++20] or newer, the library can be consumed as a module `impor 128-bit integers are useful across many domains, but pass:[C++] provides no portable way to use them. GCC and Clang offer `__int128` as a non-standard extension on 64-bit targets, but it lacks `std::numeric_limits` specializations, `` support, and is absent entirely on MSVC. -Multiprecision libraries can fill the gap, but typically at the cost of a larger `sizeof` and additional overhead. +Multiprecision libraries can fill the gap, but typically at the cost of a larger `sizeof` and additional overhead (e.g., Boost.Multiprecision always has an extra word). Boost.Int128 solves this by providing types that are exactly 128-bits on every platform. 
Operation implementations rely on compiler intrinsic where available for native performance, and optimized software implementations elsewhere. +The types provided by the library, along with many of its functions, also natively support running on GPUs using CUDA. == Use Cases @@ -41,6 +42,7 @@ as well as emulated PPC64LE using QEMU with the following compilers: * Clang 5 and later * Visual Studio 2017 (14.1) and later * Intel OneAPI DPC++ 2024.2 and later +* NVCC 12.8 and later Tested on https://github.com/cppalliance/int128/actions[Github Actions] and https://drone.cpp.al/cppalliance/int128[Drone]. Coverage can be found on https://app.codecov.io/gh/cppalliance/int128[Codecov]. From 1228d4fbe9309118c7cf5b5f159d8a50a0b80e41 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Mar 2026 11:46:01 -0400 Subject: [PATCH 061/137] Fixed redundant check warnings --- include/boost/int128/detail/int128_imp.hpp | 14 +++++++++-- include/boost/int128/detail/uint128_imp.hpp | 28 ++++++++++++++++++--- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/include/boost/int128/detail/int128_imp.hpp b/include/boost/int128/detail/int128_imp.hpp index d4889f0b..9f88af4e 100644 --- a/include/boost/int128/detail/int128_imp.hpp +++ b/include/boost/int128/detail/int128_imp.hpp @@ -1637,9 +1637,19 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t default_ls_impl(const int128_t lhs, { static_assert(std::is_integral::value, "Only builtin types allowed"); - if (rhs < 0 || rhs >= 128) + BOOST_INT128_IF_CONSTEXPR (std::numeric_limits::is_signed) { - return {0, 0}; + if (rhs < 0 || rhs >= 128) + { + return {0, 0}; + } + } + else + { + if (rhs >= 128) + { + return {0, 0}; + } } if (rhs == 0) diff --git a/include/boost/int128/detail/uint128_imp.hpp b/include/boost/int128/detail/uint128_imp.hpp index f3e08a8f..aa52bff0 100644 --- a/include/boost/int128/detail/uint128_imp.hpp +++ b/include/boost/int128/detail/uint128_imp.hpp @@ -1741,9 +1741,19 @@ BOOST_INT128_HOST_DEVICE constexpr
uint128_t default_ls_impl(const uint128_t lhs { static_assert(std::is_integral::value, "Needs to be a builtin type"); - if (rhs < 0 || rhs >= 128) + BOOST_INT128_IF_CONSTEXPR (std::numeric_limits::is_signed) { - return {0, 0}; + if (rhs < 0 || rhs >= 128) + { + return {0, 0}; + } + } + else + { + if (rhs >= 128) + { + return {0, 0}; + } } if (rhs == 0) @@ -1948,9 +1958,19 @@ namespace detail { template BOOST_INT128_HOST_DEVICE constexpr uint128_t default_rs_impl(const uint128_t lhs, const Integer rhs) noexcept { - if (rhs < 0 || rhs >= 128) + BOOST_INT128_IF_CONSTEXPR (std::numeric_limits::is_signed) { - return {0, 0}; + if (rhs < 0 || rhs >= 128) + { + return {0, 0}; + } + } + else + { + if (rhs >= 128) + { + return {0, 0}; + } } if (rhs == 0) From 2fe8cb05ca22e24cf4373c6cde3fb2b160434549 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Mar 2026 11:46:12 -0400 Subject: [PATCH 062/137] Add HD annotations to signed numeric limits --- include/boost/int128/detail/int128_imp.hpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/boost/int128/detail/int128_imp.hpp b/include/boost/int128/detail/int128_imp.hpp index 9f88af4e..dbf1d9d0 100644 --- a/include/boost/int128/detail/int128_imp.hpp +++ b/include/boost/int128/detail/int128_imp.hpp @@ -3413,15 +3413,15 @@ class numeric_limits_impl_i128 static constexpr bool tinyness_before = false; // Member functions - static constexpr auto (min) () -> boost::int128::int128_t { return {INT64_MIN, 0}; } - static constexpr auto lowest () -> boost::int128::int128_t { return {INT64_MIN, 0}; } - static constexpr auto (max) () -> boost::int128::int128_t { return {INT64_MAX, UINT64_MAX}; } - static constexpr auto epsilon () -> boost::int128::int128_t { return {0, 0}; } - static constexpr auto round_error () -> boost::int128::int128_t { return {0, 0}; } - static constexpr auto infinity () -> boost::int128::int128_t { return {0, 0}; } - static constexpr auto quiet_NaN () -> 
boost::int128::int128_t { return {0, 0}; } - static constexpr auto signaling_NaN() -> boost::int128::int128_t { return {0, 0}; } - static constexpr auto denorm_min () -> boost::int128::int128_t { return {0, 0}; } + BOOST_INT128_HOST_DEVICE static constexpr auto (min) () -> boost::int128::int128_t { return {INT64_MIN, 0}; } + BOOST_INT128_HOST_DEVICE static constexpr auto lowest () -> boost::int128::int128_t { return {INT64_MIN, 0}; } + BOOST_INT128_HOST_DEVICE static constexpr auto (max) () -> boost::int128::int128_t { return {INT64_MAX, UINT64_MAX}; } + BOOST_INT128_HOST_DEVICE static constexpr auto epsilon () -> boost::int128::int128_t { return {0, 0}; } + BOOST_INT128_HOST_DEVICE static constexpr auto round_error () -> boost::int128::int128_t { return {0, 0}; } + BOOST_INT128_HOST_DEVICE static constexpr auto infinity () -> boost::int128::int128_t { return {0, 0}; } + BOOST_INT128_HOST_DEVICE static constexpr auto quiet_NaN () -> boost::int128::int128_t { return {0, 0}; } + BOOST_INT128_HOST_DEVICE static constexpr auto signaling_NaN() -> boost::int128::int128_t { return {0, 0}; } + BOOST_INT128_HOST_DEVICE static constexpr auto denorm_min () -> boost::int128::int128_t { return {0, 0}; } }; #if !defined(__cpp_inline_variables) || __cpp_inline_variables < 201606L From 362e98e06a3c2bf5acdbe22dcf2bc0654a192e0d Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Mar 2026 12:06:40 -0400 Subject: [PATCH 063/137] Add boost.random support header --- test/test_signed_add.cu | 1 + test/test_signed_sub.cu | 1 + test/test_unsigned_add.cu | 1 + test/test_unsigned_sub.cu | 1 + 4 files changed, 4 insertions(+) diff --git a/test/test_signed_add.cu b/test/test_signed_add.cu index 6b1ad9bc..57369852 100644 --- a/test/test_signed_add.cu +++ b/test/test_signed_add.cu @@ -12,6 +12,7 @@ #include #include #include +#include #include #include "cuda_managed_ptr.hpp" #include "stopwatch.hpp" diff --git a/test/test_signed_sub.cu b/test/test_signed_sub.cu index 11910db0..dd48db3a 
100644 --- a/test/test_signed_sub.cu +++ b/test/test_signed_sub.cu @@ -12,6 +12,7 @@ #include #include #include +#include #include #include "cuda_managed_ptr.hpp" #include "stopwatch.hpp" diff --git a/test/test_unsigned_add.cu b/test/test_unsigned_add.cu index 16d87047..59368281 100644 --- a/test/test_unsigned_add.cu +++ b/test/test_unsigned_add.cu @@ -12,6 +12,7 @@ #include #include #include +#include #include #include "cuda_managed_ptr.hpp" #include "stopwatch.hpp" diff --git a/test/test_unsigned_sub.cu b/test/test_unsigned_sub.cu index d1680615..b4fc0f87 100644 --- a/test/test_unsigned_sub.cu +++ b/test/test_unsigned_sub.cu @@ -12,6 +12,7 @@ #include #include #include +#include #include #include "cuda_managed_ptr.hpp" #include "stopwatch.hpp" From b06b6b63a944d4d90010bcc533ce4c7217b9ad40 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Mar 2026 13:42:00 -0400 Subject: [PATCH 064/137] Add mul and div CUDA tests --- test/cuda_jamfile | 4 ++ test/test_signed_div.cu | 111 ++++++++++++++++++++++++++++++++++++++ test/test_signed_mul.cu | 109 +++++++++++++++++++++++++++++++++++++ test/test_unsigned_div.cu | 109 +++++++++++++++++++++++++++++++++++++ test/test_unsigned_mul.cu | 109 +++++++++++++++++++++++++++++++++++++ 5 files changed, 442 insertions(+) create mode 100644 test/test_signed_div.cu create mode 100644 test/test_signed_mul.cu create mode 100644 test/test_unsigned_div.cu create mode 100644 test/test_unsigned_mul.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index e339968f..8559e9b5 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -13,3 +13,7 @@ run test_unsigned_add.cu ; run test_signed_add.cu ; run test_unsigned_sub.cu ; run test_signed_sub.cu ; +run test_unsigned_mul.cu ; +run test_signed_mul.cu ; +run test_unsigned_div.cu ; +run test_signed_div.cu ; diff --git a/test/test_signed_div.cu b/test/test_signed_div.cu new file mode 100644 index 00000000..0b3a5416 --- /dev/null +++ b/test/test_signed_div.cu @@ -0,0 +1,111 @@ +// Copyright 
John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] / in[i]; + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << 
std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] / input_vector[i]); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << '\n' + << "Got: " << output_vector[i] << "\n" + << "Expected: " << results[i] << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_mul.cu b/test/test_signed_mul.cu new file mode 100644 index 00000000..1c9a12fd --- /dev/null +++ b/test/test_signed_mul.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] * in[i]; + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {test_type{(std::numeric_limits::min)()} + 1, test_type{(std::numeric_limits::max)()} - 1}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error 
code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] * input_vector[i]); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_div.cu b/test/test_unsigned_div.cu new file mode 100644 index 00000000..4b56b115 --- /dev/null +++ b/test/test_unsigned_div.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] / in[i]; + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {test_type{1U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] / input_vector[i]); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_mul.cu b/test/test_unsigned_mul.cu new file mode 100644 index 00000000..fb32b655 --- /dev/null +++ b/test/test_unsigned_mul.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] * in[i]; + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input 
vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {test_type{0U}, test_type{std::numeric_limits::max()}}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] * input_vector[i]); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} From 3c9951b17d38b495cbcfd45adfbc211d1d8a5114 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Mar 2026 13:56:21 -0400 Subject: [PATCH 065/137] Move static tables with NVCC --- include/boost/int128/detail/mini_to_chars.hpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/include/boost/int128/detail/mini_to_chars.hpp b/include/boost/int128/detail/mini_to_chars.hpp index 3355a369..31a5f04d 100644 --- a/include/boost/int128/detail/mini_to_chars.hpp +++ b/include/boost/int128/detail/mini_to_chars.hpp @@ -12,6 +12,8 @@ namespace boost { namespace int128 { namespace detail { +#ifndef __NVCC__ + BOOST_INT128_INLINE_CONSTEXPR char lower_case_digit_table[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' @@ -26,8 +28,22 @@ BOOST_INT128_INLINE_CONSTEXPR char upper_case_digit_table[] = { static_assert(sizeof(upper_case_digit_table) == sizeof(char) * 16, "10 numbers, and 6 letters"); +#endif // !__NVCC__ + BOOST_INT128_HOST_DEVICE constexpr char* mini_to_chars(char (&buffer)[64], uint128_t v, const int base, const bool uppercase) noexcept { + #ifdef __NVCC__ + constexpr char lower_case_digit_table[] = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'a', 'b', 'c', 'd', 'e', 'f' + }; + + constexpr char upper_case_digit_table[] = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'A', 'B', 'C', 'D', 'E', 'F' + }; + #endif + char* last {buffer + 64U}; *--last = '\0'; From 57a0d5005b79ae6c6bde9ef4622ea3bb6149d1a2 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Mar 2026 15:53:42 -0400 Subject: [PATCH 066/137] Disable use of long double on device --- include/boost/int128/detail/int128_imp.hpp | 12 ++++++++++-- include/boost/int128/detail/uint128_imp.hpp | 12 ++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git 
a/include/boost/int128/detail/int128_imp.hpp b/include/boost/int128/detail/int128_imp.hpp index dbf1d9d0..28364f99 100644 --- a/include/boost/int128/detail/int128_imp.hpp +++ b/include/boost/int128/detail/int128_imp.hpp @@ -117,7 +117,11 @@ int128_t // but can be constexpr at C++11 instead of C++26 BOOST_INT128_HOST_DEVICE explicit constexpr operator float() const noexcept; BOOST_INT128_HOST_DEVICE explicit constexpr operator double() const noexcept; - BOOST_INT128_HOST_DEVICE explicit constexpr operator long double() const noexcept; + + // Long double does not exist on device + #ifndef __NVCC__ + explicit constexpr operator long double() const noexcept; + #endif // Compound Or template @@ -292,11 +296,15 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t::operator double() const noexcept return static_cast(high) * detail::offset_value_v + static_cast(low); } -BOOST_INT128_HOST_DEVICE constexpr int128_t::operator long double() const noexcept +#ifndef __NVCC__ + +constexpr int128_t::operator long double() const noexcept { return static_cast(high) * detail::offset_value_v + static_cast(low); } +#endif + //===================================== // Unary Operators //===================================== diff --git a/include/boost/int128/detail/uint128_imp.hpp b/include/boost/int128/detail/uint128_imp.hpp index aa52bff0..224305b1 100644 --- a/include/boost/int128/detail/uint128_imp.hpp +++ b/include/boost/int128/detail/uint128_imp.hpp @@ -128,7 +128,11 @@ uint128_t // but can be constexpr at C++11 instead of C++26 BOOST_INT128_HOST_DEVICE explicit constexpr operator float() const noexcept; BOOST_INT128_HOST_DEVICE explicit constexpr operator double() const noexcept; - BOOST_INT128_HOST_DEVICE explicit constexpr operator long double() const noexcept; + + // long doubles do not exist on device + #ifndef __NVCC__ + explicit constexpr operator long double() const noexcept; + #endif // Compound OR template @@ -294,11 +298,15 @@ BOOST_INT128_HOST_DEVICE constexpr 
uint128_t::operator double() const noexcept return static_cast(high) * detail::offset_value_v + static_cast(low); } -BOOST_INT128_HOST_DEVICE constexpr uint128_t::operator long double() const noexcept +#ifndef __NVCC__ + +constexpr uint128_t::operator long double() const noexcept { return static_cast(high) * detail::offset_value_v + static_cast(low); } +#endif // __NVCC__ + //===================================== // Unary Operators //===================================== From 610187a9d06712638352b2412838afcffb276f77 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Mar 2026 15:57:39 -0400 Subject: [PATCH 067/137] Fix NVCC warnings #186-D --- include/boost/int128/detail/int128_imp.hpp | 28 ++++++++++++++++--- include/boost/int128/detail/uint128_imp.hpp | 30 ++++++++++++++++++--- 2 files changed, 50 insertions(+), 8 deletions(-) diff --git a/include/boost/int128/detail/int128_imp.hpp b/include/boost/int128/detail/int128_imp.hpp index 28364f99..5c4a38cb 100644 --- a/include/boost/int128/detail/int128_imp.hpp +++ b/include/boost/int128/detail/int128_imp.hpp @@ -1688,9 +1688,19 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t default_ls_impl(const int128_t lhs, template BOOST_INT128_HOST_DEVICE int128_t intrinsic_ls_impl(const int128_t lhs, const Integer rhs) noexcept { - if (BOOST_INT128_UNLIKELY(rhs >= 128 || rhs < 0)) + BOOST_INT128_IF_CONSTEXPR (std::numeric_limits::is_signed) { - return {0, 0}; + if (BOOST_INT128_UNLIKELY(rhs >= 128 || rhs < 0)) + { + return {0, 0}; + } + } + else + { + if (BOOST_INT128_UNLIKELY(rhs >= 128)) + { + return {0, 0}; + } } #ifdef BOOST_INT128_HAS_INT128 @@ -1896,9 +1906,19 @@ namespace detail { template BOOST_INT128_HOST_DEVICE constexpr int128_t default_rs_impl(const int128_t lhs, const Integer rhs) noexcept { - if (rhs >= 128 || rhs < 0 ) + BOOST_INT128_IF_CONSTEXPR (std::numeric_limits::is_signed) + { + if (rhs >= 128 || rhs < 0) + { + return lhs.high < 0 ? 
int128_t{-1, UINT64_MAX} : int128_t{0, 0}; + } + } + else { - return lhs.high < 0 ? int128_t{-1, UINT64_MAX} : int128_t{0, 0}; + if (rhs >= 128) + { + return lhs.high < 0 ? int128_t{-1, UINT64_MAX} : int128_t{0, 0}; + } } if (rhs == 0) diff --git a/include/boost/int128/detail/uint128_imp.hpp b/include/boost/int128/detail/uint128_imp.hpp index 224305b1..26aa9370 100644 --- a/include/boost/int128/detail/uint128_imp.hpp +++ b/include/boost/int128/detail/uint128_imp.hpp @@ -1788,10 +1788,21 @@ BOOST_INT128_HOST_DEVICE constexpr uint128_t default_ls_impl(const uint128_t lhs template BOOST_INT128_HOST_DEVICE uint128_t intrinsic_ls_impl(const uint128_t lhs, const T rhs) noexcept { - if (BOOST_INT128_UNLIKELY(rhs >= 128 || rhs < 0)) + BOOST_INT128_IF_CONSTEXPR (std::numeric_limits::is_signed) { - return {0, 0}; + if (BOOST_INT128_UNLIKELY(rhs >= 128 || rhs < 0)) + { + return {0, 0}; + } + } + else + { + if (BOOST_INT128_UNLIKELY(rhs >= 128)) + { + return {0, 0}; + } } + if (BOOST_INT128_UNLIKELY(rhs == 0)) { return lhs; @@ -2005,10 +2016,21 @@ BOOST_INT128_HOST_DEVICE constexpr uint128_t default_rs_impl(const uint128_t lhs template BOOST_INT128_HOST_DEVICE uint128_t intrinsic_rs_impl(const uint128_t lhs, const Integer rhs) noexcept { - if (BOOST_INT128_UNLIKELY(rhs >= 128 || rhs < 0)) + BOOST_INT128_IF_CONSTEXPR (std::numeric_limits::is_signed) { - return {0, 0}; + if (BOOST_INT128_UNLIKELY(rhs >= 128 || rhs < 0)) + { + return {0, 0}; + } } + else + { + if (BOOST_INT128_UNLIKELY(rhs >= 128)) + { + return {0, 0}; + } + } + if (BOOST_INT128_UNLIKELY(rhs == 0)) { return lhs; From e23092a28e16765ff8512d75b72a24c5ff06de5d Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Mar 2026 16:02:30 -0400 Subject: [PATCH 068/137] Avoid using host compiler intrinsic on device --- include/boost/int128/detail/int128_imp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/boost/int128/detail/int128_imp.hpp b/include/boost/int128/detail/int128_imp.hpp index 
5c4a38cb..0daae392 100644 --- a/include/boost/int128/detail/int128_imp.hpp +++ b/include/boost/int128/detail/int128_imp.hpp @@ -2255,7 +2255,7 @@ BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr int128_t library_su BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr int128_t default_sub(const int128_t lhs, const int128_t rhs) noexcept { - #if defined(BOOST_INT128_HAS_BUILTIN_SUB_OVERFLOW) && (!defined(__aarch64__) || defined(__APPLE__) || !defined(BOOST_INT128_HAS_INT128)) + #if defined(BOOST_INT128_HAS_BUILTIN_SUB_OVERFLOW) && (!defined(__aarch64__) || defined(__APPLE__) || !defined(BOOST_INT128_HAS_INT128)) && !defined(__NVCC__) // __builtin_sub_overflow is marked constexpr so we don't need if consteval handling std::uint64_t result_low {}; From 4ea81be16031cf8afa8d49e68e3c056d550483ea Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Mar 2026 16:04:26 -0400 Subject: [PATCH 069/137] Use C++17 on device to handle a bunch of warnings --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 08b59633..7bc04bc6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1195,7 +1195,7 @@ jobs: run: | cd ../boost-root mkdir __build__ && cd __build__ - cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_INT128_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="75;86" -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.8 .. + cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_INT128_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="75;86" -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.8 -DCMAKE_CUDA_STANDARD=17 .. 
- name: Build tests run: | cd ../boost-root/__build__ From a2fc9deb78be88f0cc19b29f1afdde9fb586cb6a Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Mar 2026 16:28:55 -0400 Subject: [PATCH 070/137] Add testing of mod --- test/cuda_jamfile | 2 + test/test_signed_mod.cu | 111 ++++++++++++++++++++++++++++++++++++++ test/test_unsigned_mod.cu | 109 +++++++++++++++++++++++++++++++++++++ 3 files changed, 222 insertions(+) create mode 100644 test/test_signed_mod.cu create mode 100644 test/test_unsigned_mod.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 8559e9b5..901a187f 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -17,3 +17,5 @@ run test_unsigned_mul.cu ; run test_signed_mul.cu ; run test_unsigned_div.cu ; run test_signed_div.cu ; +run test_unsigned_mod.cu ; +run test_signed_mod.cu ; diff --git a/test/test_signed_mod.cu b/test/test_signed_mod.cu new file mode 100644 index 00000000..407a7653 --- /dev/null +++ b/test/test_signed_mod.cu @@ -0,0 +1,111 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] % in[i]; + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] % input_vector[i]); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << '\n' + << "Got: " << output_vector[i] << "\n" + << "Expected: " << results[i] << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_mod.cu b/test/test_unsigned_mod.cu new file mode 100644 index 00000000..838b2078 --- /dev/null +++ b/test/test_unsigned_mod.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] % in[i]; + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {test_type{1U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] % input_vector[i]); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} From 7ce948b39a57f6b555167950ff9d806939c5e70a Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Mar 2026 16:31:34 -0400 Subject: [PATCH 071/137] Improve div and mod testing --- test/test_signed_div.cu | 10 ++++++---- test/test_signed_mod.cu | 10 ++++++---- test/test_unsigned_div.cu | 10 ++++++---- test/test_unsigned_mod.cu | 10 ++++++---- 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/test/test_signed_div.cu b/test/test_signed_div.cu index 0b3a5416..eb10a192 100644 --- a/test/test_signed_div.cu +++ b/test/test_signed_div.cu @@ -26,13 +26,13 @@ using test_type = boost::int128::int128_t; * CUDA Kernel Device code * */ -__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) { int i = blockDim.x * blockIdx.x + threadIdx.x; if (i < numElements) { - out[i] = in[i] / in[i]; + out[i] = in[i] / in2[i]; } } @@ -52,6 +52,7 @@ int main(void) // Allocate the managed input vector A cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); // Allocate the managed output vector C cuda_managed_ptr output_vector(numElements); @@ -61,6 +62,7 @@ int main(void) for (std::size_t i = 0; i < numElements; ++i) { input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); } // Launch 
the Vector Add CUDA Kernel @@ -70,7 +72,7 @@ int main(void) watch w; - cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); cudaDeviceSynchronize(); std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; @@ -89,7 +91,7 @@ int main(void) w.reset(); for(int i = 0; i < numElements; ++i) { - results.push_back(input_vector[i] / input_vector[i]); + results.push_back(input_vector[i] / input_vector2[i]); } double t = w.elapsed(); // check the results diff --git a/test/test_signed_mod.cu b/test/test_signed_mod.cu index 407a7653..cbda3580 100644 --- a/test/test_signed_mod.cu +++ b/test/test_signed_mod.cu @@ -26,13 +26,13 @@ using test_type = boost::int128::int128_t; * CUDA Kernel Device code * */ -__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) { int i = blockDim.x * blockIdx.x + threadIdx.x; if (i < numElements) { - out[i] = in[i] % in[i]; + out[i] = in[i] % in2[i]; } } @@ -52,6 +52,7 @@ int main(void) // Allocate the managed input vector A cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); // Allocate the managed output vector C cuda_managed_ptr output_vector(numElements); @@ -61,6 +62,7 @@ int main(void) for (std::size_t i = 0; i < numElements; ++i) { input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); } // Launch the Vector Add CUDA Kernel @@ -70,7 +72,7 @@ int main(void) watch w; - cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); cudaDeviceSynchronize(); std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; @@ -89,7 +91,7 @@ int main(void) w.reset(); for(int i = 0; i < numElements; ++i) { - results.push_back(input_vector[i] % 
input_vector[i]); + results.push_back(input_vector[i] % input_vector2[i]); } double t = w.elapsed(); // check the results diff --git a/test/test_unsigned_div.cu b/test/test_unsigned_div.cu index 4b56b115..fb3070a2 100644 --- a/test/test_unsigned_div.cu +++ b/test/test_unsigned_div.cu @@ -26,13 +26,13 @@ using test_type = boost::int128::uint128_t; * CUDA Kernel Device code * */ -__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) { int i = blockDim.x * blockIdx.x + threadIdx.x; if (i < numElements) { - out[i] = in[i] / in[i]; + out[i] = in[i] / in2[i]; } } @@ -52,6 +52,7 @@ int main(void) // Allocate the managed input vector A cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); // Allocate the managed output vector C cuda_managed_ptr output_vector(numElements); @@ -61,6 +62,7 @@ int main(void) for (std::size_t i = 0; i < numElements; ++i) { input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); } // Launch the Vector Add CUDA Kernel @@ -70,7 +72,7 @@ int main(void) watch w; - cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); cudaDeviceSynchronize(); std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; @@ -89,7 +91,7 @@ int main(void) w.reset(); for(int i = 0; i < numElements; ++i) { - results.push_back(input_vector[i] / input_vector[i]); + results.push_back(input_vector[i] / input_vector2[i]); } double t = w.elapsed(); // check the results diff --git a/test/test_unsigned_mod.cu b/test/test_unsigned_mod.cu index 838b2078..56e31095 100644 --- a/test/test_unsigned_mod.cu +++ b/test/test_unsigned_mod.cu @@ -26,13 +26,13 @@ using test_type = boost::int128::uint128_t; * CUDA Kernel Device code * */ -__global__ void cuda_test(const test_type *in, test_type *out, int 
numElements) +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) { int i = blockDim.x * blockIdx.x + threadIdx.x; if (i < numElements) { - out[i] = in[i] % in[i]; + out[i] = in[i] % in2[i]; } } @@ -52,6 +52,7 @@ int main(void) // Allocate the managed input vector A cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); // Allocate the managed output vector C cuda_managed_ptr output_vector(numElements); @@ -61,6 +62,7 @@ int main(void) for (std::size_t i = 0; i < numElements; ++i) { input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); } // Launch the Vector Add CUDA Kernel @@ -70,7 +72,7 @@ int main(void) watch w; - cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); cudaDeviceSynchronize(); std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; @@ -89,7 +91,7 @@ int main(void) w.reset(); for(int i = 0; i < numElements; ++i) { - results.push_back(input_vector[i] % input_vector[i]); + results.push_back(input_vector[i] % input_vector2[i]); } double t = w.elapsed(); // check the results From 684eb8f5ab22b1b1c6a6ed3328f00e0ce131d20b Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 11 Mar 2026 08:55:00 -0400 Subject: [PATCH 072/137] Add asciidoc pagination --- doc/antora.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/antora.yml b/doc/antora.yml index 11c33ddb..68478dfe 100644 --- a/doc/antora.yml +++ b/doc/antora.yml @@ -1,6 +1,9 @@ name: ROOT version: ~ title: Boost.int128 +asciidoc: + attributes: + page-pagination: '' nav: - modules/ROOT/nav.adoc start_page: overview.adoc From bf298110540ab2624ae8aa3a3ef19784a45e0bfe Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 11 Mar 2026 09:34:31 -0400 Subject: [PATCH 073/137] Add CUDA testing of bitwise operations --- test/cuda_jamfile | 13 ++++ test/test_signed_and.cu | 95 
++++++++++++++++++++++++++++++ test/test_signed_left_shift.cu | 97 ++++++++++++++++++++++++++++++ test/test_signed_not.cu | 93 +++++++++++++++++++++++++++++ test/test_signed_or.cu | 95 ++++++++++++++++++++++++++++++ test/test_signed_right_shift.cu | 98 +++++++++++++++++++++++++++++++ test/test_signed_xor.cu | 95 ++++++++++++++++++++++++++++++ test/test_unsigned_and.cu | 95 ++++++++++++++++++++++++++++++ test/test_unsigned_left_shift.cu | 96 ++++++++++++++++++++++++++++++ test/test_unsigned_not.cu | 93 +++++++++++++++++++++++++++++ test/test_unsigned_or.cu | 95 ++++++++++++++++++++++++++++++ test/test_unsigned_right_shift.cu | 96 ++++++++++++++++++++++++++++++ test/test_unsigned_xor.cu | 95 ++++++++++++++++++++++++++++++ 13 files changed, 1156 insertions(+) create mode 100644 test/test_signed_and.cu create mode 100644 test/test_signed_left_shift.cu create mode 100644 test/test_signed_not.cu create mode 100644 test/test_signed_or.cu create mode 100644 test/test_signed_right_shift.cu create mode 100644 test/test_signed_xor.cu create mode 100644 test/test_unsigned_and.cu create mode 100644 test/test_unsigned_left_shift.cu create mode 100644 test/test_unsigned_not.cu create mode 100644 test/test_unsigned_or.cu create mode 100644 test/test_unsigned_right_shift.cu create mode 100644 test/test_unsigned_xor.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 901a187f..124034a3 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -19,3 +19,16 @@ run test_unsigned_div.cu ; run test_signed_div.cu ; run test_unsigned_mod.cu ; run test_signed_mod.cu ; + +test_unsigned_or.cu ; +test_signed_or.cu ; +test_unsigned_and.cu ; +test_signed_and.cu ; +test_unsigned_xor.cu ; +test_signed_xor.cu ; +test_unsigned_not.cu ; +test_signed_not.cu ; +test_unsigned_left_shift.cu ; +test_signed_left_shift.cu ; +test_unsigned_right_shift.cu ; +test_signed_right_shift.cu ; diff --git a/test/test_signed_and.cu b/test/test_signed_and.cu new file mode 100644 index 00000000..95fb3fec --- 
/dev/null +++ b/test/test_signed_and.cu @@ -0,0 +1,95 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] & in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] & input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_left_shift.cu b/test/test_signed_left_shift.cu new file mode 100644 index 00000000..89cf0a67 --- /dev/null +++ b/test/test_signed_left_shift.cu @@ -0,0 +1,97 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +__global__ void cuda_test(const test_type *in, const unsigned *shift, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] << shift[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr shift_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + // Use non-negative values only to avoid UB with signed left shift of negative values + boost::random::uniform_int_distribution dist 
{test_type{0}, (std::numeric_limits::max)()}; + std::uniform_int_distribution shift_dist {0U, 127U}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + shift_vector[i] = shift_dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), shift_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] << shift_vector[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_not.cu b/test/test_signed_not.cu new file mode 100644 index 00000000..5dc285c6 --- /dev/null +++ b/test/test_signed_not.cu @@ -0,0 +1,93 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = ~in[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(~input_vector[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_or.cu b/test/test_signed_or.cu new file mode 100644 index 00000000..7bcf7a6e --- /dev/null +++ b/test/test_signed_or.cu @@ -0,0 +1,95 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] | in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + 
input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] | input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_right_shift.cu b/test/test_signed_right_shift.cu new file mode 100644 index 00000000..c606ddec --- /dev/null +++ b/test/test_signed_right_shift.cu @@ -0,0 +1,98 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +__global__ void cuda_test(const test_type *in, const unsigned *shift, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] >> shift[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr shift_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + // Include negative values — right shift of negative signed integers is + // implementation-defined (arithmetic shift) but not UB + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + std::uniform_int_distribution shift_dist {0U, 127U}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + shift_vector[i] = shift_dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), shift_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] >> shift_vector[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_xor.cu b/test/test_signed_xor.cu new file mode 100644 index 00000000..ff11af14 --- /dev/null +++ b/test/test_signed_xor.cu @@ -0,0 +1,95 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] ^ in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + 
input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] ^ input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_and.cu b/test/test_unsigned_and.cu new file mode 100644 index 00000000..7ced87e1 --- /dev/null +++ b/test/test_unsigned_and.cu @@ -0,0 +1,95 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] & in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] & input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_left_shift.cu b/test/test_unsigned_left_shift.cu new file mode 100644 index 00000000..053c054b --- /dev/null +++ b/test/test_unsigned_left_shift.cu @@ -0,0 +1,96 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, const unsigned *shift, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] << shift[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr shift_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + std::uniform_int_distribution shift_dist {0U, 
127U}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + shift_vector[i] = shift_dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), shift_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] << shift_vector[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_not.cu b/test/test_unsigned_not.cu new file mode 100644 index 00000000..809baf3c --- /dev/null +++ b/test/test_unsigned_not.cu @@ -0,0 +1,93 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = ~in[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(~input_vector[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_or.cu b/test/test_unsigned_or.cu new file mode 100644 index 00000000..45ebf30d --- /dev/null +++ b/test/test_unsigned_or.cu @@ -0,0 +1,95 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] | in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + 
input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] | input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_right_shift.cu b/test/test_unsigned_right_shift.cu new file mode 100644 index 00000000..f81792f2 --- /dev/null +++ b/test/test_unsigned_right_shift.cu @@ -0,0 +1,96 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, const unsigned *shift, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] >> shift[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr shift_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + std::uniform_int_distribution shift_dist {0U, 127U}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + shift_vector[i] = shift_dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), shift_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] >> shift_vector[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_xor.cu b/test/test_unsigned_xor.cu new file mode 100644 index 00000000..8201a432 --- /dev/null +++ b/test/test_unsigned_xor.cu @@ -0,0 +1,95 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] ^ in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = 
dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] ^ input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} From fa9531d61e6bebee23f269401304f95c37d49fc2 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 11 Mar 2026 09:42:50 -0400 Subject: [PATCH 074/137] Fix syntax in Jamfile --- test/cuda_jamfile | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 124034a3..343faf83 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -20,15 +20,15 @@ run test_signed_div.cu ; run test_unsigned_mod.cu ; run test_signed_mod.cu ; -test_unsigned_or.cu ; -test_signed_or.cu ; -test_unsigned_and.cu ; -test_signed_and.cu ; -test_unsigned_xor.cu ; -test_signed_xor.cu ; -test_unsigned_not.cu ; -test_signed_not.cu ; -test_unsigned_left_shift.cu ; -test_signed_left_shift.cu ; -test_unsigned_right_shift.cu ; -test_signed_right_shift.cu ; +run test_unsigned_or.cu ; +run test_signed_or.cu ; +run test_unsigned_and.cu ; +run test_signed_and.cu ; +run test_unsigned_xor.cu ; +run test_signed_xor.cu ; +run test_unsigned_not.cu ; +run test_signed_not.cu ; +run test_unsigned_left_shift.cu ; +run test_signed_left_shift.cu ; +run test_unsigned_right_shift.cu ; +run test_signed_right_shift.cu ; From 34e859869673d99138b665f960a62c37754732c2 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 11 Mar 2026 11:05:50 -0400 Subject: [PATCH 075/137] Add cuda testing of functions --- test/cuda_jamfile | 13 +++++ test/test_bit_ceil.cu | 92 ++++++++++++++++++++++++++++++++++++ test/test_bit_floor.cu | 91 +++++++++++++++++++++++++++++++++++ test/test_bit_width.cu | 91 +++++++++++++++++++++++++++++++++++ test/test_byteswap.cu | 91 +++++++++++++++++++++++++++++++++++ test/test_countl_one.cu | 91 +++++++++++++++++++++++++++++++++++ test/test_countl_zero.cu | 91 +++++++++++++++++++++++++++++++++++ test/test_countr_one.cu | 91 +++++++++++++++++++++++++++++++++++ 
test/test_countr_zero.cu | 91 +++++++++++++++++++++++++++++++++++ test/test_has_single_bit.cu | 91 +++++++++++++++++++++++++++++++++++ test/test_popcount.cu | 91 +++++++++++++++++++++++++++++++++++ test/test_rotl.cu | 94 +++++++++++++++++++++++++++++++++++++ test/test_rotr.cu | 94 +++++++++++++++++++++++++++++++++++++ 13 files changed, 1112 insertions(+) create mode 100644 test/test_bit_ceil.cu create mode 100644 test/test_bit_floor.cu create mode 100644 test/test_bit_width.cu create mode 100644 test/test_byteswap.cu create mode 100644 test/test_countl_one.cu create mode 100644 test/test_countl_zero.cu create mode 100644 test/test_countr_one.cu create mode 100644 test/test_countr_zero.cu create mode 100644 test/test_has_single_bit.cu create mode 100644 test/test_popcount.cu create mode 100644 test/test_rotl.cu create mode 100644 test/test_rotr.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 343faf83..460f0dcb 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -32,3 +32,16 @@ run test_unsigned_left_shift.cu ; run test_signed_left_shift.cu ; run test_unsigned_right_shift.cu ; run test_signed_right_shift.cu ; + +run test_has_single_bit.cu ; +run test_countl_zero.cu ; +run test_countl_one.cu ; +run test_bit_width.cu ; +run test_bit_ceil.cu ; +run test_bit_floor.cu ; +run test_countr_zero.cu ; +run test_countr_one.cu ; +run test_rotl.cu ; +run test_rotr.cu ; +run test_popcount.cu ; +run test_byteswap.cu ; diff --git a/test/test_bit_ceil.cu b/test/test_bit_ceil.cu new file mode 100644 index 00000000..a660eec8 --- /dev/null +++ b/test/test_bit_ceil.cu @@ -0,0 +1,92 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::bit_ceil(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + // Limit to values where bit_ceil won't overflow (bit_width <= 127) + boost::random::uniform_int_distribution dist {test_type{0U}, test_type{UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_MAX}}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::bit_ceil(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_bit_floor.cu b/test/test_bit_floor.cu new file mode 100644 index 00000000..171108d4 --- /dev/null +++ b/test/test_bit_floor.cu @@ -0,0 +1,91 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::bit_floor(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / 
threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::bit_floor(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_bit_width.cu b/test/test_bit_width.cu new file mode 100644 index 00000000..d6c13c7d --- /dev/null +++ b/test/test_bit_width.cu @@ -0,0 +1,91 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::bit_width(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::bit_width(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_byteswap.cu b/test/test_byteswap.cu new file mode 100644 index 00000000..9cb83f91 --- /dev/null +++ b/test/test_byteswap.cu @@ -0,0 +1,91 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::byteswap(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed 
to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::byteswap(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_countl_one.cu b/test/test_countl_one.cu new file mode 100644 index 00000000..ed76e92f --- /dev/null +++ b/test/test_countl_one.cu @@ -0,0 +1,91 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::countl_one(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + int threadsPerBlock = 256; + 
int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::countl_one(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_countl_zero.cu b/test/test_countl_zero.cu new file mode 100644 index 00000000..829584de --- /dev/null +++ b/test/test_countl_zero.cu @@ -0,0 +1,91 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::countl_zero(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::countl_zero(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_countr_one.cu b/test/test_countr_one.cu new file mode 100644 index 00000000..9024cc29 --- /dev/null +++ b/test/test_countr_one.cu @@ -0,0 +1,91 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::countr_one(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / 
threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::countr_one(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_countr_zero.cu b/test/test_countr_zero.cu new file mode 100644 index 00000000..2cbf5b7d --- /dev/null +++ b/test/test_countr_zero.cu @@ -0,0 +1,91 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::countr_zero(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::countr_zero(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_has_single_bit.cu b/test/test_has_single_bit.cu new file mode 100644 index 00000000..fcaa8ddd --- /dev/null +++ b/test/test_has_single_bit.cu @@ -0,0 +1,91 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, bool *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::has_single_bit(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + 
threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::has_single_bit(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_popcount.cu b/test/test_popcount.cu new file mode 100644 index 00000000..5ee24511 --- /dev/null +++ b/test/test_popcount.cu @@ -0,0 +1,91 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::popcount(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::popcount(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_rotl.cu b/test/test_rotl.cu new file mode 100644 index 00000000..ab3bd01b --- /dev/null +++ b/test/test_rotl.cu @@ -0,0 +1,94 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, const int *shift, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::rotl(in[i], shift[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr shift_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + std::uniform_int_distribution shift_dist {0, 127}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + shift_vector[i] = shift_dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), shift_vector.get(), output_vector.get(), numElements); + 
cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::rotl(input_vector[i], shift_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_rotr.cu b/test/test_rotr.cu new file mode 100644 index 00000000..09f2c68e --- /dev/null +++ b/test/test_rotr.cu @@ -0,0 +1,94 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, const int *shift, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::rotr(in[i], shift[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr shift_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + std::uniform_int_distribution shift_dist {0, 127}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + shift_vector[i] = shift_dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), shift_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::rotr(input_vector[i], shift_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} From 6922814e01dadb0bbbefd9edaaa532020574e177 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 11 Mar 2026 11:26:45 -0400 Subject: [PATCH 076/137] Make CLZ cuda compatible --- include/boost/int128/detail/clz.hpp | 51 ++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/include/boost/int128/detail/clz.hpp b/include/boost/int128/detail/clz.hpp index af5474eb..84bc092e 100644 --- a/include/boost/int128/detail/clz.hpp +++ b/include/boost/int128/detail/clz.hpp @@ -20,6 +20,8 @@ namespace detail { namespace impl { +#ifndef __NVCC__ + // See: http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn BOOST_INT128_INLINE_CONSTEXPR int index64[64] = { 0, 47, 1, 56, 48, 27, 2, 60, @@ -32,8 +34,25 @@ BOOST_INT128_INLINE_CONSTEXPR int index64[64] = { 13, 18, 8, 12, 7, 6, 5, 63 }; +#endif + BOOST_INT128_HOST_DEVICE constexpr int bit_scan_reverse(std::uint64_t bb) noexcept { + #ifdef __NVCC__ + + constexpr int index64[64] = { + 0, 47, 1, 56, 48, 27, 2, 60, + 57, 49, 41, 37, 28, 16, 3, 61, + 54, 58, 35, 52, 50, 42, 21, 44, + 38, 32, 29, 23, 17, 11, 4, 62, + 46, 55, 26, 59, 40, 36, 15, 53, + 34, 51, 20, 43, 31, 22, 10, 45, + 25, 39, 14, 33, 19, 30, 9, 24, + 13, 18, 8, 12, 7, 6, 5, 63 + }; + + #endif + constexpr auto debruijn64 {UINT64_C(0x03f79d71b4cb0a89)}; BOOST_INT128_ASSUME(bb != 0); // LCOV_EXCL_LINE @@ -48,6 +67,8 @@ BOOST_INT128_HOST_DEVICE 
constexpr int bit_scan_reverse(std::uint64_t bb) noexce return index64[(bb * debruijn64) >> 58]; } +#ifndef __NVCC__ + BOOST_INT128_INLINE_CONSTEXPR int countl_mod37[37] = { 32, 31, 6, 30, 9, 5, 0, 29, 16, 8, 2, 4, 21, 0, 19, 28, @@ -56,8 +77,22 @@ BOOST_INT128_INLINE_CONSTEXPR int countl_mod37[37] = { 27, 12, 24, 13, 14, 0 }; +#endif + BOOST_INT128_HOST_DEVICE constexpr int backup_countl_impl(std::uint32_t x) noexcept { + #ifdef __NVCC__ + + constexpr int countl_mod37[37] = { + 32, 31, 6, 30, 9, 5, 0, 29, + 16, 8, 2, 4, 21, 0, 19, 28, + 25, 15, 0, 7, 10, 1, 17, 3, + 22, 20, 26, 0, 11, 18, 23, + 27, 12, 24, 13, 14, 0 + }; + + #endif + x |= x >> 1; x |= x >> 2; x |= x >> 4; @@ -67,26 +102,26 @@ BOOST_INT128_HOST_DEVICE constexpr int backup_countl_impl(std::uint32_t x) noexc return countl_mod37[x % 37]; } -#if BOOST_INT128_HAS_BUILTIN(__builtin_clz) +#if BOOST_INT128_HAS_BUILTIN(__builtin_clz) && !defined(__NVCC__) -BOOST_INT128_HOST_DEVICE constexpr int countl_impl(unsigned int x) noexcept +constexpr int countl_impl(unsigned int x) noexcept { return x ? __builtin_clz(x) : std::numeric_limits::digits; } -BOOST_INT128_HOST_DEVICE constexpr int countl_impl(unsigned long x) noexcept +constexpr int countl_impl(unsigned long x) noexcept { return x ? __builtin_clzl(x) : std::numeric_limits::digits; } -BOOST_INT128_HOST_DEVICE constexpr int countl_impl(unsigned long long x) noexcept +constexpr int countl_impl(unsigned long long x) noexcept { return x ? 
__builtin_clzll(x) : std::numeric_limits::digits; } -#elif (defined(_M_AMD64) || defined(_M_ARM64)) && !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION) +#elif (defined(_M_AMD64) || defined(_M_ARM64)) && !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION) && !defined(__NVCC__) -BOOST_INT128_HOST_DEVICE constexpr int countl_impl(std::uint32_t x) noexcept +constexpr int countl_impl(std::uint32_t x) noexcept { if (BOOST_INT128_IS_CONSTANT_EVALUATED(x)) { @@ -107,7 +142,7 @@ BOOST_INT128_HOST_DEVICE constexpr int countl_impl(std::uint32_t x) noexcept } } -BOOST_INT128_HOST_DEVICE constexpr int countl_impl(std::uint64_t x) noexcept +constexpr int countl_impl(std::uint64_t x) noexcept { if (BOOST_INT128_IS_CONSTANT_EVALUATED(x)) { @@ -130,7 +165,7 @@ BOOST_INT128_HOST_DEVICE constexpr int countl_impl(std::uint64_t x) noexcept #elif defined(_M_IX86) && !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION) -BOOST_INT128_HOST_DEVICE constexpr int countl_impl(std::uint32_t x) noexcept +constexpr int countl_impl(std::uint32_t x) noexcept { if (BOOST_INT128_IS_CONSTANT_EVALUATED(x)) { From 674a3d772edf59b7d6e9cb560fbdf58edd34e748 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 11 Mar 2026 11:26:53 -0400 Subject: [PATCH 077/137] Make CTZ CUDA compatible --- include/boost/int128/detail/ctz.hpp | 34 +++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/include/boost/int128/detail/ctz.hpp b/include/boost/int128/detail/ctz.hpp index 297d884b..60db4db8 100644 --- a/include/boost/int128/detail/ctz.hpp +++ b/include/boost/int128/detail/ctz.hpp @@ -20,25 +20,27 @@ namespace detail { namespace impl { -#if BOOST_INT128_HAS_BUILTIN(__builtin_ctz) +#if BOOST_INT128_HAS_BUILTIN(__builtin_ctz) && !defined(__NVCC__) -BOOST_INT128_HOST_DEVICE constexpr int countr_impl(unsigned int x) noexcept +constexpr int countr_impl(unsigned int x) noexcept { return x ? 
__builtin_ctz(x) : std::numeric_limits::digits; } -BOOST_INT128_HOST_DEVICE constexpr int countr_impl(unsigned long x) noexcept +constexpr int countr_impl(unsigned long x) noexcept { return x ? __builtin_ctzl(x) : std::numeric_limits::digits; } -BOOST_INT128_HOST_DEVICE constexpr int countr_impl(unsigned long long x) noexcept +constexpr int countr_impl(unsigned long long x) noexcept { return x ? __builtin_ctzll(x) : std::numeric_limits::digits; } #endif +#ifndef __NVCC__ + BOOST_INT128_INLINE_CONSTEXPR int countr_mod37[37] = { 32, 0, 1, 26, 2, 23, 27, 0, 3, 16, 24, 30, 28, 11, 0, 13, @@ -47,12 +49,14 @@ BOOST_INT128_INLINE_CONSTEXPR int countr_mod37[37] = { 5, 20, 8, 19, 18 }; +#endif + #if defined(_MSC_VER) && !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION) && !BOOST_INT128_HAS_BUILTIN(__builtin_ctz) #pragma warning(push) #pragma warning(disable : 4146) // unary minus operator applied to unsigned type, result still unsigned -BOOST_INT128_HOST_DEVICE constexpr int countr_impl(std::uint32_t x) noexcept +constexpr int countr_impl(std::uint32_t x) noexcept { if (BOOST_INT128_IS_CONSTANT_EVALUATED(x)) { @@ -75,7 +79,7 @@ BOOST_INT128_HOST_DEVICE constexpr int countr_impl(std::uint32_t x) noexcept #pragma warning(pop) -#elif !BOOST_INT128_HAS_BUILTIN(__builtin_ctz) +#elif !BOOST_INT128_HAS_BUILTIN(__builtin_ctz) || defined(__NVCC__) #ifdef _MSC_VER #pragma warning(push) @@ -84,6 +88,18 @@ BOOST_INT128_HOST_DEVICE constexpr int countr_impl(std::uint32_t x) noexcept BOOST_INT128_HOST_DEVICE constexpr int countr_impl(std::uint32_t x) noexcept { + #ifdef __NVCC__ + + constexpr int countr_mod37[37] = { + 32, 0, 1, 26, 2, 23, 27, 0, + 3, 16, 24, 30, 28, 11, 0, 13, + 4, 7, 17, 0, 25, 22, 31, 15, + 29, 10, 12, 6, 0, 21, 14, 9, + 5, 20, 8, 19, 18 + }; + + #endif + return countr_mod37[(-x & x) % 37]; } @@ -93,9 +109,9 @@ BOOST_INT128_HOST_DEVICE constexpr int countr_impl(std::uint32_t x) noexcept #endif -#if (defined(_M_AMD64) || defined(_M_ARM64)) && 
!defined(BOOST_INT128_NO_CONSTEVAL_DETECTION) && !BOOST_INT128_HAS_BUILTIN(__builtin_ctz) +#if (defined(_M_AMD64) || defined(_M_ARM64)) && !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION) && !BOOST_INT128_HAS_BUILTIN(__builtin_ctz) && !defined(__NVCC__) -BOOST_INT128_HOST_DEVICE constexpr int countr_impl(std::uint64_t x) noexcept +constexpr int countr_impl(std::uint64_t x) noexcept { if (BOOST_INT128_IS_CONSTANT_EVALUATED(x)) { @@ -116,7 +132,7 @@ BOOST_INT128_HOST_DEVICE constexpr int countr_impl(std::uint64_t x) noexcept } } -#elif !BOOST_INT128_HAS_BUILTIN(__builtin_ctz) +#elif !BOOST_INT128_HAS_BUILTIN(__builtin_ctz) || defined(__NVCC__) BOOST_INT128_HOST_DEVICE constexpr int countr_impl(std::uint64_t x) noexcept { From 9d09403039409686bc7c05c6ccd928c8d4aae7b7 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 11 Mar 2026 11:28:00 -0400 Subject: [PATCH 078/137] Have NVCC use non-intrinsic popcount and byteswap paths --- include/boost/int128/bit.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/boost/int128/bit.hpp b/include/boost/int128/bit.hpp index 39e46535..18a890a8 100644 --- a/include/boost/int128/bit.hpp +++ b/include/boost/int128/bit.hpp @@ -65,7 +65,7 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t rotr(const uint return x >> (static_cast(s) & mask) | x << (static_cast(-s) & mask); } -#if BOOST_INT128_HAS_BUILTIN(__builtin_popcountll) +#if BOOST_INT128_HAS_BUILTIN(__builtin_popcountll) && !defined(__NVCC__) BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int popcount(const uint128_t x) noexcept { @@ -139,7 +139,7 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int popcount(const uint12 } } -#elif !BOOST_INT128_HAS_BUILTIN(__builtin_popcountll) +#elif !BOOST_INT128_HAS_BUILTIN(__builtin_popcountll) || defined(__NVCC__) BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int popcount(const uint128_t x) noexcept { @@ -148,7 +148,7 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE 
constexpr int popcount(const uint12 #endif -#if BOOST_INT128_HAS_BUILTIN(__builtin_bswap64) +#if BOOST_INT128_HAS_BUILTIN(__builtin_bswap64) && !defined(__NVCC__) BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t byteswap(const uint128_t x) noexcept { @@ -187,7 +187,7 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t byteswap(const } } -#elif !BOOST_INT128_HAS_BUILTIN(__builtin_bswap64) +#elif !BOOST_INT128_HAS_BUILTIN(__builtin_bswap64) || defined(__NVCC__) BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t byteswap(const uint128_t x) noexcept { From 08c33bcc2d8668bec0f231f26b7dcddadb1202c7 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 11 Mar 2026 11:43:29 -0400 Subject: [PATCH 079/137] Fix return value consistency and warning #186-D --- include/boost/int128/detail/int128_imp.hpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/include/boost/int128/detail/int128_imp.hpp b/include/boost/int128/detail/int128_imp.hpp index 0daae392..d2bdfa62 100644 --- a/include/boost/int128/detail/int128_imp.hpp +++ b/include/boost/int128/detail/int128_imp.hpp @@ -1945,9 +1945,19 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t default_rs_impl(const int128_t lhs, template BOOST_INT128_HOST_DEVICE int128_t intrinsic_rs_impl(const int128_t lhs, const Integer rhs) noexcept { - if (BOOST_INT128_UNLIKELY(rhs >= 128 || rhs < 0)) + BOOST_INT128_IF_CONSTEXPR (std::numeric_limits::is_signed) { - return {0, 0}; + if (rhs >= 128 || rhs < 0) + { + return lhs.high < 0 ? int128_t{-1, UINT64_MAX} : int128_t{0, 0}; + } + } + else + { + if (rhs >= 128) + { + return lhs.high < 0 ? 
int128_t{-1, UINT64_MAX} : int128_t{0, 0}; + } } #ifdef BOOST_INT128_HAS_INT128 From 119bde66edbe433749d94327f535c81dc3ed6b5e Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 11 Mar 2026 13:21:35 -0400 Subject: [PATCH 080/137] Add CUDA testing of cstdlib functions --- test/cuda_jamfile | 3 + test/test_signed_cstdlib_div.cu | 99 +++++++++++++++++++++++++++++++ test/test_unsigned_cstdlib_div.cu | 94 +++++++++++++++++++++++++++++ 3 files changed, 196 insertions(+) create mode 100644 test/test_signed_cstdlib_div.cu create mode 100644 test/test_unsigned_cstdlib_div.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 460f0dcb..1d0fc8c4 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -45,3 +45,6 @@ run test_rotl.cu ; run test_rotr.cu ; run test_popcount.cu ; run test_byteswap.cu ; + +run test_unsigned_cstdlib_div.cu ; +run test_signed_cstdlib_div.cu ; diff --git a/test/test_signed_cstdlib_div.cu b/test/test_signed_cstdlib_div.cu new file mode 100644 index 00000000..a8445ef5 --- /dev/null +++ b/test/test_signed_cstdlib_div.cu @@ -0,0 +1,99 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::int128_t; +using result_type = boost::int128::i128div_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, result_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::div(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + // Avoid zero divisors; use full signed range for both inputs + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + // Ensure non-zero divisor + do + { + input_vector2[i] = dist(rng); + } while (input_vector2[i] == 0); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::div(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i].quot != results[i].quot || output_vector[i].rem != results[i].rem) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_cstdlib_div.cu b/test/test_unsigned_cstdlib_div.cu new file mode 100644 index 00000000..62ccae81 --- /dev/null +++ b/test/test_unsigned_cstdlib_div.cu @@ -0,0 +1,94 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; +using result_type = boost::int128::u128div_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, result_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::div(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist 
{test_type{1U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::div(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i].quot != results[i].quot || output_vector[i].rem != results[i].rem) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} From 05401487e14ebea4dc07774706bd86a93038ba5e Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 11 Mar 2026 13:27:13 -0400 Subject: [PATCH 081/137] Fix outdated documentation --- doc/modules/ROOT/pages/int128_t.adoc | 2 +- doc/modules/ROOT/pages/uint128_t.adoc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/modules/ROOT/pages/int128_t.adoc b/doc/modules/ROOT/pages/int128_t.adoc index 3989fe6a..70090981 100644 --- a/doc/modules/ROOT/pages/int128_t.adoc +++ b/doc/modules/ROOT/pages/int128_t.adoc @@ -148,7 +148,7 @@ struct int128_t // Conversion to float BOOST_INT128_HOST_DEVICE explicit constexpr operator float() const noexcept; BOOST_INT128_HOST_DEVICE explicit constexpr operator double() const noexcept; - BOOST_INT128_HOST_DEVICE explicit constexpr operator long double() const noexcept; + explicit constexpr operator long double() const noexcept; // There are no long doubles on device }; } // namespace int128 diff --git a/doc/modules/ROOT/pages/uint128_t.adoc b/doc/modules/ROOT/pages/uint128_t.adoc index 70c59fc3..efe7d742 100644 --- a/doc/modules/ROOT/pages/uint128_t.adoc +++ b/doc/modules/ROOT/pages/uint128_t.adoc @@ -187,7 +187,7 @@ struct uint128_t // Conversion to float BOOST_INT128_HOST_DEVICE explicit constexpr operator float() const noexcept; BOOST_INT128_HOST_DEVICE explicit constexpr operator double() const noexcept; - BOOST_INT128_HOST_DEVICE explicit constexpr operator long double() const noexcept; + explicit constexpr operator long double() const noexcept; // There are no long doubles on device }; } // namespace int128 From 690a7b567b3befae5ec1a77bfd2137fd44b67a68 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 12 Mar 2026 11:00:33 -0400 Subject: [PATCH 082/137] Add CUDA testing of all comparison operators --- test/cuda_jamfile | 13 +++++ 
test/test_signed_eq.cu | 119 +++++++++++++++++++++++++++++++++++++++ test/test_signed_ge.cu | 119 +++++++++++++++++++++++++++++++++++++++ test/test_signed_gt.cu | 119 +++++++++++++++++++++++++++++++++++++++ test/test_signed_le.cu | 119 +++++++++++++++++++++++++++++++++++++++ test/test_signed_lt.cu | 119 +++++++++++++++++++++++++++++++++++++++ test/test_signed_ne.cu | 119 +++++++++++++++++++++++++++++++++++++++ test/test_unsigned_eq.cu | 119 +++++++++++++++++++++++++++++++++++++++ test/test_unsigned_ge.cu | 119 +++++++++++++++++++++++++++++++++++++++ test/test_unsigned_gt.cu | 119 +++++++++++++++++++++++++++++++++++++++ test/test_unsigned_le.cu | 119 +++++++++++++++++++++++++++++++++++++++ test/test_unsigned_lt.cu | 119 +++++++++++++++++++++++++++++++++++++++ test/test_unsigned_ne.cu | 119 +++++++++++++++++++++++++++++++++++++++ 13 files changed, 1441 insertions(+) create mode 100644 test/test_signed_eq.cu create mode 100644 test/test_signed_ge.cu create mode 100644 test/test_signed_gt.cu create mode 100644 test/test_signed_le.cu create mode 100644 test/test_signed_lt.cu create mode 100644 test/test_signed_ne.cu create mode 100644 test/test_unsigned_eq.cu create mode 100644 test/test_unsigned_ge.cu create mode 100644 test/test_unsigned_gt.cu create mode 100644 test/test_unsigned_le.cu create mode 100644 test/test_unsigned_lt.cu create mode 100644 test/test_unsigned_ne.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 1d0fc8c4..cd2fcc13 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -46,5 +46,18 @@ run test_rotr.cu ; run test_popcount.cu ; run test_byteswap.cu ; +run test_unsigned_eq.cu ; +run test_signed_eq.cu ; +run test_unsigned_ne.cu ; +run test_signed_ne.cu ; +run test_unsigned_lt.cu ; +run test_signed_lt.cu ; +run test_unsigned_le.cu ; +run test_signed_le.cu ; +run test_unsigned_gt.cu ; +run test_signed_gt.cu ; +run test_unsigned_ge.cu ; +run test_signed_ge.cu ; + run test_unsigned_cstdlib_div.cu ; run test_signed_cstdlib_div.cu ; diff 
--git a/test/test_signed_eq.cu b/test/test_signed_eq.cu new file mode 100644 index 00000000..ac09ae44 --- /dev/null +++ b/test/test_signed_eq.cu @@ -0,0 +1,119 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const test_type *in1, const test_type *in2, bool *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = (in1[i] == in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vectors + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {test_type{(std::numeric_limits::min)()} + 1, test_type{(std::numeric_limits::max)()} - 1}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(rng); + // Make some elements equal to test both true and false cases + if (i % 3 == 0) + { + input_vector2[i] = input_vector1[i]; + } + else + { + input_vector2[i] = dist(rng); + } + 
} + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(input_vector1[i] == input_vector2[i]); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_ge.cu b/test/test_signed_ge.cu new file mode 100644 index 00000000..af5628fb --- /dev/null +++ b/test/test_signed_ge.cu @@ -0,0 +1,119 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const test_type *in1, const test_type *in2, bool *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = (in1[i] >= in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vectors + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {test_type{(std::numeric_limits::min)()} + 1, test_type{(std::numeric_limits::max)()} - 1}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(rng); + // Make some elements equal to test both true and false cases + if (i % 3 == 0) + { + input_vector2[i] = input_vector1[i]; + } + else + { + input_vector2[i] = dist(rng); + } + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), 
input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(input_vector1[i] >= input_vector2[i]); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_gt.cu b/test/test_signed_gt.cu new file mode 100644 index 00000000..f37577b8 --- /dev/null +++ b/test/test_signed_gt.cu @@ -0,0 +1,119 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const test_type *in1, const test_type *in2, bool *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = (in1[i] > in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vectors + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {test_type{(std::numeric_limits::min)()} + 1, test_type{(std::numeric_limits::max)()} - 1}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(rng); + // Make some elements equal to test both true and false cases + if (i % 3 == 0) + { + input_vector2[i] = input_vector1[i]; + } + else + { + input_vector2[i] = dist(rng); + } + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), 
input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(input_vector1[i] > input_vector2[i]); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_le.cu b/test/test_signed_le.cu new file mode 100644 index 00000000..05ee992a --- /dev/null +++ b/test/test_signed_le.cu @@ -0,0 +1,119 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const test_type *in1, const test_type *in2, bool *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = (in1[i] <= in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vectors + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {test_type{(std::numeric_limits::min)()} + 1, test_type{(std::numeric_limits::max)()} - 1}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(rng); + // Make some elements equal to test both true and false cases + if (i % 3 == 0) + { + input_vector2[i] = input_vector1[i]; + } + else + { + input_vector2[i] = dist(rng); + } + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), 
input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(input_vector1[i] <= input_vector2[i]); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_lt.cu b/test/test_signed_lt.cu new file mode 100644 index 00000000..c356ae00 --- /dev/null +++ b/test/test_signed_lt.cu @@ -0,0 +1,119 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const test_type *in1, const test_type *in2, bool *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = (in1[i] < in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vectors + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {test_type{(std::numeric_limits::min)()} + 1, test_type{(std::numeric_limits::max)()} - 1}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(rng); + // Make some elements equal to test both true and false cases + if (i % 3 == 0) + { + input_vector2[i] = input_vector1[i]; + } + else + { + input_vector2[i] = dist(rng); + } + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), 
input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(input_vector1[i] < input_vector2[i]); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_ne.cu b/test/test_signed_ne.cu new file mode 100644 index 00000000..39f9d64a --- /dev/null +++ b/test/test_signed_ne.cu @@ -0,0 +1,119 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const test_type *in1, const test_type *in2, bool *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = (in1[i] != in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vectors + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {test_type{(std::numeric_limits::min)()} + 1, test_type{(std::numeric_limits::max)()} - 1}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(rng); + // Make some elements equal to test both true and false cases + if (i % 3 == 0) + { + input_vector2[i] = input_vector1[i]; + } + else + { + input_vector2[i] = dist(rng); + } + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), 
input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(input_vector1[i] != input_vector2[i]); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_eq.cu b/test/test_unsigned_eq.cu new file mode 100644 index 00000000..c2c1d415 --- /dev/null +++ b/test/test_unsigned_eq.cu @@ -0,0 +1,119 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const test_type *in1, const test_type *in2, bool *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = (in1[i] == in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vectors + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {test_type{0U}, test_type{std::numeric_limits::max()}}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(rng); + // Make some elements equal to test both true and false cases + if (i % 3 == 0) + { + input_vector2[i] = input_vector1[i]; + } + else + { + input_vector2[i] = dist(rng); + } + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), 
numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(input_vector1[i] == input_vector2[i]); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_ge.cu b/test/test_unsigned_ge.cu new file mode 100644 index 00000000..4803e307 --- /dev/null +++ b/test/test_unsigned_ge.cu @@ -0,0 +1,119 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const test_type *in1, const test_type *in2, bool *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = (in1[i] >= in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vectors + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {test_type{0U}, test_type{std::numeric_limits::max()}}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(rng); + // Make some elements equal to test both true and false cases + if (i % 3 == 0) + { + input_vector2[i] = input_vector1[i]; + } + else + { + input_vector2[i] = dist(rng); + } + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), 
numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(input_vector1[i] >= input_vector2[i]); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_gt.cu b/test/test_unsigned_gt.cu new file mode 100644 index 00000000..0dd51292 --- /dev/null +++ b/test/test_unsigned_gt.cu @@ -0,0 +1,119 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const test_type *in1, const test_type *in2, bool *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = (in1[i] > in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vectors + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {test_type{0U}, test_type{std::numeric_limits::max()}}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(rng); + // Make some elements equal to test both true and false cases + if (i % 3 == 0) + { + input_vector2[i] = input_vector1[i]; + } + else + { + input_vector2[i] = dist(rng); + } + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), 
numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(input_vector1[i] > input_vector2[i]); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_le.cu b/test/test_unsigned_le.cu new file mode 100644 index 00000000..4ef2d2b6 --- /dev/null +++ b/test/test_unsigned_le.cu @@ -0,0 +1,119 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const test_type *in1, const test_type *in2, bool *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = (in1[i] <= in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vectors + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {test_type{0U}, test_type{std::numeric_limits::max()}}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(rng); + // Make some elements equal to test both true and false cases + if (i % 3 == 0) + { + input_vector2[i] = input_vector1[i]; + } + else + { + input_vector2[i] = dist(rng); + } + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), 
numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(input_vector1[i] <= input_vector2[i]); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_lt.cu b/test/test_unsigned_lt.cu new file mode 100644 index 00000000..6394e773 --- /dev/null +++ b/test/test_unsigned_lt.cu @@ -0,0 +1,119 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const test_type *in1, const test_type *in2, bool *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = (in1[i] < in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vectors + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {test_type{0U}, test_type{std::numeric_limits::max()}}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(rng); + // Make some elements equal to test both true and false cases + if (i % 3 == 0) + { + input_vector2[i] = input_vector1[i]; + } + else + { + input_vector2[i] = dist(rng); + } + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), 
numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(input_vector1[i] < input_vector2[i]); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_ne.cu b/test/test_unsigned_ne.cu new file mode 100644 index 00000000..2356c75c --- /dev/null +++ b/test/test_unsigned_ne.cu @@ -0,0 +1,119 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const test_type *in1, const test_type *in2, bool *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = (in1[i] != in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vectors + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {test_type{0U}, test_type{std::numeric_limits::max()}}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(rng); + // Make some elements equal to test both true and false cases + if (i % 3 == 0) + { + input_vector2[i] = input_vector1[i]; + } + else + { + input_vector2[i] = dist(rng); + } + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), 
numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(input_vector1[i] != input_vector2[i]); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} From 3a36accf51c28a9cf0304a47371c437302e417b1 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 13 Mar 2026 08:21:35 -0500 Subject: [PATCH 083/137] Add CUDA testing of conversion functions --- test/cuda_jamfile | 3 + test/test_signed_to_unsigned_conversion.cu | 108 +++++++++++++++++++++ test/test_unsigned_to_signed_conversion.cu | 108 +++++++++++++++++++++ 3 files changed, 219 insertions(+) create mode 100644 test/test_signed_to_unsigned_conversion.cu create mode 100644 test/test_unsigned_to_signed_conversion.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index cd2fcc13..bac4ab2b 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -61,3 +61,6 @@ run test_signed_ge.cu ; run test_unsigned_cstdlib_div.cu ; run test_signed_cstdlib_div.cu ; + +run test_signed_to_unsigned_conversion.cu ; +run test_unsigned_to_signed_conversion.cu ; diff --git a/test/test_signed_to_unsigned_conversion.cu b/test/test_signed_to_unsigned_conversion.cu new file mode 100644 index 00000000..35cd558f --- /dev/null +++ b/test/test_signed_to_unsigned_conversion.cu @@ -0,0 +1,108 @@ 
+// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using signed_type = boost::int128::int128_t; +using unsigned_type = boost::int128::uint128_t; + +/** + * CUDA Kernel Device code + * + * Converts signed int128 values to unsigned int128 values + */ +__global__ void cuda_test(const signed_type *in, unsigned_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = unsigned_type(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors with non-negative values so bit patterns match + boost::random::uniform_int_distribution dist {signed_type{0}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + 
cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(unsigned_type(input_vector[i])); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_to_signed_conversion.cu b/test/test_unsigned_to_signed_conversion.cu new file mode 100644 index 00000000..67195383 --- /dev/null +++ b/test/test_unsigned_to_signed_conversion.cu @@ -0,0 +1,108 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using signed_type = boost::int128::int128_t; +using unsigned_type = boost::int128::uint128_t; + +/** + * CUDA Kernel Device code + * + * Converts unsigned int128 values to signed int128 values + */ +__global__ void cuda_test(const unsigned_type *in, signed_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = signed_type(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors with values that fit in signed range + boost::random::uniform_int_distribution dist {unsigned_type{0U}, static_cast((std::numeric_limits::max)())}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != 
cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(signed_type(input_vector[i])); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} From d10021af2693c90275cfbb308a0e0848d109d5e5 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 13 Mar 2026 08:25:03 -0500 Subject: [PATCH 084/137] Add sign conversion define --- test/test_signed_to_unsigned_conversion.cu | 2 ++ test/test_unsigned_to_signed_conversion.cu | 2 ++ 2 files changed, 4 insertions(+) diff --git a/test/test_signed_to_unsigned_conversion.cu b/test/test_signed_to_unsigned_conversion.cu index 35cd558f..5073f0f6 100644 --- a/test/test_signed_to_unsigned_conversion.cu +++ b/test/test_signed_to_unsigned_conversion.cu @@ -3,6 +3,8 @@ // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + #include #include #include diff --git a/test/test_unsigned_to_signed_conversion.cu b/test/test_unsigned_to_signed_conversion.cu index 67195383..016e7a5d 100644 --- a/test/test_unsigned_to_signed_conversion.cu +++ b/test/test_unsigned_to_signed_conversion.cu @@ -3,6 +3,8 @@ // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + #include #include #include From 1d786e31ba0f0ddfe806a55aec1b86e94231b8f0 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 11 Mar 2026 13:40:38 -0400 Subject: [PATCH 085/137] Add CUDA testing of functions --- test/cuda_jamfile | 15 ++++++ test/test_signed_add_sat.cu | 93 ++++++++++++++++++++++++++++++++ test/test_signed_div_sat.cu | 96 ++++++++++++++++++++++++++++++++++ test/test_signed_gcd.cu | 93 ++++++++++++++++++++++++++++++++ test/test_signed_lcm.cu | 94 +++++++++++++++++++++++++++++++++ test/test_signed_midpoint.cu | 93 ++++++++++++++++++++++++++++++++ test/test_signed_mul_sat.cu | 93 ++++++++++++++++++++++++++++++++ test/test_signed_sub_sat.cu | 93 ++++++++++++++++++++++++++++++++ test/test_unsigned_add_sat.cu | 93 ++++++++++++++++++++++++++++++++ test/test_unsigned_div_sat.cu | 93 ++++++++++++++++++++++++++++++++ test/test_unsigned_gcd.cu | 93 ++++++++++++++++++++++++++++++++ test/test_unsigned_lcm.cu | 94 +++++++++++++++++++++++++++++++++ test/test_unsigned_midpoint.cu | 93 ++++++++++++++++++++++++++++++++ test/test_unsigned_mul_sat.cu | 93 ++++++++++++++++++++++++++++++++ test/test_unsigned_sub_sat.cu | 93 ++++++++++++++++++++++++++++++++ 15 files changed, 1322 insertions(+) create mode 100644 test/test_signed_add_sat.cu create mode 100644 test/test_signed_div_sat.cu create mode 100644 test/test_signed_gcd.cu create mode 100644 test/test_signed_lcm.cu create mode 100644 test/test_signed_midpoint.cu create mode 100644 test/test_signed_mul_sat.cu create mode 100644 test/test_signed_sub_sat.cu create mode 100644 test/test_unsigned_add_sat.cu create mode 100644 test/test_unsigned_div_sat.cu create mode 100644 test/test_unsigned_gcd.cu create mode 100644 test/test_unsigned_lcm.cu create mode 100644 test/test_unsigned_midpoint.cu create mode 100644 test/test_unsigned_mul_sat.cu create mode 100644 
test/test_unsigned_sub_sat.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index bac4ab2b..82e2c8ee 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -64,3 +64,18 @@ run test_signed_cstdlib_div.cu ; run test_signed_to_unsigned_conversion.cu ; run test_unsigned_to_signed_conversion.cu ; + +run test_unsigned_add_sat.cu ; +run test_signed_add_sat.cu ; +run test_unsigned_sub_sat.cu ; +run test_signed_sub_sat.cu ; +run test_unsigned_mul_sat.cu ; +run test_signed_mul_sat.cu ; +run test_unsigned_div_sat.cu ; +run test_signed_div_sat.cu ; +run test_unsigned_gcd.cu ; +run test_signed_gcd.cu ; +run test_unsigned_lcm.cu ; +run test_signed_lcm.cu ; +run test_unsigned_midpoint.cu ; +run test_signed_midpoint.cu ; diff --git a/test/test_signed_add_sat.cu b/test/test_signed_add_sat.cu new file mode 100644 index 00000000..a86457f9 --- /dev/null +++ b/test/test_signed_add_sat.cu @@ -0,0 +1,93 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::int128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::add_sat(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::add_sat(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_div_sat.cu b/test/test_signed_div_sat.cu new file mode 100644 index 00000000..804d4dc9 --- /dev/null +++ b/test/test_signed_div_sat.cu @@ -0,0 +1,96 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::int128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::div_sat(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + 
input_vector[i] = dist(rng); + do + { + input_vector2[i] = dist(rng); + } while (input_vector2[i] == 0); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::div_sat(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_gcd.cu b/test/test_signed_gcd.cu new file mode 100644 index 00000000..7d5c8434 --- /dev/null +++ b/test/test_signed_gcd.cu @@ -0,0 +1,93 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::int128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::gcd(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::gcd(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_lcm.cu b/test/test_signed_lcm.cu new file mode 100644 index 00000000..bafe559d --- /dev/null +++ b/test/test_signed_lcm.cu @@ -0,0 +1,94 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::int128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::lcm(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + // Use smaller values to avoid overflow in lcm computation + boost::random::uniform_int_distribution dist {test_type{-1, UINT64_MAX}, test_type{0, UINT64_MAX}}; + for (std::size_t i = 0; i < 
numElements; ++i) + { + input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::lcm(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_midpoint.cu b/test/test_signed_midpoint.cu new file mode 100644 index 00000000..803ba974 --- /dev/null +++ b/test/test_signed_midpoint.cu @@ -0,0 +1,93 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::int128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::midpoint(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::midpoint(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_mul_sat.cu b/test/test_signed_mul_sat.cu new file mode 100644 index 00000000..569e583c --- /dev/null +++ b/test/test_signed_mul_sat.cu @@ -0,0 +1,93 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::int128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::mul_sat(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + 
input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::mul_sat(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_sub_sat.cu b/test/test_signed_sub_sat.cu new file mode 100644 index 00000000..7dd40f30 --- /dev/null +++ b/test/test_signed_sub_sat.cu @@ -0,0 +1,93 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::int128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::sub_sat(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::sub_sat(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_add_sat.cu b/test/test_unsigned_add_sat.cu new file mode 100644 index 00000000..3cfc0317 --- /dev/null +++ b/test/test_unsigned_add_sat.cu @@ -0,0 +1,93 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::add_sat(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = 
dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::add_sat(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_div_sat.cu b/test/test_unsigned_div_sat.cu new file mode 100644 index 00000000..9f76b869 --- /dev/null +++ b/test/test_unsigned_div_sat.cu @@ -0,0 +1,93 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::div_sat(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{1U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::div_sat(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_gcd.cu b/test/test_unsigned_gcd.cu new file mode 100644 index 00000000..f23abe48 --- /dev/null +++ b/test/test_unsigned_gcd.cu @@ -0,0 +1,93 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::gcd(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + 
input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::gcd(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_lcm.cu b/test/test_unsigned_lcm.cu new file mode 100644 index 00000000..d586d58b --- /dev/null +++ b/test/test_unsigned_lcm.cu @@ -0,0 +1,94 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::lcm(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + // Use smaller values to avoid overflow in lcm computation + boost::random::uniform_int_distribution dist {test_type{0U}, test_type{0U, UINT64_MAX}}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::lcm(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_midpoint.cu b/test/test_unsigned_midpoint.cu new file mode 100644 index 00000000..e695b5ff --- /dev/null +++ b/test/test_unsigned_midpoint.cu @@ -0,0 +1,93 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::midpoint(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = 
dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::midpoint(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_mul_sat.cu b/test/test_unsigned_mul_sat.cu new file mode 100644 index 00000000..228ef806 --- /dev/null +++ b/test/test_unsigned_mul_sat.cu @@ -0,0 +1,93 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::mul_sat(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::mul_sat(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_sub_sat.cu b/test/test_unsigned_sub_sat.cu new file mode 100644 index 00000000..73bf36d7 --- /dev/null +++ b/test/test_unsigned_sub_sat.cu @@ -0,0 +1,93 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::sub_sat(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = 
dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::sub_sat(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} From 0796f36fdeb8eb026850bd2cf7ac3ecb51f4a8a6 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 11 Mar 2026 13:56:10 -0400 Subject: [PATCH 086/137] Improve diagnostics --- test/test_signed_add_sat.cu | 17 +++++++++++++++-- test/test_signed_midpoint.cu | 17 +++++++++++++++-- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/test/test_signed_add_sat.cu b/test/test_signed_add_sat.cu index a86457f9..45b45116 100644 --- a/test/test_signed_add_sat.cu +++ b/test/test_signed_add_sat.cu @@ -77,14 +77,27 @@ int main(void) } double t = w.elapsed(); + int fail_count = 0; for (int i = 0; i < numElements; ++i) { if (output_vector[i] != results[i]) { - std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; - return EXIT_FAILURE; + if (fail_count < 5) + { + std::cerr << "Result verification failed at element " << i << std::endl; + std::cerr << " input1 high: " << input_vector[i].high << " low: " << input_vector[i].low << std::endl; + std::cerr << " input2 high: " << input_vector2[i].high << " low: " << input_vector2[i].low << std::endl; + std::cerr << " GPU high: " << output_vector[i].high << " low: " << output_vector[i].low << std::endl; + std::cerr << " CPU high: " << results[i].high << " low: " << results[i].low << std::endl; + } + ++fail_count; } } + if (fail_count > 0) + { + std::cerr << "Total failures: " << fail_count << " out of " << numElements << std::endl; + return EXIT_FAILURE; + } std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; std::cout << "Done\n"; diff --git a/test/test_signed_midpoint.cu b/test/test_signed_midpoint.cu index 803ba974..5ee28d71 100644 --- a/test/test_signed_midpoint.cu +++ b/test/test_signed_midpoint.cu @@ -77,14 +77,27 @@ int main(void) } double t = w.elapsed(); + int fail_count = 0; for (int i = 0; i < numElements; ++i) { if (output_vector[i] != results[i]) { - std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; - return EXIT_FAILURE; + if (fail_count < 5) + { + std::cerr << "Result verification failed at element " << i << std::endl; + std::cerr << " input1 high: " << input_vector[i].high << " low: " << input_vector[i].low << std::endl; + std::cerr << " input2 high: " << input_vector2[i].high << " low: " << input_vector2[i].low << std::endl; + std::cerr << " GPU high: " << output_vector[i].high << " low: " << output_vector[i].low << std::endl; + std::cerr << " CPU high: " << results[i].high << " low: " << results[i].low << std::endl; + } + ++fail_count; } } + if (fail_count > 0) + { + std::cerr << "Total failures: " << fail_count << " out of " << numElements << std::endl; + return EXIT_FAILURE; + } std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; std::cout << "Done\n"; From a1e05752f605796a8541ea7807222ad983dc5698 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 12 Mar 2026 14:25:06 -0400 Subject: [PATCH 087/137] Implement conversion operators between types --- include/boost/int128/detail/conversions.hpp | 14 ++++++++++++++ include/boost/int128/detail/int128_imp.hpp | 1 + include/boost/int128/detail/uint128_imp.hpp | 1 + 3 files changed, 16 insertions(+) diff --git a/include/boost/int128/detail/conversions.hpp b/include/boost/int128/detail/conversions.hpp index a549db03..f471d570 100644 --- a/include/boost/int128/detail/conversions.hpp +++ b/include/boost/int128/detail/conversions.hpp @@ -38,6 +38,20 @@ BOOST_INT128_HOST_DEVICE constexpr uint128_t::uint128_t(const int128_t& v) noexc #endif // BOOST_INT128_ENDIAN_LITTLE_BYTE +//===================================== +// Conversion Operators +//===================================== + +BOOST_INT128_HOST_DEVICE constexpr int128_t::operator uint128_t() const noexcept +{ + return uint128_t{static_cast(this->high), static_cast(this->low)}; +} + +BOOST_INT128_HOST_DEVICE constexpr uint128_t::operator int128_t() const noexcept +{ + return int128_t{static_cast(this->high), 
static_cast(this->low)}; +} + //===================================== // Comparison Operators //===================================== diff --git a/include/boost/int128/detail/int128_imp.hpp b/include/boost/int128/detail/int128_imp.hpp index d2bdfa62..4e5fcc11 100644 --- a/include/boost/int128/detail/int128_imp.hpp +++ b/include/boost/int128/detail/int128_imp.hpp @@ -57,6 +57,7 @@ int128_t // Requires a conversion file to be implemented BOOST_INT128_HOST_DEVICE explicit constexpr int128_t(const uint128_t& v) noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr operator uint128_t() const noexcept; // Construct from integral types #if BOOST_INT128_ENDIAN_LITTLE_BYTE diff --git a/include/boost/int128/detail/uint128_imp.hpp b/include/boost/int128/detail/uint128_imp.hpp index 26aa9370..fdd0b309 100644 --- a/include/boost/int128/detail/uint128_imp.hpp +++ b/include/boost/int128/detail/uint128_imp.hpp @@ -58,6 +58,7 @@ uint128_t // Requires a conversion file to be implemented BOOST_INT128_HOST_DEVICE explicit constexpr uint128_t(const int128_t& v) noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr operator int128_t() const noexcept; // Construct from integral types #if BOOST_INT128_ENDIAN_LITTLE_BYTE From 39e43140175ac8b9895d3ce749e34337ee5c634e Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 13 Mar 2026 08:36:25 -0500 Subject: [PATCH 088/137] Remove forward declarations --- include/boost/int128/numeric.hpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/boost/int128/numeric.hpp b/include/boost/int128/numeric.hpp index 2375154f..33111e46 100644 --- a/include/boost/int128/numeric.hpp +++ b/include/boost/int128/numeric.hpp @@ -76,9 +76,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t sub_sat(const u return z; } -BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t add_sat(int128_t x, int128_t y) noexcept; -BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t sub_sat(int128_t x, int128_t y) noexcept; - 
#ifdef _MSC_VER # pragma warning(push) # pragma warning(disable : 4307) // Addition Overflow From f188a38b8df6dc256464d111d916edf83b1edb16 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 13 Mar 2026 08:38:54 -0500 Subject: [PATCH 089/137] Avoid undefined rollover --- include/boost/int128/numeric.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/boost/int128/numeric.hpp b/include/boost/int128/numeric.hpp index 33111e46..0665887f 100644 --- a/include/boost/int128/numeric.hpp +++ b/include/boost/int128/numeric.hpp @@ -115,7 +115,9 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t sub_sat(const int128_t x, const int1 if (x <= 0 && y >= 0) { // Underflow case - const auto res {x - y}; + const auto big_x {static_cast(x)}; + const auto big_y {static_cast(y)}; + const auto res {static_cast(big_x - big_y)}; return res > x ? (std::numeric_limits::min)() : res; } else if (x > 0 && y < 0) From 2d1fd23d1ea9ce469a96fdde9759d54dbb2eabf9 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 13 Mar 2026 11:15:45 -0500 Subject: [PATCH 090/137] Improve algorithms --- include/boost/int128/numeric.hpp | 68 ++++++++++++++------------------ 1 file changed, 29 insertions(+), 39 deletions(-) diff --git a/include/boost/int128/numeric.hpp b/include/boost/int128/numeric.hpp index 0665887f..23832a5c 100644 --- a/include/boost/int128/numeric.hpp +++ b/include/boost/int128/numeric.hpp @@ -84,56 +84,40 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t sub_sat(const u BOOST_INT128_HOST_DEVICE constexpr int128_t add_sat(const int128_t x, const int128_t y) noexcept { - if (x >= 0 && y >= 0) - { - constexpr auto max_value {static_cast((std::numeric_limits::max)())}; - const auto big_x {static_cast(x)}; - const auto big_y {static_cast(y)}; - const auto big_res {big_x + big_y}; + const auto result {x + y}; - return big_res > max_value ? 
(std::numeric_limits::max)() : static_cast(big_res); - } - else if ((x < 0 && y > 0) || (x > 0 && y < 0)) + // Positive overflow: both non-negative but result wrapped to negative + if (x.high >= 0 && y.high >= 0 && result.high < 0) { - return x + y; + return (std::numeric_limits::max)(); } - else + + // Negative overflow: both negative but result wrapped to non-negative + if (x.high < 0 && y.high < 0 && result.high >= 0) { - // x < 0 and y < 0 - // Nearly the same technique as the positive values case - constexpr auto max_value {-static_cast((std::numeric_limits::min)())}; - const auto big_x {static_cast(abs(x))}; - const auto big_y {static_cast(abs(y))}; - const auto big_res {big_x + big_y}; - - return big_res > max_value ? (std::numeric_limits::min)() : -static_cast(big_res); + return (std::numeric_limits::min)(); } + + return result; } BOOST_INT128_HOST_DEVICE constexpr int128_t sub_sat(const int128_t x, const int128_t y) noexcept { - if (x <= 0 && y >= 0) + const auto result {x - y}; + + // Positive overflow: positive minus negative but result wrapped to negative + if (x.high >= 0 && y.high < 0 && result.high < 0) { - // Underflow case - const auto big_x {static_cast(x)}; - const auto big_y {static_cast(y)}; - const auto res {static_cast(big_x - big_y)}; - return res > x ? (std::numeric_limits::min)() : res; + return (std::numeric_limits::max)(); } - else if (x > 0 && y < 0) - { - // Overflow Case - constexpr auto max_val {static_cast((std::numeric_limits::max)())}; - const auto big_x {static_cast(x)}; - const auto big_y {-static_cast(y)}; - const auto res {big_x + big_y}; - return (res > max_val || res < big_x) ? 
(std::numeric_limits::max)() : static_cast(res); - } - else + // Negative overflow: negative minus non-negative but result wrapped to non-negative + if (x.high < 0 && y.high >= 0 && result.high >= 0) { - return x - y; + return (std::numeric_limits::min)(); } + + return result; } #ifdef _MSC_VER @@ -403,11 +387,17 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t midpoint(const int128_t a, const int // For signed integers, we use a + (b - a) / 2 or a - (a - b) / 2 // The subtraction is done in unsigned arithmetic to handle overflow correctly // Integer division automatically rounds toward the first argument + // + // Use direct field access for both the uint128 construction and the + // comparison to avoid NVCC host compiler issues with operator<= and + // static_cast on int128_t for large-magnitude values + + const uint128_t ua {static_cast(a.high), a.low}; + const uint128_t ub {static_cast(b.high), b.low}; - const auto ua {static_cast(a)}; - const auto ub {static_cast(b)}; + const bool a_le_b {a.high == b.high ? 
a.low <= b.low : a.high < b.high}; - if (a <= b) + if (a_le_b) { // diff = b - a (computed in unsigned, handles wrap-around correctly) const auto diff {ub - ua}; From 3a8b5c36f8f0c24c0c2880292e4bb7b971b791cd Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 13 Mar 2026 11:18:23 -0500 Subject: [PATCH 091/137] Expand testing ranges --- test/test_signed_eq.cu | 2 +- test/test_signed_ge.cu | 2 +- test/test_signed_gt.cu | 2 +- test/test_signed_le.cu | 2 +- test/test_signed_lt.cu | 2 +- test/test_signed_ne.cu | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/test/test_signed_eq.cu b/test/test_signed_eq.cu index ac09ae44..4f7156c1 100644 --- a/test/test_signed_eq.cu +++ b/test/test_signed_eq.cu @@ -58,7 +58,7 @@ int main(void) cuda_managed_ptr output_vector(numElements); // Initialize the input vectors - boost::random::uniform_int_distribution dist {test_type{(std::numeric_limits::min)()} + 1, test_type{(std::numeric_limits::max)()} - 1}; + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)() + 1, (std::numeric_limits::max)() - 1}; for (std::size_t i = 0; i < numElements; ++i) { input_vector1[i] = dist(rng); diff --git a/test/test_signed_ge.cu b/test/test_signed_ge.cu index af5628fb..efe510ea 100644 --- a/test/test_signed_ge.cu +++ b/test/test_signed_ge.cu @@ -58,7 +58,7 @@ int main(void) cuda_managed_ptr output_vector(numElements); // Initialize the input vectors - boost::random::uniform_int_distribution dist {test_type{(std::numeric_limits::min)()} + 1, test_type{(std::numeric_limits::max)()} - 1}; + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)() + 1, (std::numeric_limits::max)() - 1}; for (std::size_t i = 0; i < numElements; ++i) { input_vector1[i] = dist(rng); diff --git a/test/test_signed_gt.cu b/test/test_signed_gt.cu index f37577b8..820b0797 100644 --- a/test/test_signed_gt.cu +++ b/test/test_signed_gt.cu @@ -58,7 +58,7 @@ int main(void) cuda_managed_ptr output_vector(numElements); // 
Initialize the input vectors - boost::random::uniform_int_distribution dist {test_type{(std::numeric_limits::min)()} + 1, test_type{(std::numeric_limits::max)()} - 1}; + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)() + 1, (std::numeric_limits::max)() - 1}; for (std::size_t i = 0; i < numElements; ++i) { input_vector1[i] = dist(rng); diff --git a/test/test_signed_le.cu b/test/test_signed_le.cu index 05ee992a..d2d67ce6 100644 --- a/test/test_signed_le.cu +++ b/test/test_signed_le.cu @@ -58,7 +58,7 @@ int main(void) cuda_managed_ptr output_vector(numElements); // Initialize the input vectors - boost::random::uniform_int_distribution dist {test_type{(std::numeric_limits::min)()} + 1, test_type{(std::numeric_limits::max)()} - 1}; + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)() + 1, (std::numeric_limits::max)() - 1}; for (std::size_t i = 0; i < numElements; ++i) { input_vector1[i] = dist(rng); diff --git a/test/test_signed_lt.cu b/test/test_signed_lt.cu index c356ae00..c4094c4d 100644 --- a/test/test_signed_lt.cu +++ b/test/test_signed_lt.cu @@ -58,7 +58,7 @@ int main(void) cuda_managed_ptr output_vector(numElements); // Initialize the input vectors - boost::random::uniform_int_distribution dist {test_type{(std::numeric_limits::min)()} + 1, test_type{(std::numeric_limits::max)()} - 1}; + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)() + 1, (std::numeric_limits::max)() - 1}; for (std::size_t i = 0; i < numElements; ++i) { input_vector1[i] = dist(rng); diff --git a/test/test_signed_ne.cu b/test/test_signed_ne.cu index 39f9d64a..6c34a111 100644 --- a/test/test_signed_ne.cu +++ b/test/test_signed_ne.cu @@ -58,7 +58,7 @@ int main(void) cuda_managed_ptr output_vector(numElements); // Initialize the input vectors - boost::random::uniform_int_distribution dist {test_type{(std::numeric_limits::min)()} + 1, test_type{(std::numeric_limits::max)()} - 1}; + boost::random::uniform_int_distribution dist 
{(std::numeric_limits::min)() + 1, (std::numeric_limits::max)() - 1}; for (std::size_t i = 0; i < numElements; ++i) { input_vector1[i] = dist(rng); From 41d230381d6fc2e9a04c4bda033b60633eeb160d Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 13 Mar 2026 11:23:33 -0500 Subject: [PATCH 092/137] Update deprecated actions --- .github/workflows/codecov.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml index bd2f7c8d..c1486f38 100644 --- a/.github/workflows/codecov.yml +++ b/.github/workflows/codecov.yml @@ -80,13 +80,13 @@ jobs: fi git config --global pack.threads 0 - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: # For coverage builds fetch the whole history, else only 1 commit using a 'fake ternary' fetch-depth: ${{ matrix.coverage && '0' || '1' }} - name: Cache ccache - uses: actions/cache@v3 + uses: actions/cache@v4 if: env.B2_USE_CCACHE with: path: ~/.ccache @@ -94,7 +94,7 @@ jobs: restore-keys: ${{matrix.os}}-${{matrix.container}}-${{matrix.compiler}}- - name: Fetch Boost.CI - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: repository: boostorg/boost-ci ref: master From 705630c6f9745ecf121b8a6aa4945d34152312b9 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 13 Mar 2026 11:28:53 -0500 Subject: [PATCH 093/137] Further avoidance of UB --- include/boost/int128/numeric.hpp | 48 ++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/include/boost/int128/numeric.hpp b/include/boost/int128/numeric.hpp index 23832a5c..0699cd8b 100644 --- a/include/boost/int128/numeric.hpp +++ b/include/boost/int128/numeric.hpp @@ -84,40 +84,52 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t sub_sat(const u BOOST_INT128_HOST_DEVICE constexpr int128_t add_sat(const int128_t x, const int128_t y) noexcept { - const auto result {x + y}; + // Detect overflow BEFORE the addition to avoid signed overflow UB. 
+ // When both are non-negative: overflow iff x > max - y (subtraction safe: max - non_negative >= 0) + // When both are negative: overflow iff x < min - y (subtraction safe: min - negative > min) + // Mixed signs: overflow is impossible. - // Positive overflow: both non-negative but result wrapped to negative - if (x.high >= 0 && y.high >= 0 && result.high < 0) + if (x.high >= 0 && y.high >= 0) { - return (std::numeric_limits::max)(); + if (x > (std::numeric_limits::max)() - y) + { + return (std::numeric_limits::max)(); + } } - - // Negative overflow: both negative but result wrapped to non-negative - if (x.high < 0 && y.high < 0 && result.high >= 0) + else if (x.high < 0 && y.high < 0) { - return (std::numeric_limits::min)(); + if (x < (std::numeric_limits::min)() - y) + { + return (std::numeric_limits::min)(); + } } - return result; + return x + y; } BOOST_INT128_HOST_DEVICE constexpr int128_t sub_sat(const int128_t x, const int128_t y) noexcept { - const auto result {x - y}; + // Detect overflow BEFORE the subtraction to avoid signed overflow UB. + // Positive overflow: x >= 0 and y < 0 and x > max + y (safe: max + negative < max) + // Negative overflow: x < 0 and y >= 0 and x < min + y (safe: min + non_negative > min) + // Same signs: overflow is impossible. 
- // Positive overflow: positive minus negative but result wrapped to negative - if (x.high >= 0 && y.high < 0 && result.high < 0) + if (x.high >= 0 && y.high < 0) { - return (std::numeric_limits::max)(); + if (x > (std::numeric_limits::max)() + y) + { + return (std::numeric_limits::max)(); + } } - - // Negative overflow: negative minus non-negative but result wrapped to non-negative - if (x.high < 0 && y.high >= 0 && result.high >= 0) + else if (x.high < 0 && y.high >= 0) { - return (std::numeric_limits::min)(); + if (x < (std::numeric_limits::min)() + y) + { + return (std::numeric_limits::min)(); + } } - return result; + return x - y; } #ifdef _MSC_VER From f467aeda19b478c7614771a26a22698cb257f049 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 17 Mar 2026 10:18:56 -0500 Subject: [PATCH 094/137] Make charconv functions CUDA compatible --- include/boost/int128/charconv.hpp | 71 ++++++++++++++++++++++++++++--- 1 file changed, 65 insertions(+), 6 deletions(-) diff --git a/include/boost/int128/charconv.hpp b/include/boost/int128/charconv.hpp index 9171cae7..b92dc9fe 100644 --- a/include/boost/int128/charconv.hpp +++ b/include/boost/int128/charconv.hpp @@ -38,6 +38,8 @@ struct make_signed { using type = int128::int128_t; }; template <> struct make_signed { using type = int128::int128_t; }; +#ifndef __NVCC__ + BOOST_INT128_INLINE_CONSTEXPR int128::uint128_t int128_pow10[39] = { int128::uint128_t{UINT64_C(0x0), UINT64_C(0x1)}, @@ -81,8 +83,57 @@ BOOST_INT128_INLINE_CONSTEXPR int128::uint128_t int128_pow10[39] = int128::uint128_t{UINT64_C(0x4b3b4ca85a86c47a), UINT64_C(0x98a224000000000)} }; -constexpr int num_digits(const int128::uint128_t& x) noexcept +#endif // __NVCC__ + +BOOST_INT128_HOST_DEVICE constexpr int num_digits(const int128::uint128_t& x) noexcept { + #ifdef __NVCC__ + + constexpr int128::uint128_t int128_pow10[39] = + { + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x1)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0xa)}, + 
int128::uint128_t{UINT64_C(0x0), UINT64_C(0x64)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x3e8)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x2710)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x186a0)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0xf4240)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x989680)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x5f5e100)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x3b9aca00)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x2540be400)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x174876e800)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0xe8d4a51000)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x9184e72a000)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x5af3107a4000)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x38d7ea4c68000)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x2386f26fc10000)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x16345785d8a0000)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0xde0b6b3a7640000)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x8ac7230489e80000)}, + int128::uint128_t{UINT64_C(0x5), UINT64_C(0x6bc75e2d63100000)}, + int128::uint128_t{UINT64_C(0x36), UINT64_C(0x35c9adc5dea00000)}, + int128::uint128_t{UINT64_C(0x21e), UINT64_C(0x19e0c9bab2400000)}, + int128::uint128_t{UINT64_C(0x152d), UINT64_C(0x2c7e14af6800000)}, + int128::uint128_t{UINT64_C(0xd3c2), UINT64_C(0x1bcecceda1000000)}, + int128::uint128_t{UINT64_C(0x84595), UINT64_C(0x161401484a000000)}, + int128::uint128_t{UINT64_C(0x52b7d2), UINT64_C(0xdcc80cd2e4000000)}, + int128::uint128_t{UINT64_C(0x33b2e3c), UINT64_C(0x9fd0803ce8000000)}, + int128::uint128_t{UINT64_C(0x204fce5e), UINT64_C(0x3e25026110000000)}, + int128::uint128_t{UINT64_C(0x1431e0fae), UINT64_C(0x6d7217caa0000000)}, + int128::uint128_t{UINT64_C(0xc9f2c9cd0), UINT64_C(0x4674edea40000000)}, + int128::uint128_t{UINT64_C(0x7e37be2022), UINT64_C(0xc0914b2680000000)}, + int128::uint128_t{UINT64_C(0x4ee2d6d415b), UINT64_C(0x85acef8100000000)}, + 
int128::uint128_t{UINT64_C(0x314dc6448d93), UINT64_C(0x38c15b0a00000000)}, + int128::uint128_t{UINT64_C(0x1ed09bead87c0), UINT64_C(0x378d8e6400000000)}, + int128::uint128_t{UINT64_C(0x13426172c74d82), UINT64_C(0x2b878fe800000000)}, + int128::uint128_t{UINT64_C(0xc097ce7bc90715), UINT64_C(0xb34b9f1000000000)}, + int128::uint128_t{UINT64_C(0x785ee10d5da46d9), UINT64_C(0xf436a000000000)}, + int128::uint128_t{UINT64_C(0x4b3b4ca85a86c47a), UINT64_C(0x98a224000000000)} + }; + + #endif // __NVCC__ + if (x.high == UINT64_C(0)) { return num_digits(x.low); @@ -91,7 +142,7 @@ constexpr int num_digits(const int128::uint128_t& x) noexcept // Use the most significant bit position to approximate log10 // log10(x) ~= log2(x) / log2(10) ~= log2(x) / 3.32 - const auto msb {64 + (63 - int128::detail::impl::countl_impl(x.high))}; + const auto msb {64 + (63 - int128::detail::countl_zero(x.high))}; // Approximate log10 const auto estimated_digits {(msb * 1000) / 3322 + 1}; @@ -112,27 +163,35 @@ constexpr int num_digits(const int128::uint128_t& x) noexcept } // namespace detail -BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, const int128::uint128_t value, const int base = 10) noexcept +BOOST_INT128_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, const int128::uint128_t value, const int base = 10) noexcept { + #ifndef __NVCC__ + if (base == 10) { return detail::to_chars_128integer_impl(first, last, value); } + #endif // __NVCC__ + return detail::to_chars_integer_impl(first, last, value, base); } -BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, const int128::int128_t value, const int base = 10) noexcept +BOOST_INT128_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, const int128::int128_t value, const int base = 10) noexcept { + #ifndef __NVCC__ + if (base == 10) { return detail::to_chars_128integer_impl(first, last, value); } + #endif // __NVCC__ + return 
detail::to_chars_integer_impl(first, last, value, base); } -BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, int128::uint128_t& value, const int base = 10) noexcept +BOOST_INT128_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, int128::uint128_t& value, const int base = 10) noexcept { return detail::from_chars_integer_impl(first, last, value, base); } @@ -142,7 +201,7 @@ BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(core::string_view sv, return detail::from_chars_integer_impl(sv.data(), sv.data() + sv.size(), value, base); } -BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, int128::int128_t& value, const int base = 10) noexcept +BOOST_INT128_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, int128::int128_t& value, const int base = 10) noexcept { return detail::from_chars_integer_impl(first, last, value, base); } From 6112b0db77acf09e8657fe50d4a4a66b5ab5f161 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 17 Mar 2026 10:19:14 -0500 Subject: [PATCH 095/137] Add testing of CUDA charconv functions --- test/cuda_jamfile | 5 ++ test/test_signed_from_chars.cu | 112 +++++++++++++++++++++++++++++++ test/test_signed_to_chars.cu | 110 ++++++++++++++++++++++++++++++ test/test_unsigned_from_chars.cu | 112 +++++++++++++++++++++++++++++++ test/test_unsigned_to_chars.cu | 110 ++++++++++++++++++++++++++++++ 5 files changed, 449 insertions(+) create mode 100644 test/test_signed_from_chars.cu create mode 100644 test/test_signed_to_chars.cu create mode 100644 test/test_unsigned_from_chars.cu create mode 100644 test/test_unsigned_to_chars.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 82e2c8ee..5451a2c4 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -79,3 +79,8 @@ run test_unsigned_lcm.cu ; run test_signed_lcm.cu ; run 
test_unsigned_midpoint.cu ; run test_signed_midpoint.cu ; + +run test_unsigned_to_chars.cu ; +run test_signed_to_chars.cu ; +run test_unsigned_from_chars.cu ; +run test_signed_from_chars.cu ; diff --git a/test/test_signed_from_chars.cu b/test/test_signed_from_chars.cu new file mode 100644 index 00000000..66d67eb0 --- /dev/null +++ b/test/test_signed_from_chars.cu @@ -0,0 +1,112 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +constexpr int BUF_SIZE = 64; + +__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + const char* str = in_strings + i * BUF_SIZE; + test_type val {}; + boost::charconv::from_chars(str, str + in_lengths[i], val); + out[i] = val; + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vectors + cuda_managed_ptr input_strings(numElements * BUF_SIZE); + cuda_managed_ptr input_lengths(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors by generating random values and converting to strings + 
boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + std::vector expected(numElements); + for (std::size_t i = 0; i < numElements; ++i) + { + expected[i] = dist(rng); + char* buf = &input_strings[i * BUF_SIZE]; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]); + input_lengths[i] = static_cast(res.ptr - buf); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + test_type cpu_val {}; + const char* str = &input_strings[i * BUF_SIZE]; + boost::charconv::from_chars(str, str + input_lengths[i], cpu_val); + + if (output_vector[i] != cpu_val) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_to_chars.cu b/test/test_signed_to_chars.cu new file mode 100644 index 00000000..20a6a944 --- /dev/null +++ b/test/test_signed_to_chars.cu @@ -0,0 +1,110 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +constexpr int BUF_SIZE = 64; + +__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i]); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements); + 
cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_from_chars.cu b/test/test_unsigned_from_chars.cu new file mode 100644 index 00000000..727dcfa9 --- /dev/null +++ b/test/test_unsigned_from_chars.cu @@ -0,0 +1,112 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +constexpr int BUF_SIZE = 64; + +__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + const char* str = in_strings + i * BUF_SIZE; + test_type val {}; + boost::charconv::from_chars(str, str + in_lengths[i], val); + out[i] = val; + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vectors + cuda_managed_ptr input_strings(numElements * BUF_SIZE); + cuda_managed_ptr input_lengths(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors by generating random values and converting to strings + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + std::vector expected(numElements); + for (std::size_t i = 0; i < numElements; ++i) + { + expected[i] = dist(rng); + char* buf = &input_strings[i * BUF_SIZE]; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]); + input_lengths[i] = static_cast(res.ptr - buf); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel 
launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + test_type cpu_val {}; + const char* str = &input_strings[i * BUF_SIZE]; + boost::charconv::from_chars(str, str + input_lengths[i], cpu_val); + + if (output_vector[i] != cpu_val) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_to_chars.cu b/test/test_unsigned_to_chars.cu new file mode 100644 index 00000000..25d4252a --- /dev/null +++ b/test/test_unsigned_to_chars.cu @@ -0,0 +1,110 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +constexpr int BUF_SIZE = 64; + +__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i]); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements); + cudaDeviceSynchronize(); + + 
std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} From f83ac1699cf22c495ac4385730ff5ceab531e8a9 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 17 Mar 2026 10:54:29 -0500 Subject: [PATCH 096/137] Add missing dependency --- test/CMakeLists.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f2fbf41e..809715e6 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -19,9 +19,8 @@ if(HAVE_BOOST_TEST) enable_testing() - boost_test_jamfile(FILE cuda_jamfile LINK_LIBRARIES Boost::int128 Boost::random ${CUDA_LIBRARIES} INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} ) - - + boost_test_jamfile(FILE cuda_jamfile LINK_LIBRARIES Boost::int128 Boost::random Boost::charconv ${CUDA_LIBRARIES} INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} ) + else() boost_test_jamfile(FILE Jamfile LINK_LIBRARIES Boost::int128 Boost::core Boost::random Boost::multiprecision Boost::mp11 Boost::charconv) From 636258e896274b7d43797eae3de648833d58256c Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 17 Mar 
2026 11:06:18 -0500 Subject: [PATCH 097/137] Specialize get_max_value template --- include/boost/int128/charconv.hpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/include/boost/int128/charconv.hpp b/include/boost/int128/charconv.hpp index b92dc9fe..f8176f15 100644 --- a/include/boost/int128/charconv.hpp +++ b/include/boost/int128/charconv.hpp @@ -38,6 +38,22 @@ struct make_signed { using type = int128::int128_t; }; template <> struct make_signed { using type = int128::int128_t; }; +#ifdef __NVCC__ + +template <> +__host__ __device__ constexpr int128::uint128_t get_max_value() +{ + return std::numeric_limits::max(); +} + +template <> +__host__ __device__ constexpr int128::int128_t get_max_value() +{ + return std::numeric_limits::max(); +} + +#endif // __NVCC__ + #ifndef __NVCC__ BOOST_INT128_INLINE_CONSTEXPR int128::uint128_t int128_pow10[39] = From 9dd22a13f8d2af0151a2f24020136cb4d2ae4c0a Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 17 Mar 2026 11:37:52 -0500 Subject: [PATCH 098/137] Add testing of all valid bases for charconv --- test/cuda_jamfile | 5 + test/test_signed_from_chars_bases.cu | 125 +++++++++++++++++++++++++ test/test_signed_to_chars_bases.cu | 117 +++++++++++++++++++++++ test/test_unsigned_from_chars_bases.cu | 125 +++++++++++++++++++++++++ test/test_unsigned_to_chars_bases.cu | 117 +++++++++++++++++++++++ 5 files changed, 489 insertions(+) create mode 100644 test/test_signed_from_chars_bases.cu create mode 100644 test/test_signed_to_chars_bases.cu create mode 100644 test/test_unsigned_from_chars_bases.cu create mode 100644 test/test_unsigned_to_chars_bases.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 5451a2c4..dab3ff9d 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -84,3 +84,8 @@ run test_unsigned_to_chars.cu ; run test_signed_to_chars.cu ; run test_unsigned_from_chars.cu ; run test_signed_from_chars.cu ; + +run test_unsigned_to_chars_bases.cu ; +run test_signed_to_chars_bases.cu ; +run 
test_unsigned_from_chars_bases.cu ; +run test_signed_from_chars_bases.cu ; diff --git a/test/test_signed_from_chars_bases.cu b/test/test_signed_from_chars_bases.cu new file mode 100644 index 00000000..69b175fb --- /dev/null +++ b/test/test_signed_from_chars_bases.cu @@ -0,0 +1,125 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +constexpr int BUF_SIZE = 192; + +__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + const char* str = in_strings + i * BUF_SIZE; + test_type val {}; + boost::charconv::from_chars(str, str + in_lengths[i], val, base); + out[i] = val; + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vectors + cuda_managed_ptr input_strings(numElements * BUF_SIZE); + cuda_managed_ptr input_lengths(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), 
(std::numeric_limits::max)()}; + std::vector expected(numElements); + + for (int base = 2; base <= 36; ++base) + { + // Initialize the input vectors + for (std::size_t i = 0; i < numElements; ++i) + { + expected[i] = dist(rng); + char* buf = &input_strings[i * BUF_SIZE]; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base); + input_lengths[i] = static_cast(res.ptr - buf); + } + + // Launch the CUDA Kernel + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl; + + watch w; + + cuda_test<<>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + test_type val {}; + const char* str = &input_strings[i * BUF_SIZE]; + boost::charconv::from_chars(str, str + input_lengths[i], val, base); + results.push_back(val); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << " base " << base << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl; + } + + std::cout << "All bases PASSED" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_to_chars_bases.cu b/test/test_signed_to_chars_bases.cu new file mode 100644 index 00000000..15733649 --- /dev/null +++ b/test/test_signed_to_chars_bases.cu @@ -0,0 +1,117 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +constexpr int BUF_SIZE = 192; + +__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i], base); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + int 
threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + + for (int base = 2; base <= 36; ++base) + { + // Initialize the input vectors + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << " base " << base << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl; + } + + std::cout << "All bases PASSED" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_from_chars_bases.cu b/test/test_unsigned_from_chars_bases.cu new file mode 100644 index 00000000..514e4cdc --- /dev/null +++ b/test/test_unsigned_from_chars_bases.cu @@ -0,0 +1,125 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +constexpr int BUF_SIZE = 192; + +__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + const char* str = in_strings + i * BUF_SIZE; + test_type val {}; + boost::charconv::from_chars(str, str + in_lengths[i], val, base); + out[i] = val; + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vectors + cuda_managed_ptr input_strings(numElements * BUF_SIZE); + cuda_managed_ptr input_lengths(numElements); + + // Allocate the managed output vector + cuda_managed_ptr 
output_vector(numElements); + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + std::vector expected(numElements); + + for (int base = 2; base <= 36; ++base) + { + // Initialize the input vectors + for (std::size_t i = 0; i < numElements; ++i) + { + expected[i] = dist(rng); + char* buf = &input_strings[i * BUF_SIZE]; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base); + input_lengths[i] = static_cast(res.ptr - buf); + } + + // Launch the CUDA Kernel + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl; + + watch w; + + cuda_test<<>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + test_type val {}; + const char* str = &input_strings[i * BUF_SIZE]; + boost::charconv::from_chars(str, str + input_lengths[i], val, base); + results.push_back(val); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << " base " << base << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl; + } + + std::cout << "All bases PASSED" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_to_chars_bases.cu b/test/test_unsigned_to_chars_bases.cu new file mode 100644 index 00000000..2a4545a2 --- /dev/null +++ b/test/test_unsigned_to_chars_bases.cu @@ -0,0 +1,117 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +constexpr int BUF_SIZE = 192; + +__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i], base); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + int 
threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + + for (int base = 2; base <= 36; ++base) + { + // Initialize the input vectors + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << " base " << base << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl; + } + + std::cout << "All bases PASSED" << std::endl; + std::cout << "Done\n"; + + return 0; +} From f07cd360f0cc364862231eacddc79044d77fcd69 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 18 Mar 2026 06:24:21 -0500 Subject: [PATCH 099/137] Add testing of literals with CUDA devices --- test/cuda_jamfile | 3 + test/test_signed_literals.cu | 161 +++++++++++++++++++++++++++++++++ test/test_unsigned_literals.cu | 149 ++++++++++++++++++++++++++++++ 3 files changed, 313 insertions(+) create mode 100644 test/test_signed_literals.cu create mode 100644 test/test_unsigned_literals.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index dab3ff9d..1a67d48d 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -89,3 +89,6 @@ run test_unsigned_to_chars_bases.cu ; run test_signed_to_chars_bases.cu ; run test_unsigned_from_chars_bases.cu ; run test_signed_from_chars_bases.cu ; + +run test_unsigned_literals.cu ; +run test_signed_literals.cu ; diff --git a/test/test_signed_literals.cu b/test/test_signed_literals.cu new file mode 100644 index 00000000..5035aa52 --- /dev/null +++ b/test/test_signed_literals.cu @@ -0,0 +1,161 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using boost::int128::int128_t; +using namespace boost::int128::literals; + +// Number of test cases: we test each literal operator with several values +// Operators: +// 1. operator""_i128(const char*) - raw literal, decimal +// 2. 
operator""_I128(const char*) - raw literal, decimal +// 3. operator""_i128(const char*, size_t) - cooked string literal +// 4. operator""_I128(const char*, size_t) - cooked string literal +// 5. operator""_i128(unsigned long long) - integer literal +// 6. operator""_I128(unsigned long long) - integer literal + +constexpr int NUM_TESTS = 30; + +__global__ void cuda_test(int128_t *out) +{ + int i = threadIdx.x; + + // operator""_i128(const char*) - raw literal + if (i == 0) { out[i] = 0_i128; } + if (i == 1) { out[i] = 1_i128; } + if (i == 2) { out[i] = 170141183460469231731687303715884105727_i128; } + if (i == 3) { out[i] = 999999999999999999_i128; } + if (i == 4) { out[i] = 42_i128; } + + // operator""_I128(const char*) - raw literal + if (i == 5) { out[i] = 0_I128; } + if (i == 6) { out[i] = 1_I128; } + if (i == 7) { out[i] = 170141183460469231731687303715884105727_I128; } + if (i == 8) { out[i] = 999999999999999999_I128; } + if (i == 9) { out[i] = 42_I128; } + + // operator""_i128(const char*, size_t) - string literal (supports negative) + if (i == 10) { out[i] = "0"_i128; } + if (i == 11) { out[i] = "1"_i128; } + if (i == 12) { out[i] = "170141183460469231731687303715884105727"_i128; } + if (i == 13) { out[i] = "-1"_i128; } + if (i == 14) { out[i] = "-170141183460469231731687303715884105727"_i128; } + + // operator""_I128(const char*, size_t) - string literal (supports negative) + if (i == 15) { out[i] = "0"_I128; } + if (i == 16) { out[i] = "1"_I128; } + if (i == 17) { out[i] = "170141183460469231731687303715884105727"_I128; } + if (i == 18) { out[i] = "-1"_I128; } + if (i == 19) { out[i] = "-170141183460469231731687303715884105727"_I128; } + + // operator""_i128(unsigned long long) - integer literal + if (i == 20) { out[i] = 0_i128; } + if (i == 21) { out[i] = 1_i128; } + if (i == 22) { out[i] = 18446744073709551615_i128; } + if (i == 23) { out[i] = 42_i128; } + if (i == 24) { out[i] = 100_i128; } + + // operator""_I128(unsigned long long) - integer literal + 
if (i == 25) { out[i] = 0_I128; } + if (i == 26) { out[i] = 1_I128; } + if (i == 27) { out[i] = 18446744073709551615_I128; } + if (i == 28) { out[i] = 42_I128; } + if (i == 29) { out[i] = 100_I128; } +} + +int main(void) +{ + cudaError_t err = cudaSuccess; + + std::cout << "[Signed literal tests: " << NUM_TESTS << " cases]" << std::endl; + + cuda_managed_ptr output(NUM_TESTS); + + // Launch with 1 block of NUM_TESTS threads + watch w; + + cuda_test<<<1, NUM_TESTS>>>(output.get()); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Build expected values on host using the same literals + int128_t expected[NUM_TESTS]; + + // operator""_i128(const char*) - raw literal + expected[0] = 0_i128; + expected[1] = 1_i128; + expected[2] = 170141183460469231731687303715884105727_i128; + expected[3] = 999999999999999999_i128; + expected[4] = 42_i128; + + // operator""_I128(const char*) - raw literal + expected[5] = 0_I128; + expected[6] = 1_I128; + expected[7] = 170141183460469231731687303715884105727_I128; + expected[8] = 999999999999999999_I128; + expected[9] = 42_I128; + + // operator""_i128(const char*, size_t) - string literal (supports negative) + expected[10] = "0"_i128; + expected[11] = "1"_i128; + expected[12] = "170141183460469231731687303715884105727"_i128; + expected[13] = "-1"_i128; + expected[14] = "-170141183460469231731687303715884105727"_i128; + + // operator""_I128(const char*, size_t) - string literal (supports negative) + expected[15] = "0"_I128; + expected[16] = "1"_I128; + expected[17] = "170141183460469231731687303715884105727"_I128; + expected[18] = "-1"_I128; + expected[19] = "-170141183460469231731687303715884105727"_I128; + + // operator""_i128(unsigned long long) - integer literal + expected[20] = 0_i128; 
+ expected[21] = 1_i128; + expected[22] = 18446744073709551615_i128; + expected[23] = 42_i128; + expected[24] = 100_i128; + + // operator""_I128(unsigned long long) - integer literal + expected[25] = 0_I128; + expected[26] = 1_I128; + expected[27] = 18446744073709551615_I128; + expected[28] = 42_I128; + expected[29] = 100_I128; + + // Verify + for (int i = 0; i < NUM_TESTS; ++i) + { + if (output[i] != expected[i]) + { + std::cerr << "Result verification failed at test case " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_literals.cu b/test/test_unsigned_literals.cu new file mode 100644 index 00000000..6fd5c1c6 --- /dev/null +++ b/test/test_unsigned_literals.cu @@ -0,0 +1,149 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using boost::int128::uint128_t; +using namespace boost::int128::literals; + +// Number of test cases: we test each literal operator with several values +// Operators: +// 1. operator""_u128(const char*) - raw literal, decimal +// 2. operator""_U128(const char*) - raw literal, decimal +// 3. operator""_u128(const char*, size_t) - cooked string literal +// 4. operator""_U128(const char*, size_t) - cooked string literal +// 5. operator""_u128(unsigned long long) - integer literal +// 6. 
operator""_U128(unsigned long long) - integer literal + +constexpr int NUM_TESTS = 24; + +__global__ void cuda_test(uint128_t *out) +{ + int i = threadIdx.x; + + // operator""_u128(const char*) - raw literal + if (i == 0) { out[i] = 0_u128; } + if (i == 1) { out[i] = 1_u128; } + if (i == 2) { out[i] = 340282366920938463463374607431768211455_u128; } + if (i == 3) { out[i] = 999999999999999999_u128; } + + // operator""_U128(const char*) - raw literal + if (i == 4) { out[i] = 0_U128; } + if (i == 5) { out[i] = 1_U128; } + if (i == 6) { out[i] = 340282366920938463463374607431768211455_U128; } + if (i == 7) { out[i] = 999999999999999999_U128; } + + // operator""_u128(const char*, size_t) - string literal + if (i == 8) { out[i] = "0"_u128; } + if (i == 9) { out[i] = "1"_u128; } + if (i == 10) { out[i] = "340282366920938463463374607431768211455"_u128; } + if (i == 11) { out[i] = "999999999999999999"_u128; } + + // operator""_U128(const char*, size_t) - string literal + if (i == 12) { out[i] = "0"_U128; } + if (i == 13) { out[i] = "1"_U128; } + if (i == 14) { out[i] = "340282366920938463463374607431768211455"_U128; } + if (i == 15) { out[i] = "999999999999999999"_U128; } + + // operator""_u128(unsigned long long) - integer literal + if (i == 16) { out[i] = 0_u128; } + if (i == 17) { out[i] = 1_u128; } + if (i == 18) { out[i] = 18446744073709551615_u128; } + if (i == 19) { out[i] = 42_u128; } + + // operator""_U128(unsigned long long) - integer literal + if (i == 20) { out[i] = 0_U128; } + if (i == 21) { out[i] = 1_U128; } + if (i == 22) { out[i] = 18446744073709551615_U128; } + if (i == 23) { out[i] = 42_U128; } +} + +int main(void) +{ + cudaError_t err = cudaSuccess; + + std::cout << "[Unsigned literal tests: " << NUM_TESTS << " cases]" << std::endl; + + cuda_managed_ptr output(NUM_TESTS); + + // Launch with 1 block of NUM_TESTS threads + watch w; + + cuda_test<<<1, NUM_TESTS>>>(output.get()); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << 
w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Build expected values on host using the same literals + uint128_t expected[NUM_TESTS]; + + // operator""_u128(const char*) - raw literal + expected[0] = 0_u128; + expected[1] = 1_u128; + expected[2] = 340282366920938463463374607431768211455_u128; + expected[3] = 999999999999999999_u128; + + // operator""_U128(const char*) - raw literal + expected[4] = 0_U128; + expected[5] = 1_U128; + expected[6] = 340282366920938463463374607431768211455_U128; + expected[7] = 999999999999999999_U128; + + // operator""_u128(const char*, size_t) - string literal + expected[8] = "0"_u128; + expected[9] = "1"_u128; + expected[10] = "340282366920938463463374607431768211455"_u128; + expected[11] = "999999999999999999"_u128; + + // operator""_U128(const char*, size_t) - string literal + expected[12] = "0"_U128; + expected[13] = "1"_U128; + expected[14] = "340282366920938463463374607431768211455"_U128; + expected[15] = "999999999999999999"_U128; + + // operator""_u128(unsigned long long) - integer literal + expected[16] = 0_u128; + expected[17] = 1_u128; + expected[18] = 18446744073709551615_u128; + expected[19] = 42_u128; + + // operator""_U128(unsigned long long) - integer literal + expected[20] = 0_U128; + expected[21] = 1_U128; + expected[22] = 18446744073709551615_U128; + expected[23] = 42_U128; + + // Verify + for (int i = 0; i < NUM_TESTS; ++i) + { + if (output[i] != expected[i]) + { + std::cerr << "Result verification failed at test case " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED" << std::endl; + std::cout << "Done\n"; + + return 0; +} From 5f2e85cebfe58260642248c0e752127eeaad4cf5 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 18 Mar 2026 06:35:43 -0500 Subject: [PATCH 100/137] Move uchar values array when on device --- .../boost/int128/detail/mini_from_chars.hpp | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/include/boost/int128/detail/mini_from_chars.hpp b/include/boost/int128/detail/mini_from_chars.hpp index dfdf9b4f..b5023105 100644 --- a/include/boost/int128/detail/mini_from_chars.hpp +++ b/include/boost/int128/detail/mini_from_chars.hpp @@ -22,6 +22,9 @@ namespace int128 { namespace detail { namespace impl { + +#ifndef __NVCC__ + BOOST_INT128_INLINE_CONSTEXPR unsigned char uchar_values[] = {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, @@ -42,9 +45,35 @@ BOOST_INT128_INLINE_CONSTEXPR unsigned char uchar_values[] = static_assert(sizeof(uchar_values) == 256, "uchar_values should represent all 256 values of unsigned char"); +#endif // __NVCC__ + // Convert characters for 0-9, A-Z, a-z to 0-35. 
Anything else is 255 BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr auto digit_from_char(char val) noexcept -> unsigned char { + #ifdef __NVCC__ + + constexpr unsigned char uchar_values[] = + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, 255, 255, 255, 255, + 255, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 255, 255, 255, 255, 255, + 255, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; + + static_assert(sizeof(uchar_values) == 256, "uchar_values should represent all 256 values of unsigned char"); + + #endif // __NVCC__ + return uchar_values[static_cast(val)]; } From 1c1a6cc0750bcd7ad62b9e37151937cd696eea5a Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 18 Mar 2026 06:46:09 -0500 Subject: [PATCH 101/137] Fix NVCC warnings --- test/test_signed_literals.cu | 16 ++++++++-------- test/test_unsigned_literals.cu | 8 ++++---- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/test/test_signed_literals.cu 
b/test/test_signed_literals.cu index 5035aa52..8723b06c 100644 --- a/test/test_signed_literals.cu +++ b/test/test_signed_literals.cu @@ -33,17 +33,17 @@ __global__ void cuda_test(int128_t *out) { int i = threadIdx.x; - // operator""_i128(const char*) - raw literal + // operator""_i128(const char*) - raw literal (values must fit unsigned long long to avoid NVCC warnings) if (i == 0) { out[i] = 0_i128; } if (i == 1) { out[i] = 1_i128; } - if (i == 2) { out[i] = 170141183460469231731687303715884105727_i128; } + if (i == 2) { out[i] = 18446744073709551615_i128; } if (i == 3) { out[i] = 999999999999999999_i128; } if (i == 4) { out[i] = 42_i128; } - // operator""_I128(const char*) - raw literal + // operator""_I128(const char*) - raw literal (values must fit unsigned long long to avoid NVCC warnings) if (i == 5) { out[i] = 0_I128; } if (i == 6) { out[i] = 1_I128; } - if (i == 7) { out[i] = 170141183460469231731687303715884105727_I128; } + if (i == 7) { out[i] = 18446744073709551615_I128; } if (i == 8) { out[i] = 999999999999999999_I128; } if (i == 9) { out[i] = 42_I128; } @@ -102,17 +102,17 @@ int main(void) // Build expected values on host using the same literals int128_t expected[NUM_TESTS]; - // operator""_i128(const char*) - raw literal + // operator""_i128(const char*) - raw literal (values must fit unsigned long long to avoid NVCC warnings) expected[0] = 0_i128; expected[1] = 1_i128; - expected[2] = 170141183460469231731687303715884105727_i128; + expected[2] = 18446744073709551615_i128; expected[3] = 999999999999999999_i128; expected[4] = 42_i128; - // operator""_I128(const char*) - raw literal + // operator""_I128(const char*) - raw literal (values must fit unsigned long long to avoid NVCC warnings) expected[5] = 0_I128; expected[6] = 1_I128; - expected[7] = 170141183460469231731687303715884105727_I128; + expected[7] = 18446744073709551615_I128; expected[8] = 999999999999999999_I128; expected[9] = 42_I128; diff --git a/test/test_unsigned_literals.cu 
b/test/test_unsigned_literals.cu index 6fd5c1c6..d3fad8c3 100644 --- a/test/test_unsigned_literals.cu +++ b/test/test_unsigned_literals.cu @@ -36,13 +36,13 @@ __global__ void cuda_test(uint128_t *out) // operator""_u128(const char*) - raw literal if (i == 0) { out[i] = 0_u128; } if (i == 1) { out[i] = 1_u128; } - if (i == 2) { out[i] = 340282366920938463463374607431768211455_u128; } + if (i == 2) { out[i] = 18446744073709551615_u128; } if (i == 3) { out[i] = 999999999999999999_u128; } // operator""_U128(const char*) - raw literal if (i == 4) { out[i] = 0_U128; } if (i == 5) { out[i] = 1_U128; } - if (i == 6) { out[i] = 340282366920938463463374607431768211455_U128; } + if (i == 6) { out[i] = 18446744073709551615_U128; } if (i == 7) { out[i] = 999999999999999999_U128; } // operator""_u128(const char*, size_t) - string literal @@ -99,13 +99,13 @@ int main(void) // operator""_u128(const char*) - raw literal expected[0] = 0_u128; expected[1] = 1_u128; - expected[2] = 340282366920938463463374607431768211455_u128; + expected[2] = 18446744073709551615_u128; expected[3] = 999999999999999999_u128; // operator""_U128(const char*) - raw literal expected[4] = 0_U128; expected[5] = 1_U128; - expected[6] = 340282366920938463463374607431768211455_U128; + expected[6] = 18446744073709551615_U128; expected[7] = 999999999999999999_U128; // operator""_u128(const char*, size_t) - string literal From 2ea99ac40dd80f8ebf7c21b366cfa419bad4f5b9 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sun, 22 Mar 2026 10:53:31 -0500 Subject: [PATCH 102/137] Always point to develop --- doc/int128-playbook.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/int128-playbook.yml b/doc/int128-playbook.yml index 6379b801..50418f66 100644 --- a/doc/int128-playbook.yml +++ b/doc/int128-playbook.yml @@ -5,7 +5,7 @@ content: sources: - url: .. 
start_path: doc - branches: HEAD + branches: develop output: dir: html ui: From a812152d1ebd1b3d6a633f8d4eb2056e83b81acb Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sun, 22 Mar 2026 11:10:30 -0500 Subject: [PATCH 103/137] Macro CUDA detection and usage less fragile --- include/boost/int128/bit.hpp | 8 ++++---- include/boost/int128/charconv.hpp | 10 +++++----- include/boost/int128/detail/clz.hpp | 12 ++++++------ include/boost/int128/detail/config.hpp | 2 +- include/boost/int128/detail/ctz.hpp | 12 ++++++------ include/boost/int128/detail/int128_imp.hpp | 6 +++--- include/boost/int128/detail/mini_from_chars.hpp | 4 ++-- include/boost/int128/detail/mini_to_chars.hpp | 4 ++-- include/boost/int128/detail/uint128_imp.hpp | 4 ++-- 9 files changed, 31 insertions(+), 31 deletions(-) diff --git a/include/boost/int128/bit.hpp b/include/boost/int128/bit.hpp index 18a890a8..e014008a 100644 --- a/include/boost/int128/bit.hpp +++ b/include/boost/int128/bit.hpp @@ -65,7 +65,7 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t rotr(const uint return x >> (static_cast(s) & mask) | x << (static_cast(-s) & mask); } -#if BOOST_INT128_HAS_BUILTIN(__builtin_popcountll) && !defined(__NVCC__) +#if BOOST_INT128_HAS_BUILTIN(__builtin_popcountll) && !(defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA)) BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int popcount(const uint128_t x) noexcept { @@ -139,7 +139,7 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int popcount(const uint12 } } -#elif !BOOST_INT128_HAS_BUILTIN(__builtin_popcountll) || defined(__NVCC__) +#elif !BOOST_INT128_HAS_BUILTIN(__builtin_popcountll) || (defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA)) BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int popcount(const uint128_t x) noexcept { @@ -148,7 +148,7 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int popcount(const uint12 #endif -#if BOOST_INT128_HAS_BUILTIN(__builtin_bswap64) && !defined(__NVCC__) 
+#if BOOST_INT128_HAS_BUILTIN(__builtin_bswap64) && !(defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA)) BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t byteswap(const uint128_t x) noexcept { @@ -187,7 +187,7 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t byteswap(const } } -#elif !BOOST_INT128_HAS_BUILTIN(__builtin_bswap64) || defined(__NVCC__) +#elif !BOOST_INT128_HAS_BUILTIN(__builtin_bswap64) || (defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA)) BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t byteswap(const uint128_t x) noexcept { diff --git a/include/boost/int128/charconv.hpp b/include/boost/int128/charconv.hpp index f8176f15..8652b656 100644 --- a/include/boost/int128/charconv.hpp +++ b/include/boost/int128/charconv.hpp @@ -38,7 +38,7 @@ struct make_signed { using type = int128::int128_t; }; template <> struct make_signed { using type = int128::int128_t; }; -#ifdef __NVCC__ +#if defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA) template <> __host__ __device__ constexpr int128::uint128_t get_max_value() @@ -54,7 +54,7 @@ __host__ __device__ constexpr int128::int128_t get_max_value() #endif // __NVCC__ -#ifndef __NVCC__ +#if !(defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA)) BOOST_INT128_INLINE_CONSTEXPR int128::uint128_t int128_pow10[39] = { @@ -103,7 +103,7 @@ BOOST_INT128_INLINE_CONSTEXPR int128::uint128_t int128_pow10[39] = BOOST_INT128_HOST_DEVICE constexpr int num_digits(const int128::uint128_t& x) noexcept { - #ifdef __NVCC__ + #if defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA) constexpr int128::uint128_t int128_pow10[39] = { @@ -181,7 +181,7 @@ BOOST_INT128_HOST_DEVICE constexpr int num_digits(const int128::uint128_t& x) no BOOST_INT128_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, const int128::uint128_t value, const int base = 10) noexcept { - #ifndef __NVCC__ + #if !(defined(__CUDACC__) && 
defined(BOOST_INT128_ENABLE_CUDA)) if (base == 10) { @@ -195,7 +195,7 @@ BOOST_INT128_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* BOOST_INT128_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, const int128::int128_t value, const int base = 10) noexcept { - #ifndef __NVCC__ + #if !(defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA)) if (base == 10) { diff --git a/include/boost/int128/detail/clz.hpp b/include/boost/int128/detail/clz.hpp index 84bc092e..0ce6ad1f 100644 --- a/include/boost/int128/detail/clz.hpp +++ b/include/boost/int128/detail/clz.hpp @@ -20,7 +20,7 @@ namespace detail { namespace impl { -#ifndef __NVCC__ +#if !(defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA)) // See: http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn BOOST_INT128_INLINE_CONSTEXPR int index64[64] = { @@ -38,7 +38,7 @@ BOOST_INT128_INLINE_CONSTEXPR int index64[64] = { BOOST_INT128_HOST_DEVICE constexpr int bit_scan_reverse(std::uint64_t bb) noexcept { - #ifdef __NVCC__ + #if defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA) constexpr int index64[64] = { 0, 47, 1, 56, 48, 27, 2, 60, @@ -67,7 +67,7 @@ BOOST_INT128_HOST_DEVICE constexpr int bit_scan_reverse(std::uint64_t bb) noexce return index64[(bb * debruijn64) >> 58]; } -#ifndef __NVCC__ +#if !(defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA)) BOOST_INT128_INLINE_CONSTEXPR int countl_mod37[37] = { 32, 31, 6, 30, 9, 5, 0, 29, @@ -81,7 +81,7 @@ BOOST_INT128_INLINE_CONSTEXPR int countl_mod37[37] = { BOOST_INT128_HOST_DEVICE constexpr int backup_countl_impl(std::uint32_t x) noexcept { - #ifdef __NVCC__ + #if defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA) constexpr int countl_mod37[37] = { 32, 31, 6, 30, 9, 5, 0, 29, @@ -102,7 +102,7 @@ BOOST_INT128_HOST_DEVICE constexpr int backup_countl_impl(std::uint32_t x) noexc return countl_mod37[x % 37]; } -#if BOOST_INT128_HAS_BUILTIN(__builtin_clz) && !defined(__NVCC__) +#if 
BOOST_INT128_HAS_BUILTIN(__builtin_clz) && !(defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA)) constexpr int countl_impl(unsigned int x) noexcept { @@ -119,7 +119,7 @@ constexpr int countl_impl(unsigned long long x) noexcept return x ? __builtin_clzll(x) : std::numeric_limits::digits; } -#elif (defined(_M_AMD64) || defined(_M_ARM64)) && !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION) && !defined(__NVCC__) +#elif (defined(_M_AMD64) || defined(_M_ARM64)) && !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION) && !(defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA)) constexpr int countl_impl(std::uint32_t x) noexcept { diff --git a/include/boost/int128/detail/config.hpp b/include/boost/int128/detail/config.hpp index cf176850..72d3a0a6 100644 --- a/include/boost/int128/detail/config.hpp +++ b/include/boost/int128/detail/config.hpp @@ -285,7 +285,7 @@ using builtin_u128 = std::_Unsigned128; # endif #endif -#ifdef __NVCC__ +#if defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA) # define BOOST_INT128_HOST_DEVICE __host__ __device__ #else # define BOOST_INT128_HOST_DEVICE diff --git a/include/boost/int128/detail/ctz.hpp b/include/boost/int128/detail/ctz.hpp index 60db4db8..2c78ddcd 100644 --- a/include/boost/int128/detail/ctz.hpp +++ b/include/boost/int128/detail/ctz.hpp @@ -20,7 +20,7 @@ namespace detail { namespace impl { -#if BOOST_INT128_HAS_BUILTIN(__builtin_ctz) && !defined(__NVCC__) +#if BOOST_INT128_HAS_BUILTIN(__builtin_ctz) && !(defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA)) constexpr int countr_impl(unsigned int x) noexcept { @@ -39,7 +39,7 @@ constexpr int countr_impl(unsigned long long x) noexcept #endif -#ifndef __NVCC__ +#if !(defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA)) BOOST_INT128_INLINE_CONSTEXPR int countr_mod37[37] = { 32, 0, 1, 26, 2, 23, 27, 0, @@ -79,7 +79,7 @@ constexpr int countr_impl(std::uint32_t x) noexcept #pragma warning(pop) -#elif !BOOST_INT128_HAS_BUILTIN(__builtin_ctz) || defined(__NVCC__) +#elif 
!BOOST_INT128_HAS_BUILTIN(__builtin_ctz) || (defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA)) #ifdef _MSC_VER #pragma warning(push) @@ -88,7 +88,7 @@ constexpr int countr_impl(std::uint32_t x) noexcept BOOST_INT128_HOST_DEVICE constexpr int countr_impl(std::uint32_t x) noexcept { - #ifdef __NVCC__ + #if defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA) constexpr int countr_mod37[37] = { 32, 0, 1, 26, 2, 23, 27, 0, @@ -109,7 +109,7 @@ BOOST_INT128_HOST_DEVICE constexpr int countr_impl(std::uint32_t x) noexcept #endif -#if (defined(_M_AMD64) || defined(_M_ARM64)) && !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION) && !BOOST_INT128_HAS_BUILTIN(__builtin_ctz) && !defined(__NVCC__) +#if (defined(_M_AMD64) || defined(_M_ARM64)) && !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION) && !BOOST_INT128_HAS_BUILTIN(__builtin_ctz) && !(defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA)) constexpr int countr_impl(std::uint64_t x) noexcept { @@ -132,7 +132,7 @@ constexpr int countr_impl(std::uint64_t x) noexcept } } -#elif !BOOST_INT128_HAS_BUILTIN(__builtin_ctz) || defined(__NVCC__) +#elif !BOOST_INT128_HAS_BUILTIN(__builtin_ctz) || (defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA)) BOOST_INT128_HOST_DEVICE constexpr int countr_impl(std::uint64_t x) noexcept { diff --git a/include/boost/int128/detail/int128_imp.hpp b/include/boost/int128/detail/int128_imp.hpp index 4e5fcc11..0a1be3f8 100644 --- a/include/boost/int128/detail/int128_imp.hpp +++ b/include/boost/int128/detail/int128_imp.hpp @@ -120,7 +120,7 @@ int128_t BOOST_INT128_HOST_DEVICE explicit constexpr operator double() const noexcept; // Long double does not exist on device - #ifndef __NVCC__ + #if !(defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA)) explicit constexpr operator long double() const noexcept; #endif @@ -297,7 +297,7 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t::operator double() const noexcept return static_cast(high) * detail::offset_value_v + static_cast(low); } -#ifndef 
__NVCC__ +#if !(defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA)) constexpr int128_t::operator long double() const noexcept { @@ -2266,7 +2266,7 @@ BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr int128_t library_su BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr int128_t default_sub(const int128_t lhs, const int128_t rhs) noexcept { - #if defined(BOOST_INT128_HAS_BUILTIN_SUB_OVERFLOW) && (!defined(__aarch64__) || defined(__APPLE__) || !defined(BOOST_INT128_HAS_INT128)) && !defined(__NVCC__) + #if defined(BOOST_INT128_HAS_BUILTIN_SUB_OVERFLOW) && (!defined(__aarch64__) || defined(__APPLE__) || !defined(BOOST_INT128_HAS_INT128)) && !(defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA)) // __builtin_sub_overflow is marked constexpr so we don't need if consteval handling std::uint64_t result_low {}; diff --git a/include/boost/int128/detail/mini_from_chars.hpp b/include/boost/int128/detail/mini_from_chars.hpp index b5023105..a3198144 100644 --- a/include/boost/int128/detail/mini_from_chars.hpp +++ b/include/boost/int128/detail/mini_from_chars.hpp @@ -23,7 +23,7 @@ namespace detail { namespace impl { -#ifndef __NVCC__ +#if !(defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA)) BOOST_INT128_INLINE_CONSTEXPR unsigned char uchar_values[] = {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, @@ -50,7 +50,7 @@ static_assert(sizeof(uchar_values) == 256, "uchar_values should represent all 25 // Convert characters for 0-9, A-Z, a-z to 0-35. 
Anything else is 255 BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr auto digit_from_char(char val) noexcept -> unsigned char { - #ifdef __NVCC__ + #if defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA) constexpr unsigned char uchar_values[] = {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, diff --git a/include/boost/int128/detail/mini_to_chars.hpp b/include/boost/int128/detail/mini_to_chars.hpp index 31a5f04d..80e1fcd0 100644 --- a/include/boost/int128/detail/mini_to_chars.hpp +++ b/include/boost/int128/detail/mini_to_chars.hpp @@ -12,7 +12,7 @@ namespace boost { namespace int128 { namespace detail { -#ifndef __NVCC__ +#if !(defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA)) BOOST_INT128_INLINE_CONSTEXPR char lower_case_digit_table[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', @@ -32,7 +32,7 @@ static_assert(sizeof(upper_case_digit_table) == sizeof(char) * 16, "10 numbers, BOOST_INT128_HOST_DEVICE constexpr char* mini_to_chars(char (&buffer)[64], uint128_t v, const int base, const bool uppercase) noexcept { - #ifdef __NVCC__ + #if defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA) constexpr char lower_case_digit_table[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' diff --git a/include/boost/int128/detail/uint128_imp.hpp b/include/boost/int128/detail/uint128_imp.hpp index fdd0b309..3fd7c9b7 100644 --- a/include/boost/int128/detail/uint128_imp.hpp +++ b/include/boost/int128/detail/uint128_imp.hpp @@ -131,7 +131,7 @@ uint128_t BOOST_INT128_HOST_DEVICE explicit constexpr operator double() const noexcept; // long doubles do not exist on device - #ifndef __NVCC__ + #if !(defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA)) explicit constexpr operator long double() const noexcept; #endif @@ -299,7 +299,7 @@ BOOST_INT128_HOST_DEVICE constexpr uint128_t::operator double() const noexcept return static_cast(high) * detail::offset_value_v + static_cast(low); } 
-#ifndef __NVCC__ +#if !(defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA)) constexpr uint128_t::operator long double() const noexcept { From 11bab282207b12c60326436db0ce50105d133c87 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sun, 22 Mar 2026 11:30:25 -0500 Subject: [PATCH 104/137] Add CUDA related macros to the configuration and api references --- doc/modules/ROOT/pages/api_reference.adoc | 3 +++ doc/modules/ROOT/pages/config.adoc | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/doc/modules/ROOT/pages/api_reference.adoc b/doc/modules/ROOT/pages/api_reference.adoc index 3ce99e76..3ef06c21 100644 --- a/doc/modules/ROOT/pages/api_reference.adoc +++ b/doc/modules/ROOT/pages/api_reference.adoc @@ -307,6 +307,9 @@ Listed by analogous STL header. | xref:config.adoc#disable_exceptions[`BOOST_INT128_DISABLE_EXCEPTIONS`] | Disables exception throwing + +| xref:config.adoc#enable_cuda[`BOOST_INT128_ENABLE_CUDA`] +| Enables CUDA support allowing the library types and functions to be run on both host and device |=== ==== Automatic Configuration diff --git a/doc/modules/ROOT/pages/config.adoc b/doc/modules/ROOT/pages/config.adoc index 7d8458e1..cbc57dc4 100644 --- a/doc/modules/ROOT/pages/config.adoc +++ b/doc/modules/ROOT/pages/config.adoc @@ -17,6 +17,10 @@ https://www.boost.org/LICENSE_1_0.txt These macros allow customization of library behavior. User-configurable macros should be defined before including any library headers. +[#enable_cuda] +- `BOOST_INT128_ENABLE_CUDA`: Defining this macro allows both types and selected functions to be run on both host and device when compiling with NVCC. +Allowed functions have `BOOST_INT128_HOST_DEVICE` as part of their function signature in their documentation. + [#no_int128] - `BOOST_INT128_NO_BUILTIN_INT128`: The user may define this when they do not want the internal implementations to rely on builtin `pass:[__int128]` or `pass:[unsigned __int128]` types.
From eba51c1e7ba8c2340e4cf51d80bd9e09169fe717 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sun, 22 Mar 2026 13:01:30 -0500 Subject: [PATCH 105/137] Fix CML missing compile definitions for CUDA --- test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 809715e6..34c7f00c 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -19,7 +19,7 @@ if(HAVE_BOOST_TEST) enable_testing() - boost_test_jamfile(FILE cuda_jamfile LINK_LIBRARIES Boost::int128 Boost::random Boost::charconv ${CUDA_LIBRARIES} INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} ) + boost_test_jamfile(FILE cuda_jamfile LINK_LIBRARIES Boost::int128 Boost::random Boost::charconv ${CUDA_LIBRARIES} COMPILE_DEFINITIONS BOOST_INT128_ENABLE_CUDA=1 INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} ) else() From bd1ccfe125eba77c36b0e78c1f65dd9a937718da Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sun, 22 Mar 2026 13:30:17 -0500 Subject: [PATCH 106/137] Add automatic macro definition for CUDA in Charconv --- include/boost/int128/charconv.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/boost/int128/charconv.hpp b/include/boost/int128/charconv.hpp index 8652b656..01b4fcb9 100644 --- a/include/boost/int128/charconv.hpp +++ b/include/boost/int128/charconv.hpp @@ -10,6 +10,12 @@ #if __has_include() +// Define for the user automatically, +// otherwise we'll have an ever-increasing number of these required as we go down the dependency chain +#if defined(BOOST_INT128_ENABLE_CUDA) && !defined(BOOST_CHARCONV_ENABLE_CUDA) +# define BOOST_CHARCONV_ENABLE_CUDA +#endif + #include #include #include From 37de754918cf71830b6851fe97f099957416cff9 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 23 Mar 2026 10:36:24 -0400 Subject: [PATCH 107/137] Test C++17,20,23 on CUDA for incompatibilities --- .github/workflows/ci.yml | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git 
a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7bc04bc6..a2387830 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1191,16 +1191,15 @@ jobs: cp -r $GITHUB_WORKSPACE/* libs/$LIBRARY git submodule update --init tools/boostdep python3 tools/boostdep/depinst/depinst.py --git_args "--jobs 3" $LIBRARY - - name: Configure - run: | - cd ../boost-root - mkdir __build__ && cd __build__ - cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_INT128_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="75;86" -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.8 -DCMAKE_CUDA_STANDARD=17 .. - - name: Build tests + - name: Test C++17/20/23 run: | - cd ../boost-root/__build__ - cmake --build . --target tests -j $(nproc) - - name: Run tests - run: | - cd ../boost-root/__build__ - ctest --output-on-failure --no-tests=error + for std in 17 20 23; do + echo "======== Testing C++${std} ========" + cd ../boost-root + rm -rf __build__ + mkdir __build__ && cd __build__ + cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_INT128_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="75;86" -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.8 -DCMAKE_CUDA_STANDARD=${std} .. + cmake --build . 
--target tests -j $(nproc) + ctest --output-on-failure --no-tests=error + cd $GITHUB_WORKSPACE + done From 9758b3541b66595b64caa15bf5fe0775b0352511 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 23 Mar 2026 10:38:09 -0400 Subject: [PATCH 108/137] Disable constant evaluation paths on device --- include/boost/int128/detail/config.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/boost/int128/detail/config.hpp b/include/boost/int128/detail/config.hpp index 72d3a0a6..40f95ee8 100644 --- a/include/boost/int128/detail/config.hpp +++ b/include/boost/int128/detail/config.hpp @@ -102,9 +102,9 @@ using builtin_u128 = std::_Unsigned128; # define BOOST_INT128_HAS_BUILTIN_IS_CONSTANT_EVALUATED #endif -#if defined(BOOST_INT128_HAS_IS_CONSTANT_EVALUATED) +#if defined(BOOST_INT128_HAS_IS_CONSTANT_EVALUATED) && !(defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA)) # define BOOST_INT128_IS_CONSTANT_EVALUATED(x) std::is_constant_evaluated() -#elif defined(BOOST_INT128_HAS_BUILTIN_IS_CONSTANT_EVALUATED) +#elif defined(BOOST_INT128_HAS_BUILTIN_IS_CONSTANT_EVALUATED) && !(defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA)) # define BOOST_INT128_IS_CONSTANT_EVALUATED(x) __builtin_is_constant_evaluated() #else # define BOOST_INT128_IS_CONSTANT_EVALUATED(x) false From f645abeae963184aa831acb54e55c3b3ebca4b54 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 23 Mar 2026 10:56:20 -0400 Subject: [PATCH 109/137] Update versions of CI tools --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a2387830..549c0038 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1154,7 +1154,7 @@ jobs: runs-on: gpu-runner-1 steps: - - uses: Jimver/cuda-toolkit@v0.2.25 + - uses: Jimver/cuda-toolkit@v0.2.30 id: cuda-toolkit with: cuda: '12.8.0' @@ -1166,7 +1166,7 @@ jobs: echo "Installed cuda version is: 
${{steps.cuda-toolkit.outputs.cuda}}"+ echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}" nvcc -V - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Install Packages run: | From f3b89d5824c5120ad3873f195e55cfbf27900e6d Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 23 Mar 2026 15:40:12 -0400 Subject: [PATCH 110/137] Replace deprecated find CUDA module --- test/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 34c7f00c..e601babb 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -13,13 +13,13 @@ if(HAVE_BOOST_TEST) message(STATUS "Building Boost.int128 with CUDA") - find_package(CUDA REQUIRED) enable_language(CUDA) + find_package(CUDAToolkit REQUIRED) set(CMAKE_CUDA_EXTENSIONS OFF) enable_testing() - boost_test_jamfile(FILE cuda_jamfile LINK_LIBRARIES Boost::int128 Boost::random Boost::charconv ${CUDA_LIBRARIES} COMPILE_DEFINITIONS BOOST_INT128_ENABLE_CUDA=1 INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} ) + boost_test_jamfile(FILE cuda_jamfile LINK_LIBRARIES Boost::int128 Boost::random Boost::charconv CUDA::cudart COMPILE_DEFINITIONS BOOST_INT128_ENABLE_CUDA=1 ) else() From 6b4f7a55e03d584a0a3da838c80596405121b326 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 24 Mar 2026 13:35:29 -0400 Subject: [PATCH 111/137] Remove unneeded CMake option --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 549c0038..3418f60e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1198,7 +1198,7 @@ jobs: cd ../boost-root rm -rf __build__ mkdir __build__ && cd __build__ - cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_INT128_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="75;86" -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.8 -DCMAKE_CUDA_STANDARD=${std} .. 
+ cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_INT128_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="75;86" -DCMAKE_CUDA_STANDARD=${std} .. cmake --build . --target tests -j $(nproc) ctest --output-on-failure --no-tests=error cd $GITHUB_WORKSPACE From da44c617597f2fb1357c081a8b247a44be14bf69 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 24 Mar 2026 14:25:38 -0400 Subject: [PATCH 112/137] Fix and add additional testing of ostream values --- test/test_stream.cpp | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/test/test_stream.cpp b/test/test_stream.cpp index 836b4300..b6ebea8b 100644 --- a/test/test_stream.cpp +++ b/test/test_stream.cpp @@ -89,15 +89,25 @@ void test_ostream() std::stringstream hex_out; hex_out.flags(std::ios_base::hex); hex_out << hex_val; - BOOST_TEST_CSTR_EQ(hex_out.str().c_str(), "0xff"); + BOOST_TEST_CSTR_EQ(hex_out.str().c_str(), "ff"); // 32-bit windows does not set the flags correctly in CI #ifndef _M_IX86 + std::stringstream hex_out_base; + hex_out_base.flags(std::ios_base::hex | std::ios_base::showbase); + hex_out_base << hex_val; + BOOST_TEST_CSTR_EQ(hex_out_base.str().c_str(), "0xff"); + std::stringstream hex_out_upper; hex_out_upper.flags(std::ios_base::hex | std::ios_base::uppercase); hex_out_upper << hex_val; - BOOST_TEST_CSTR_EQ(hex_out_upper.str().c_str(), "0XFF"); + BOOST_TEST_CSTR_EQ(hex_out_upper.str().c_str(), "FF"); + + std::stringstream hex_out_upper_base; + hex_out_upper_base.flags(std::ios_base::hex | std::ios_base::uppercase | std::ios_base::showbase); + hex_out_upper_base << hex_val; + BOOST_TEST_CSTR_EQ(hex_out_upper_base.str().c_str(), "0XFF"); #endif @@ -105,7 +115,12 @@ void test_ostream() std::stringstream octal_out; octal_out.flags(std::ios_base::oct); octal_out << octal_val; - BOOST_TEST_CSTR_EQ(octal_out.str().c_str(), "04"); + BOOST_TEST_CSTR_EQ(octal_out.str().c_str(), "4"); + + std::stringstream 
octal_out_upper; + octal_out_upper.flags(std::ios_base::hex | std::ios_base::showbase); + octal_out_upper << octal_val; + BOOST_TEST_CSTR_EQ(octal_out.str().c_str(), "4"); BOOST_INT128_IF_CONSTEXPR (std::is_same::value) { From 8150b07ec34b6b5818eb2437548930ec7078231e Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 24 Mar 2026 14:25:49 -0400 Subject: [PATCH 113/137] Only append base when the flags say so --- include/boost/int128/iostream.hpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/include/boost/int128/iostream.hpp b/include/boost/int128/iostream.hpp index 9d87d0f8..d6c4c7c0 100644 --- a/include/boost/int128/iostream.hpp +++ b/include/boost/int128/iostream.hpp @@ -124,14 +124,17 @@ auto operator<<(std::basic_ostream& os, const LibIntegerType& v) auto first {detail::mini_to_chars(buffer, v, base, uppercase)}; - if (base == 8) + if (flags & std::ios_base::showbase) { - *--first = '0'; - } - else if (base == 16) - { - *--first = uppercase ? 'X' : 'x'; - *--first = '0'; + if (base == 8) + { + *--first = '0'; + } + else if (base == 16) + { + *--first = uppercase ? 'X' : 'x'; + *--first = '0'; + } } BOOST_INT128_IF_CONSTEXPR (!std::is_same::value) From 5517b6ba547570f15bb9292ae6f51eae2618164e Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 24 Mar 2026 14:25:59 -0400 Subject: [PATCH 114/137] Improve streaming docs --- doc/modules/ROOT/pages/stream.adoc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/modules/ROOT/pages/stream.adoc b/doc/modules/ROOT/pages/stream.adoc index bf6de608..04403303 100644 --- a/doc/modules/ROOT/pages/stream.adoc +++ b/doc/modules/ROOT/pages/stream.adoc @@ -44,7 +44,9 @@ The following flags from `` are supported for both streaming directions: - `std::oct` - Octal Numbers - `std::dec` - Decimal Numbers - `std::hex` - Hexadecimal Numbers -- `std::uppercase` - Upper Case Formatting (e.g. 0XFFFF) -- `std::nouppercase` - Lower Case Formatting (e.g. 
0xffff) +- `std::uppercase` - Upper Case Formatting (e.g. FFFF) +- `std::nouppercase` - Lower Case Formatting (e.g. ffff) +- `std::showbase` - Adds a leading base for hex or oct numbers (e.g. 0xffff) +- `std::noshowbase` - Removes the leading base for hex or oct numbers (e.g. ffff) See the xref:examples.adoc#examples_io[IO streaming example] for usage demonstrations. From 33b80b2179f827c98e6b188348969cdfbad9edd8 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 25 Mar 2026 15:28:08 -0400 Subject: [PATCH 115/137] Add CUDA example --- examples/cuda.cu | 132 ++++++++++++++++++++++++++++++++++++++++++++++ test/cuda_jamfile | 2 + 2 files changed, 134 insertions(+) create mode 100644 examples/cuda.cu diff --git a/examples/cuda.cu b/examples/cuda.cu new file mode 100644 index 00000000..2420174e --- /dev/null +++ b/examples/cuda.cu @@ -0,0 +1,132 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +using test_type = boost::int128::uint128_t; + +// Calculates the GCD of 2 values on device +__global__ void cuda_gcd(const test_type* in1, const test_type* in2, test_type* out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::gcd(in1[i], in2[i]); + } +} + +// Allocate managed space so that the arrays can be used on both host and device +void allocate(test_type* in, int numElements) +{ + cudaError_t err = cudaSuccess; + err = cudaMallocManaged(&in, numElements * sizeof(T)); + if (err != cudaSuccess) + { + throw std::runtime_error(cudaGetErrorString(err)); + } + + cudaDeviceSynchronize(); +} + +void cleanup(test_type* in1, test_type* in2, test_type* out) +{ + if (in1 != nullptr) + { + free(in1); + in1 = nullptr; + } + + if (in2 != 
nullptr) + { + free(in2); + in2 = nullptr; + } + + if (out != nullptr) + { + free(out); + out = nullptr; + } +} + +int main() +{ + std::mt19937_64 rng {42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate managed space for our inputs and GPU outputs + // We then fill them with random numbers + + test_type* in1; + test_type* in2; + test_type* out; + + allocate(in1, numElements); + allocate(in2, numElements); + allocate(out, numElements); + + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + in1[i] = dist(rng); + in2[i] = dist(rng); + } + + const int threadsPerBlock = 256; + const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + // Launch the CUDA kernel and check for errors + + cuda_gcd<<>>(in1, in2, out, numElements); + cudaDeviceSynchronize(); + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + cleanup(in1, in2, out); + return EXIT_FAILURE; + } + + // We now will perform the same operation using the same inputs on CPU, + // to compare the results for equality + + std::vector results; + results.reserve(numElements); + + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::gcd(in1[i], in2[i])); + } + + // We can now compare that our operation on GPU and the same operation on CPU have identical results + + for (int i = 0; i < numElements; ++i) + { + if (out[i] != results[i]) + { + std::cerr << "Result verification failed at element: " << i << "!" 
<< std::endl; + cleanup(in1, in2, out); + return EXIT_FAILURE; + } + } + + cleanup(in1, in2, out); + + return 0; +} diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 1a67d48d..e7279d01 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -92,3 +92,5 @@ run test_signed_from_chars_bases.cu ; run test_unsigned_literals.cu ; run test_signed_literals.cu ; + +run ../examples/cuda.cu ; From 15ed8f44982c6c1ed7bba81f538bb094e6e91528 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 25 Mar 2026 15:32:12 -0400 Subject: [PATCH 116/137] Fix warnings and errors --- examples/cuda.cu | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/cuda.cu b/examples/cuda.cu index 2420174e..67228d50 100644 --- a/examples/cuda.cu +++ b/examples/cuda.cu @@ -31,7 +31,7 @@ __global__ void cuda_gcd(const test_type* in1, const test_type* in2, test_type* void allocate(test_type* in, int numElements) { cudaError_t err = cudaSuccess; - err = cudaMallocManaged(&in, numElements * sizeof(T)); + err = cudaMallocManaged(&in, numElements * sizeof(test_type)); if (err != cudaSuccess) { throw std::runtime_error(cudaGetErrorString(err)); @@ -65,15 +65,15 @@ int main() { std::mt19937_64 rng {42}; - int numElements = 50000; + const int numElements = 50000; std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; // Allocate managed space for our inputs and GPU outputs // We then fill them with random numbers - test_type* in1; - test_type* in2; - test_type* out; + test_type* in1 = nullptr; + test_type* in2 = nullptr; + test_type* out = nullptr; allocate(in1, numElements); allocate(in2, numElements); @@ -95,7 +95,7 @@ int main() cuda_gcd<<>>(in1, in2, out, numElements); cudaDeviceSynchronize(); - err = cudaGetLastError(); + cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; From 637be29fb7492ebd538e82e5a5045ae8e449701e Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 25 Mar 2026 15:36:14 -0400 Subject: [PATCH 117/137] Allow sign conversion with boost.random --- examples/cuda.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/cuda.cu b/examples/cuda.cu index 67228d50..62203249 100644 --- a/examples/cuda.cu +++ b/examples/cuda.cu @@ -3,6 +3,8 @@ // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + #include #include #include From 967deb25b2d34b70e213a7c7abdff9f20b4c71d8 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 25 Mar 2026 15:51:48 -0400 Subject: [PATCH 118/137] Cleanup semantics --- examples/cuda.cu | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/examples/cuda.cu b/examples/cuda.cu index 62203249..b6463098 100644 --- a/examples/cuda.cu +++ b/examples/cuda.cu @@ -30,10 +30,10 @@ __global__ void cuda_gcd(const test_type* in1, const test_type* in2, test_type* } // Allocate managed space so that the arrays can be used on both host and device -void allocate(test_type* in, int numElements) +void allocate(test_type** in, int numElements) { cudaError_t err = cudaSuccess; - err = cudaMallocManaged(&in, numElements * sizeof(test_type)); + err = cudaMallocManaged(in, numElements * sizeof(test_type)); if (err != cudaSuccess) { throw std::runtime_error(cudaGetErrorString(err)); @@ -42,25 +42,27 @@ void allocate(test_type* in, int numElements) cudaDeviceSynchronize(); } -void cleanup(test_type* in1, test_type* in2, test_type* out) +void cleanup(test_type** in1, test_type** in2, test_type** out) { - if (in1 != nullptr) + if (*in1 != nullptr) { - free(in1); - in1 = nullptr; + cudaFree(*in1); + *in1 = nullptr; } - if (in2 != nullptr) + if (*in2 != nullptr) { - free(in2); - in2 = nullptr; + cudaFree(*in2); + *in2 = nullptr; } - 
if (out != nullptr) + if (*out != nullptr) { - free(out); - out = nullptr; + cudaFree(*out); + *out = nullptr; } + + cudaDeviceReset(); } int main() @@ -77,9 +79,9 @@ int main() test_type* in2 = nullptr; test_type* out = nullptr; - allocate(in1, numElements); - allocate(in2, numElements); - allocate(out, numElements); + allocate(&in1, numElements); + allocate(&in2, numElements); + allocate(&out, numElements); boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; for (std::size_t i = 0; i < numElements; ++i) @@ -101,7 +103,7 @@ int main() if (err != cudaSuccess) { std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; - cleanup(in1, in2, out); + cleanup(&in1, &in2, &out); return EXIT_FAILURE; } @@ -113,7 +115,7 @@ int main() for (int i = 0; i < numElements; ++i) { - results.push_back(boost::int128::gcd(in1[i], in2[i])); + results.emplace_back(boost::int128::gcd(in1[i], in2[i])); } // We can now compare that our operation on GPU and the same operation on CPU have identical results @@ -123,12 +125,12 @@ int main() if (out[i] != results[i]) { std::cerr << "Result verification failed at element: " << i << "!" << std::endl; - cleanup(in1, in2, out); + cleanup(&in1, &in2, &out); return EXIT_FAILURE; } } - cleanup(in1, in2, out); + cleanup(&in1, &in2, &out); return 0; } From 4ef4fa4411905a78f0d82548095f9c30a3592c80 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 25 Mar 2026 15:55:18 -0400 Subject: [PATCH 119/137] Add completion statement --- examples/cuda.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/cuda.cu b/examples/cuda.cu index b6463098..ec20577c 100644 --- a/examples/cuda.cu +++ b/examples/cuda.cu @@ -132,5 +132,7 @@ int main() cleanup(&in1, &in2, &out); + std::cout << "All CPU and GPU computed elements match!" 
<< std::endl; + return 0; } From fac932eed0d803fa9ec1715e2ec9e427ae585369 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 25 Mar 2026 15:55:25 -0400 Subject: [PATCH 120/137] Add to examples and nav --- doc/modules/ROOT/nav.adoc | 1 + doc/modules/ROOT/pages/examples.adoc | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/doc/modules/ROOT/nav.adoc b/doc/modules/ROOT/nav.adoc index c91432c1..ae2c1c61 100644 --- a/doc/modules/ROOT/nav.adoc +++ b/doc/modules/ROOT/nav.adoc @@ -12,6 +12,7 @@ ** xref:examples.adoc#examples_boost_math_random[Boost Math and Random Integration] ** xref:examples.adoc#examples_boost_charconv[Boost.Charconv Integration] ** xref:examples.adoc#examples_cstdlib[`` support (Combined div and mod)] +** xref:examples.adoc#examples_cuda[Use of the library in a CUDA kernel] * xref:api_reference.adoc[] ** xref:api_reference.adoc#api_namespaces[Namespaces] ** xref:api_reference.adoc#api_types[Types] diff --git a/doc/modules/ROOT/pages/examples.adoc b/doc/modules/ROOT/pages/examples.adoc index 1e83f2d0..2db70e47 100644 --- a/doc/modules/ROOT/pages/examples.adoc +++ b/doc/modules/ROOT/pages/examples.adoc @@ -492,3 +492,22 @@ Verification: 142857142857142857 * 7 + 1 = 1000000000000000000 3 / 10 = 0 remainder 3 ---- ==== + +[#examples_cuda] +== CUDA Usage + +.This https://github.com/cppalliance/int128/blob/develop/examples/cuda.cu[example] demonstrates how to use library types and functions inside a CUDA kernel. +==== +[source, c++] +---- +include::example$cuda.cu[] +---- + +.Expected Output +[listing] +---- +[Vector operation on 50000 elements] +CUDA kernel launch with 196 blocks of 256 threads +All CPU and GPU computed elements match! 
+---- +==== From f9cb2ae193a3f08900e5bbc33d376412bffe6f63 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 26 Mar 2026 09:47:25 +0000 Subject: [PATCH 121/137] Bump picomatch in /doc Bumps and [picomatch](https://github.com/micromatch/picomatch). These dependencies needed to be updated together. Updates `picomatch` from 2.3.1 to 2.3.2 - [Release notes](https://github.com/micromatch/picomatch/releases) - [Changelog](https://github.com/micromatch/picomatch/blob/master/CHANGELOG.md) - [Commits](https://github.com/micromatch/picomatch/compare/2.3.1...2.3.2) Updates `picomatch` from 4.0.3 to 4.0.4 - [Release notes](https://github.com/micromatch/picomatch/releases) - [Changelog](https://github.com/micromatch/picomatch/blob/master/CHANGELOG.md) - [Commits](https://github.com/micromatch/picomatch/compare/2.3.1...2.3.2) --- updated-dependencies: - dependency-name: picomatch dependency-version: 2.3.2 dependency-type: indirect - dependency-name: picomatch dependency-version: 4.0.4 dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- doc/package-lock.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/package-lock.json b/doc/package-lock.json index 73c1f891..b91e6bad 100644 --- a/doc/package-lock.json +++ b/doc/package-lock.json @@ -1443,9 +1443,9 @@ } }, "node_modules/micromatch/node_modules/picomatch": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", - "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz", + "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==", "dev": true, "license": "MIT", "engines": { @@ -1583,9 +1583,9 @@ "license": "MIT" }, "node_modules/picomatch": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", - "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", + "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "dev": true, "license": "MIT", "engines": { From 2fbfe6890d98b064eada2a1031364ed927aae756 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 26 Mar 2026 08:44:34 -0400 Subject: [PATCH 122/137] Fix general documentation errors --- doc/modules/ROOT/nav.adoc | 2 +- doc/modules/ROOT/pages/api_reference.adoc | 9 ++++++--- doc/modules/ROOT/pages/config.adoc | 2 +- doc/modules/ROOT/pages/examples.adoc | 2 +- doc/modules/ROOT/pages/file_structure.adoc | 15 ++++++++++++--- doc/modules/ROOT/pages/format.adoc | 2 +- doc/modules/ROOT/pages/i128_benchmarks.adoc | 4 ++-- doc/modules/ROOT/pages/mixed_type_ops.adoc | 6 +++--- doc/modules/ROOT/pages/numeric.adoc | 7 +++++-- 
doc/modules/ROOT/pages/overview.adoc | 4 ++-- doc/modules/ROOT/pages/uint128_t.adoc | 10 +++++----- 11 files changed, 39 insertions(+), 24 deletions(-) diff --git a/doc/modules/ROOT/nav.adoc b/doc/modules/ROOT/nav.adoc index ae2c1c61..f1b1df9b 100644 --- a/doc/modules/ROOT/nav.adoc +++ b/doc/modules/ROOT/nav.adoc @@ -6,7 +6,7 @@ ** xref:examples.adoc#examples_rollover[Rollover Behavior] ** xref:examples.adoc#examples_bit[`` support] ** xref:examples.adoc#examples_numeric[`` support (Saturating Arithmetic)] -** xref:examples.adoc#examples_numeric[`` support (Numeric Logarithms)] +** xref:examples.adoc#examples_numeric_algorithms[`` support (Numeric Algorithms)] ** xref:examples.adoc#examples_mixed_sign[Mixed Signedness Arithmetic] ** xref:examples.adoc#examples_to_string[String Conversion (to_string)] ** xref:examples.adoc#examples_boost_math_random[Boost Math and Random Integration] diff --git a/doc/modules/ROOT/pages/api_reference.adoc b/doc/modules/ROOT/pages/api_reference.adoc index 3ef06c21..af870252 100644 --- a/doc/modules/ROOT/pages/api_reference.adoc +++ b/doc/modules/ROOT/pages/api_reference.adoc @@ -20,6 +20,9 @@ https://www.boost.org/LICENSE_1_0.txt | xref:literals.adoc[`boost::int128::literals`] | User-defined literals for 128-bit integers + +| xref:charconv.adoc[`boost::charconv`] +| `to_chars` and `from_chars` overloads for 128-bit integers (requires Boost.Charconv) |=== [#api_types] @@ -146,7 +149,7 @@ Listed by analogous STL header. |=== | Function | Description -| `abs` +| xref:int128_t.adoc#i128_math_operators[`abs`] | Absolute value |=== @@ -359,8 +362,8 @@ Listed by analogous STL header. 
| xref:format.adoc#std_format[``] | Formatting integration for pass:[C++20] `` -| xref:uint128_t.adoc[``] -| The `int128_t` and `uint128_t` types +| `` +| The xref:uint128_t.adoc[`uint128_t`] and xref:int128_t.adoc[`int128_t`] types | xref:stream.adoc[``] | Iostream overloads for `int128_t` and `uint128_t` diff --git a/doc/modules/ROOT/pages/config.adoc b/doc/modules/ROOT/pages/config.adoc index cbc57dc4..c9f0b9f8 100644 --- a/doc/modules/ROOT/pages/config.adoc +++ b/doc/modules/ROOT/pages/config.adoc @@ -19,7 +19,7 @@ These macros allow customization of library behavior. User-configurable macros s [#enable_cuda] - `BOOST_INT128_ENABLE_CUDA`: Defining this macro allows both types and selected functions to be run on both host and device when compiling with NVCC. -Allowed functions have `BOOST_IN128_HOST_DEVICE` as part of their function signature in their documentation. +Allowed functions have `BOOST_INT128_HOST_DEVICE` as part of their function signature in their documentation. [#no_int128] - `BOOST_INT128_NO_BUILTIN_INT128`: The user may define this when they do not want the internal implementations to rely on builtin `pass:[__int128]` or `pass:[unsigned __int128]` types. 
diff --git a/doc/modules/ROOT/pages/examples.adoc b/doc/modules/ROOT/pages/examples.adoc index 2db70e47..7a8a4693 100644 --- a/doc/modules/ROOT/pages/examples.adoc +++ b/doc/modules/ROOT/pages/examples.adoc @@ -304,7 +304,7 @@ signed_value / 4U = 0 include::example$math_and_random.cpp[] ---- -.Expected Output +.Example Output (values vary per run) [listing] ---- === uint128_t === diff --git a/doc/modules/ROOT/pages/file_structure.adoc b/doc/modules/ROOT/pages/file_structure.adoc index c2d645a3..84c6765f 100644 --- a/doc/modules/ROOT/pages/file_structure.adoc +++ b/doc/modules/ROOT/pages/file_structure.adoc @@ -16,22 +16,25 @@ The entire library can be consumed via ``, or by independently | Header | Description | `` -| The complete library (includes all headers below) +| Convenience header (includes most headers below; does not include `charconv.hpp`, `fmt_format.hpp`, or `random.hpp`) | xref:bit.adoc[``] | Bit manipulation functions | xref:charconv.adoc[``] -| Character conversion (`to_chars`/`from_chars`) +| Character conversion (`to_chars`/`from_chars`); requires Boost.Charconv headers | xref:api_reference.adoc#api_macro_literals[``] | C-style limit macros (`BOOST_INT128_INT128_MAX`, etc.) 
+| xref:config.adoc[``] +| Configuration macros + | xref:cstdlib.adoc[``] | Division with quotient and remainder (`div`) | xref:format.adoc[``] -| `pass:[{fmt}]` library support +| `pass:[{fmt}]` library support; requires the `pass:[{fmt}]` library | xref:format.adoc[``] | C++20 `std::format` support @@ -50,4 +53,10 @@ The entire library can be consumed via ``, or by independently | xref:numeric.adoc[``] | Numeric functions (`gcd`, `lcm`, saturating arithmetic) + +| `` +| Traits for usage with Boost.Random + +| xref:string.adoc[``] +| `to_string` overloads |=== diff --git a/doc/modules/ROOT/pages/format.adoc b/doc/modules/ROOT/pages/format.adoc index 40304463..c257cf58 100644 --- a/doc/modules/ROOT/pages/format.adoc +++ b/doc/modules/ROOT/pages/format.adoc @@ -37,7 +37,7 @@ Examples: | `{:*^6d}` | `"**42**"` (centered with asterisks) |=== -NOTE: When no alignment is specified but a width is given (e.g., `{:6d}`), zero-padding is applied from the left. +NOTE: When no alignment is specified but a `0` prefix and width are given (e.g., `{:06d}`), zero-padding is applied from the left. Without the `0` prefix (e.g., `{:6d}`), space-padding is applied instead, matching `std::format` behavior. == Sign diff --git a/doc/modules/ROOT/pages/i128_benchmarks.adoc b/doc/modules/ROOT/pages/i128_benchmarks.adoc index 1914394d..915e1be5 100644 --- a/doc/modules/ROOT/pages/i128_benchmarks.adoc +++ b/doc/modules/ROOT/pages/i128_benchmarks.adoc @@ -12,7 +12,7 @@ https://www.boost.org/LICENSE_1_0.txt The benchmarks below represent the time in microseconds it takes to perform 20'000'000 operations between two values of random width (e.g. 2x1 words, 1x2 words, etc.). On most platforms we use the builtin `\__int128` as the reference benchmark. -When this is unavailable (such as on 32-bit architectures) we us `boost::multiprecision::int128_t` (abbreviated as `boost::mp::int128_t`) as it is widely used, and known to be portable. 
+When this is unavailable (such as on 32-bit architectures) we use `boost::multiprecision::int128_t` (abbreviated as `boost::mp::int128_t`) as it is widely used, and known to be portable. On MSVC platforms we use as reference `std::_Signed128` from the header `<__msvc_int128.hpp>` since this is bundled with their compiler. [#i128_linux] @@ -56,7 +56,7 @@ image::i128_graphs/linux/x64_relative_performance.png[x64 Relative Performance, image::i128_graphs/linux/ARM64_benchmarks.png[ARM64 Benchmark Results, width=100%] //// -image::i128_graphs/linux/ARM64_relative_performance.png[x64 Relative Performance, width=100%] +image::i128_graphs/linux/ARM64_relative_performance.png[ARM64 Relative Performance, width=100%] === S390x diff --git a/doc/modules/ROOT/pages/mixed_type_ops.adoc b/doc/modules/ROOT/pages/mixed_type_ops.adoc index 961490c8..38d2e356 100644 --- a/doc/modules/ROOT/pages/mixed_type_ops.adoc +++ b/doc/modules/ROOT/pages/mixed_type_ops.adoc @@ -110,7 +110,7 @@ BOOST_INT128_HOST_DEVICE constexpr bool operator!=(uint128_t lhs, int128_t rhs); BOOST_INT128_HOST_DEVICE constexpr bool operator!=(int128_t lhs, uint128_t rhs); ---- -If the `int128_t` argument is less than 0 returns `false`. +If the `int128_t` argument is less than 0 returns `true`. Otherwise, returns the same as `static_cast(lhs) != static_cast(rhs)`. === Less Than @@ -152,7 +152,7 @@ If `lhs` is type `int128_t` returns `false` if `lhs < 0` If `rhs` is type `int128_t` returns `true` if `rhs < 0` Otherwise, returns the same as `static_cast(lhs) > static_cast(rhs)`. -=== Less Than or Equal To +=== Greater Than or Equal To [source, c++] ---- @@ -167,7 +167,7 @@ Otherwise, returns the same as `static_cast(lhs) pass:[>=] static_cas == Arithmetic -If you define xref:config.adoc#sign_compare[`BOOST_INT128_ALLOW_SIGN_CONVERSION`], the operators have the following behavior. +If you define xref:config.adoc#sign_conversion[`BOOST_INT128_ALLOW_SIGN_CONVERSION`], the operators have the following behavior. 
=== Addition diff --git a/doc/modules/ROOT/pages/numeric.adoc b/doc/modules/ROOT/pages/numeric.adoc index af0fb9b3..432044c0 100644 --- a/doc/modules/ROOT/pages/numeric.adoc +++ b/doc/modules/ROOT/pages/numeric.adoc @@ -57,8 +57,11 @@ Should the `TargetIntegerType` not be able to represent the value of the `Librar namespace boost { namespace int128 { -constexpr -BOOST_INT128_HOST_DEVICE constexpr TargetIntegerType saturate_cast(LibraryIntegerType x) noexcept; +template +BOOST_INT128_HOST_DEVICE constexpr TargetType saturate_cast(uint128_t value) noexcept; + +template +BOOST_INT128_HOST_DEVICE constexpr TargetType saturate_cast(int128_t value) noexcept; } // namespace int128 } // namespace boost diff --git a/doc/modules/ROOT/pages/overview.adoc b/doc/modules/ROOT/pages/overview.adoc index 42d208ce..bf37e313 100644 --- a/doc/modules/ROOT/pages/overview.adoc +++ b/doc/modules/ROOT/pages/overview.adoc @@ -24,8 +24,8 @@ When using pass:[C++20] or newer, the library can be consumed as a module `impor GCC and Clang offer `__int128` as a non-standard extension on 64-bit targets, but it lacks `std::numeric_limits` specializations, `` support, and is absent entirely on MSVC. Multiprecision libraries can fill the gap, but typically at the cost of a larger `sizeof` and additional overhead (e.g., Boost.Multiprecision always has an extra word). Boost.Int128 solves this by providing types that are exactly 128-bits on every platform. -Operation implementations rely on compiler intrinsic where available for native performance, and optimized software implementations elsewhere. -The types provided by the library also have native support being running on GPU using CUDA, along with many of the functions. +Operation implementations rely on compiler intrinsics where available for native performance, and optimized software implementations elsewhere. +The types provided by the library also natively support running on GPUs using CUDA, along with many of the functions. 
== Use Cases diff --git a/doc/modules/ROOT/pages/uint128_t.adoc b/doc/modules/ROOT/pages/uint128_t.adoc index efe7d742..88802b2c 100644 --- a/doc/modules/ROOT/pages/uint128_t.adoc +++ b/doc/modules/ROOT/pages/uint128_t.adoc @@ -405,18 +405,18 @@ This operation is subject to mixed sign limitations discussed xref:uint128_t.ado [source, c++] ---- template -BOOST_INT128_HOST_DEVICE constexpr uint128_t operator<<(const uint128_t lhs, const Integer rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator>>(const uint128_t lhs, const Integer rhs) noexcept; template ::value && (sizeof(Integer) * 8 > 16), bool> = true> -BOOST_INT128_HOST_DEVICE constexpr Integer operator<<(const Integer lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr Integer operator>>(const Integer lhs, const uint128_t rhs) noexcept; template && (sizeof(SignedInteger) * 8 <= 16), bool> = true> -BOOST_INT128_HOST_DEVICE constexpr int operator<<(const SignedInteger lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr int operator>>(const SignedInteger lhs, const uint128_t rhs) noexcept; template && (sizeof(UnsignedInteger) * 8 <= 16), bool> = true> -BOOST_INT128_HOST_DEVICE constexpr unsigned int operator<<(const UnsignedInteger lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr unsigned int operator>>(const UnsignedInteger lhs, const uint128_t rhs) noexcept; -BOOST_INT128_HOST_DEVICE constexpr uint128_t operator<<(const uint128_t lhs, const uint128_t rhs) noexcept; +BOOST_INT128_HOST_DEVICE constexpr uint128_t operator>>(const uint128_t lhs, const uint128_t rhs) noexcept; ---- Returns the bitwise right shift of `lhs` without exception. 
From f4d48d08e7a300b150e411e129b0aa1b9144f7c9 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 26 Mar 2026 08:45:43 -0400 Subject: [PATCH 123/137] Fix duplicated benchmark data --- doc/modules/ROOT/pages/u128_benchmarks.adoc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/modules/ROOT/pages/u128_benchmarks.adoc b/doc/modules/ROOT/pages/u128_benchmarks.adoc index fabf1463..88f9a03b 100644 --- a/doc/modules/ROOT/pages/u128_benchmarks.adoc +++ b/doc/modules/ROOT/pages/u128_benchmarks.adoc @@ -12,7 +12,7 @@ https://www.boost.org/LICENSE_1_0.txt The benchmarks below represent the time in microseconds it takes to perform 20'000'000 operations between two values of random width (e.g. 2x1 words, 1x2 words, etc.). On most platforms we use the builtin `unsigned \__int128` as the reference benchmark. -When this is unavailable (such as on 32-bit architectures) we us `boost::multiprecision::uint128_t` (abbreviated as `boost::mp::uint128_t`) as it is widely used, and known to be portable. +When this is unavailable (such as on 32-bit architectures) we use `boost::multiprecision::uint128_t` (abbreviated as `boost::mp::uint128_t`) as it is widely used, and known to be portable. On MSVC platforms we use as reference `std::_Unsigned128` from the header `<__msvc_int128.hpp>` since this is bundled with their compiler. 
[#u128_linux] @@ -56,7 +56,7 @@ image::u128_graphs/linux/x64_relative_performance.png[x64 Relative Performance, image::u128_graphs/linux/ARM64_benchmarks.png[ARM64 Benchmark Results, width=100%] //// -image::u128_graphs/linux/ARM64_relative_performance.png[x64 Relative Performance, width=100%] +image::u128_graphs/linux/ARM64_relative_performance.png[ARM64 Relative Performance, width=100%] === S390x @@ -232,12 +232,12 @@ image::u128_graphs/macos/ARM64_relative_performance.png[ARM64 Relative Performan |=== | Operation | `unsigned __int128` | `uint128_t` | `boost::mp::uint128_t` -| Comparisons | 131902 | 133564 | 134182 -| Addition | 20613 | 17912 | 40176 -| Subtraction | 20484 | 18237 | 40311 -| Multiplication | 20160 | 20580 | 43285 -| Division | 686521 | 699201 | 945928 -| Modulo | 777084 | 724648 | 953117 +| Comparisons | 688225 | 712352 | 689146 +| Addition | 104921 | 124992 | 137819 +| Subtraction | 129150 | 102302 | 153484 +| Multiplication | 120363 | 119652 | 164100 +| Division | 2333812 | 1981469 | 2784139 +| Modulo | 2621949 | 2219481 | 2736682 |=== //// From 9e741f55caddcfc8a9094cf7dd88d53c18a1ea11 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 26 Mar 2026 18:55:18 +0000 Subject: [PATCH 124/137] Bump convict from 6.2.4 to 6.2.5 in /doc Bumps [convict](https://github.com/mozilla/node-convict) from 6.2.4 to 6.2.5. - [Changelog](https://github.com/mozilla/node-convict/blob/master/CHANGELOG.md) - [Commits](https://github.com/mozilla/node-convict/commits) --- updated-dependencies: - dependency-name: convict dependency-version: 6.2.5 dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- doc/package-lock.json | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/package-lock.json b/doc/package-lock.json index b91e6bad..05f7dccd 100644 --- a/doc/package-lock.json +++ b/doc/package-lock.json @@ -688,10 +688,11 @@ "license": "MIT" }, "node_modules/convict": { - "version": "6.2.4", - "resolved": "https://registry.npmjs.org/convict/-/convict-6.2.4.tgz", - "integrity": "sha512-qN60BAwdMVdofckX7AlohVJ2x9UvjTNoKVXCL2LxFk1l7757EJqf1nySdMkPQer0bt8kQ5lQiyZ9/2NvrFBuwQ==", + "version": "6.2.5", + "resolved": "https://registry.npmjs.org/convict/-/convict-6.2.5.tgz", + "integrity": "sha512-JtXpxqDqJ8P0UwEHwhxLzCIXQy97vlYBZR222Sbzb1q1Erex9ASrztJ29SyhWFQjod1AeFBaPzEEC8YvtZMIYg==", "dev": true, + "license": "Apache-2.0", "dependencies": { "lodash.clonedeep": "^4.5.0", "yargs-parser": "^20.2.7" From a6462fbe507d716b5d1b2da2d6de5e2cef17b52b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 27 Mar 2026 02:31:56 +0000 Subject: [PATCH 125/137] Bump handlebars from 4.7.8 to 4.7.9 in /doc Bumps [handlebars](https://github.com/handlebars-lang/handlebars.js) from 4.7.8 to 4.7.9. - [Release notes](https://github.com/handlebars-lang/handlebars.js/releases) - [Changelog](https://github.com/handlebars-lang/handlebars.js/blob/v4.7.9/release-notes.md) - [Commits](https://github.com/handlebars-lang/handlebars.js/compare/v4.7.8...v4.7.9) --- updated-dependencies: - dependency-name: handlebars dependency-version: 4.7.9 dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- doc/package-lock.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/package-lock.json b/doc/package-lock.json index 05f7dccd..d693fdc1 100644 --- a/doc/package-lock.json +++ b/doc/package-lock.json @@ -1102,9 +1102,9 @@ } }, "node_modules/handlebars": { - "version": "4.7.8", - "resolved": "https://registry.npmjs.org/handlebars/-/handlebars-4.7.8.tgz", - "integrity": "sha512-vafaFqs8MZkRrSX7sFVUdo3ap/eNiLnb4IakshzvP56X5Nr1iGKAIqdX6tMlm6HcNRIkr6AxO5jFEoJzzpT8aQ==", + "version": "4.7.9", + "resolved": "https://registry.npmjs.org/handlebars/-/handlebars-4.7.9.tgz", + "integrity": "sha512-4E71E0rpOaQuJR2A3xDZ+GM1HyWYv1clR58tC8emQNeQe3RH7MAzSbat+V0wG78LQBo6m6bzSG/L4pBuCsgnUQ==", "dev": true, "license": "MIT", "dependencies": { From 5d44bdce640ca73424ff312358a7c852894d3817 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 27 Mar 2026 15:22:07 -0400 Subject: [PATCH 126/137] Add reproducer test set --- test/Jamfile | 1 + test/github_issue_377.cpp | 45 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 test/github_issue_377.cpp diff --git a/test/Jamfile b/test/Jamfile index de0ae2d9..4c18efb3 100644 --- a/test/Jamfile +++ b/test/Jamfile @@ -112,6 +112,7 @@ run github_issue_207.cpp ; run github_issue_210.cpp ; run github_issue_221.cpp ; run github_issue_272.cpp ; +run github_issue_377.cpp ; # Compilation of individual headers compile compile_tests/int128_master_header_compile.cpp ; diff --git a/test/github_issue_377.cpp b/test/github_issue_377.cpp new file mode 100644 index 00000000..039e152c --- /dev/null +++ b/test/github_issue_377.cpp @@ -0,0 +1,45 @@ +// Copyright 2026 Matt Borland +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt +// +// See: https://github.com/cppalliance/int128/issues/377 + +#include +#include +#include +#include + +using namespace boost::int128; + +template +void test_div_by_one() +{ + constexpr auto min_val {std::numeric_limits::min()}; + BOOST_TEST_EQ(min_val, min_val / T{1}); +} + +template +void test_other_vals() +{ + constexpr auto min_val {std::numeric_limits::min()}; + constexpr auto min_div_2 {BOOST_INT128_INT128_C(-85070591730234615865843651857942052864)}; + constexpr auto min_div_4 {BOOST_INT128_INT128_C(-42535295865117307932921825928971026432)}; + constexpr auto min_div_16 {BOOST_INT128_INT128_C(-10633823966279326983230456482242756608)}; + + BOOST_TEST_EQ(min_div_2, min_val / T{2}); + BOOST_TEST_EQ(min_div_4, min_val / T{4}); + BOOST_TEST_EQ(min_div_16, min_val / T{16}); +} + +int main() +{ + test_div_by_one(); + test_div_by_one(); + test_div_by_one(); + + test_other_vals(); + test_other_vals(); + test_other_vals(); + + return boost::report_errors(); +} From 67b894db0f5fd582ede798025f8cb19990929cea Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 27 Mar 2026 15:22:18 -0400 Subject: [PATCH 127/137] Add check for abs(min) --- include/boost/int128/detail/int128_imp.hpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/include/boost/int128/detail/int128_imp.hpp b/include/boost/int128/detail/int128_imp.hpp index 0a1be3f8..e1378e79 100644 --- a/include/boost/int128/detail/int128_imp.hpp +++ b/include/boost/int128/detail/int128_imp.hpp @@ -2881,18 +2881,18 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const return {0, 0}; } + constexpr int128_t min_val {INT64_MIN, 0}; const auto abs_lhs {abs(lhs)}; const auto abs_rhs {abs(rhs)}; - if (abs_lhs < abs_rhs) + if (lhs != min_val && abs_lhs < abs_rhs) { return {0,0}; } #if defined(BOOST_INT128_HAS_INT128) - else - { - return static_cast(static_cast(lhs) / static_cast(rhs)); - } + + return 
static_cast(static_cast(lhs) / static_cast(rhs)); + #else int128_t quotient {}; @@ -2989,11 +2989,12 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const int128_t lhs, const int128_t quotient {}; + constexpr int128_t min_val {INT64_MIN, 0}; const auto negative_res {static_cast((lhs.high < 0) ^ (rhs < 0))}; const auto abs_rhs {rhs < 0 ? -rhs : rhs}; const auto abs_lhs {abs(lhs)}; - if (abs_lhs < abs_rhs) + if (lhs != min_val && abs_lhs < abs_rhs) { return {0, 0}; } From dfc0f969b9b910ac62b8666371b6f2409de7f419 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 27 Mar 2026 15:22:32 -0400 Subject: [PATCH 128/137] Properly cast the high word --- include/boost/int128/detail/common_div.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/boost/int128/detail/common_div.hpp b/include/boost/int128/detail/common_div.hpp index fd3826a0..b434ef06 100644 --- a/include/boost/int128/detail/common_div.hpp +++ b/include/boost/int128/detail/common_div.hpp @@ -56,9 +56,11 @@ BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr void half_word_div( template BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr void half_word_div(const T& lhs, const std::uint32_t rhs, T& quotient) noexcept { + using high_word_type = decltype(T{}.high); + BOOST_INT128_ASSUME(rhs != 0); // LCOV_EXCL_LINE - quotient.high = lhs.high / rhs; + quotient.high = static_cast(static_cast(lhs.high) / rhs); auto remainder {((static_cast(lhs.high) % rhs) << 32) | (lhs.low >> 32)}; quotient.low = (remainder / rhs) << 32; remainder = ((remainder % rhs) << 32) | (lhs.low & UINT32_MAX); From fb7a4b1b3d5d3f4f61169df99a2454c19da10909 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 27 Mar 2026 15:47:11 -0400 Subject: [PATCH 129/137] Fix UBSAN error --- include/boost/int128/detail/int128_imp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/boost/int128/detail/int128_imp.hpp b/include/boost/int128/detail/int128_imp.hpp index 
e1378e79..b57c756b 100644 --- a/include/boost/int128/detail/int128_imp.hpp +++ b/include/boost/int128/detail/int128_imp.hpp @@ -317,7 +317,7 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator+(const BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator-(const int128_t value) noexcept { - return (value.low == 0) ? int128_t{-value.high, 0} : + return (value.low == 0) ? int128_t{static_cast(0ULL - static_cast(value.high)), 0} : int128_t{~value.high, ~value.low + 1}; } From 65545f9fd88a2825f18fd650dc099d98bf8f137c Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 27 Mar 2026 15:47:28 -0400 Subject: [PATCH 130/137] Similar corrections for modulo --- include/boost/int128/detail/int128_imp.hpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/include/boost/int128/detail/int128_imp.hpp b/include/boost/int128/detail/int128_imp.hpp index b57c756b..7c676fa6 100644 --- a/include/boost/int128/detail/int128_imp.hpp +++ b/include/boost/int128/detail/int128_imp.hpp @@ -2964,7 +2964,8 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const UnsignedInteger lhs, { auto abs_rhs {abs(rhs)}; const auto res {static_cast(lhs) / abs_rhs.low}; - return int128_t{rhs.high, res}; + const int128_t result {0, res}; + return rhs < 0 ? -result : result; } #else @@ -3215,9 +3216,9 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const UnsignedInteger lhs, return lhs; } - const int128_t remainder {0, static_cast(lhs) % rhs.low}; + const int128_t remainder {0, static_cast(lhs) % abs_rhs.low}; - return rhs < 0 ? 
-remainder : remainder; + return remainder; #else @@ -3248,10 +3249,11 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const int128_t lhs, const return {0, 0}; } + constexpr int128_t min_val {INT64_MIN, 0}; const auto abs_lhs {abs(lhs)}; const auto abs_rhs {abs(rhs)}; - if (abs_rhs > abs_lhs) + if (lhs != min_val && rhs != min_val && abs_rhs > abs_lhs) { return lhs; } From cf10f2b3e96f6e8a0342caf19ee5bd3aae9e7475 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 27 Mar 2026 15:47:37 -0400 Subject: [PATCH 131/137] Fix copy paste error --- include/boost/int128/detail/int128_imp.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/boost/int128/detail/int128_imp.hpp b/include/boost/int128/detail/int128_imp.hpp index 7c676fa6..715b0023 100644 --- a/include/boost/int128/detail/int128_imp.hpp +++ b/include/boost/int128/detail/int128_imp.hpp @@ -2058,7 +2058,7 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator>>(const return 0; } - return lhs << rhs.low; + return lhs >> rhs.low; } #ifdef BOOST_INT128_HAS_INT128 @@ -2072,7 +2072,7 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_u128 oper return 0; } - return lhs << rhs.low; + return lhs >> rhs.low; } BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_i128 operator>>(const detail::builtin_i128 lhs, const int128_t rhs) noexcept @@ -2084,7 +2084,7 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_i128 oper return 0; } - return lhs << rhs.low; + return lhs >> rhs.low; } #endif From e80e7f11cfcd43215b7aa727ffe56b281ec9a70e Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 27 Mar 2026 15:47:54 -0400 Subject: [PATCH 132/137] Consolidate the other bugs --- test/github_issue_377.cpp | 105 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/test/github_issue_377.cpp b/test/github_issue_377.cpp index 039e152c..09ace1e5 100644 --- a/test/github_issue_377.cpp 
+++ b/test/github_issue_377.cpp @@ -4,6 +4,7 @@ // // See: https://github.com/cppalliance/int128/issues/377 +#define BOOST_INT128_ALLOW_SIGN_CONVERSION #include #include #include @@ -31,6 +32,105 @@ void test_other_vals() BOOST_TEST_EQ(min_div_16, min_val / T{16}); } +// Bug 1: operator>>(int128_t, int128_t) was calling << instead of >> +void test_right_shift_int128_amount() +{ + const auto val {int128_t(0, 0xFF00)}; + const auto shift_4 {int128_t(0, 4)}; + + // Right-shift with int128_t shift amount must match integer shift + BOOST_TEST_EQ(val >> shift_4, val >> 4); + + const auto expected_ff0 {int128_t(0, 0xFF0)}; + BOOST_TEST_EQ(val >> shift_4, expected_ff0); + + // Test >>= with int128_t rhs + auto val2 {val}; + val2 >>= shift_4; + BOOST_TEST_EQ(val2, expected_ff0); + + // Cross-word shift + const auto big_val {int128_t(0x1234, 0)}; + const auto shift_64 {int128_t(0, 64)}; + const auto expected_1234 {int128_t(0, 0x1234)}; + BOOST_TEST_EQ(big_val >> shift_64, expected_1234); + + // Arithmetic right shift preserves sign for negative values + constexpr auto min_val {std::numeric_limits::min()}; + const auto shift_1 {int128_t(0, 1)}; + BOOST_TEST_EQ(min_val >> shift_1, min_val >> 1); + BOOST_TEST((min_val >> shift_1) < 0); +} + +// Bug 2: UnsignedInteger / int128_t returned {rhs.high, res} instead of proper sign handling +void test_unsigned_div_negative_int128() +{ + const std::uint64_t lhs {10}; + const auto neg3 {-int128_t(0, 3)}; + const auto pos3 {int128_t(0, 3)}; + const auto expected_neg3 {-int128_t(0, 3)}; + const auto expected_pos3 {int128_t(0, 3)}; + + // 10 / -3 = -3 + BOOST_TEST_EQ(lhs / neg3, expected_neg3); + + // 10 / 3 = 3 + BOOST_TEST_EQ(lhs / pos3, expected_pos3); + + // 7 / -1 = -7 + const std::uint64_t seven {7}; + const auto neg1 {-int128_t(0, 1)}; + const auto expected_neg7 {-int128_t(0, 7)}; + BOOST_TEST_EQ(seven / neg1, expected_neg7); +} + +// Bug 3: UnsignedInteger % int128_t used rhs.low instead of abs_rhs.low +// and applied wrong sign to 
remainder +void test_unsigned_mod_negative_int128() +{ + const std::uint64_t lhs {10}; + const auto neg3 {-int128_t(0, 3)}; + const auto pos3 {int128_t(0, 3)}; + const auto expected_1 {int128_t(0, 1)}; + + // 10 % -3 = 1 (remainder has sign of dividend, which is unsigned/positive) + BOOST_TEST_EQ(lhs % neg3, expected_1); + + // 10 % 3 = 1 + BOOST_TEST_EQ(lhs % pos3, expected_1); + + // 12 % -5 = 2 + const std::uint64_t twelve {12}; + const auto neg5 {-int128_t(0, 5)}; + const auto expected_2 {int128_t(0, 2)}; + BOOST_TEST_EQ(twelve % neg5, expected_2); +} + +// Bug 4: operator%(int128_t, int128_t) early return was wrong when lhs = INT128_MIN +// because abs(INT128_MIN) overflows back to INT128_MIN +void test_min_val_modulo() +{ + constexpr auto min_val {std::numeric_limits::min()}; + const auto zero {int128_t(0, 0)}; + + // INT128_MIN % 1 = 0 + const auto one {int128_t(0, 1)}; + BOOST_TEST_EQ(min_val % one, zero); + + // INT128_MIN % 2 = 0 (2^127 is even) + const auto two {int128_t(0, 2)}; + BOOST_TEST_EQ(min_val % two, zero); + + // INT128_MIN % 3 = -2 + // -170141183460469231731687303715884105728 = -56713727820156410577229101238628035242 * 3 + (-2) + const auto three {int128_t(0, 3)}; + const auto expected_neg2 {BOOST_INT128_INT128_C(-2)}; + BOOST_TEST_EQ(min_val % three, expected_neg2); + + // INT128_MIN % INT128_MIN = 0 + BOOST_TEST_EQ(min_val % min_val, zero); +} + int main() { test_div_by_one(); @@ -41,5 +141,10 @@ int main() test_other_vals(); test_other_vals(); + test_right_shift_int128_amount(); + test_unsigned_div_negative_int128(); + test_unsigned_mod_negative_int128(); + test_min_val_modulo(); + return boost::report_errors(); } From 3ffebe0c0a4d3eb75c8b2ab6243fdb766dbea064 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 27 Mar 2026 15:59:07 -0400 Subject: [PATCH 133/137] Workaround for old clang --- test/github_issue_377.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/github_issue_377.cpp 
b/test/github_issue_377.cpp index 09ace1e5..44f21817 100644 --- a/test/github_issue_377.cpp +++ b/test/github_issue_377.cpp @@ -23,9 +23,9 @@ template void test_other_vals() { constexpr auto min_val {std::numeric_limits::min()}; - constexpr auto min_div_2 {BOOST_INT128_INT128_C(-85070591730234615865843651857942052864)}; - constexpr auto min_div_4 {BOOST_INT128_INT128_C(-42535295865117307932921825928971026432)}; - constexpr auto min_div_16 {BOOST_INT128_INT128_C(-10633823966279326983230456482242756608)}; + const auto min_div_2 {BOOST_INT128_INT128_C(-85070591730234615865843651857942052864)}; + const auto min_div_4 {BOOST_INT128_INT128_C(-42535295865117307932921825928971026432)}; + const auto min_div_16 {BOOST_INT128_INT128_C(-10633823966279326983230456482242756608)}; BOOST_TEST_EQ(min_div_2, min_val / T{2}); BOOST_TEST_EQ(min_div_4, min_val / T{4}); From edd235e1b58dfec073af7f2d6302cb72a621573d Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 31 Mar 2026 11:48:03 -0400 Subject: [PATCH 134/137] Flip direction --- include/boost/int128/detail/common_div.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/boost/int128/detail/common_div.hpp b/include/boost/int128/detail/common_div.hpp index b434ef06..3ad30332 100644 --- a/include/boost/int128/detail/common_div.hpp +++ b/include/boost/int128/detail/common_div.hpp @@ -129,7 +129,7 @@ BOOST_INT128_HOST_DEVICE constexpr void knuth_divide(std::uint32_t (&u)[u_size], while (q_hat > UINT32_MAX || (q_hat * vn[n-2]) > ((r_hat << 32) | un[j+n-2])) { - q_hat--; + --q_hat; r_hat += vn[n-1]; if (r_hat > UINT32_MAX) { From 491bd26271b2d5df0c38f2d33fcc3b699ffd833a Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 31 Mar 2026 13:12:41 -0400 Subject: [PATCH 135/137] Remove useless cast --- include/boost/int128/detail/int128_imp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/boost/int128/detail/int128_imp.hpp b/include/boost/int128/detail/int128_imp.hpp index 
715b0023..a9bdd9ca 100644 --- a/include/boost/int128/detail/int128_imp.hpp +++ b/include/boost/int128/detail/int128_imp.hpp @@ -3264,7 +3264,7 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const int128_t lhs, const } #else - const auto is_neg{static_cast(lhs < 0)}; + const auto is_neg{lhs < 0}; int128_t remainder {}; From d725c68c4c4650b834b42496439a30906170e731 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 31 Mar 2026 13:12:55 -0400 Subject: [PATCH 136/137] Ignore useless cast warning since it's only useless on 32-bit --- include/boost/int128/detail/mini_from_chars.hpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/include/boost/int128/detail/mini_from_chars.hpp b/include/boost/int128/detail/mini_from_chars.hpp index a3198144..b89c088e 100644 --- a/include/boost/int128/detail/mini_from_chars.hpp +++ b/include/boost/int128/detail/mini_from_chars.hpp @@ -183,7 +183,7 @@ BOOST_INT128_HOST_DEVICE constexpr int from_chars_integer_impl(const char* first } // Return the parsed value, adding the sign back if applicable - // If we have overflowed then we do not return the result + // If we have overflowed, then we do not return the result if (overflowed) { return EDOM; @@ -201,7 +201,18 @@ BOOST_INT128_HOST_DEVICE constexpr int from_chars_integer_impl(const char* first // This value will be negative to differentiate from errno values // since they are in the range of acceptable distances + + // This cast is useless on 32-bit platforms + #ifdef __GNUC__ + # pragma GCC diagnostic push + # pragma GCC diagnostic ignored "-Wuseless-cast" + #endif + return static_cast(first - next); + + #ifdef __GNUC__ + # pragma GCC diagnostic pop + #endif } } // namespace impl From f3884ceb4b0a7ba92ac2f800f11a03751de9213c Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 31 Mar 2026 13:26:38 -0400 Subject: [PATCH 137/137] Clang does not have -Wuseless-cast --- include/boost/int128/detail/mini_from_chars.hpp | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/include/boost/int128/detail/mini_from_chars.hpp b/include/boost/int128/detail/mini_from_chars.hpp index b89c088e..a399e1d8 100644 --- a/include/boost/int128/detail/mini_from_chars.hpp +++ b/include/boost/int128/detail/mini_from_chars.hpp @@ -203,14 +203,14 @@ BOOST_INT128_HOST_DEVICE constexpr int from_chars_integer_impl(const char* first // since they are in the range of acceptable distances // This cast is useless on 32-bit platforms - #ifdef __GNUC__ + #if defined(__GNUC__) && !defined(__clang__) # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wuseless-cast" #endif return static_cast(first - next); - #ifdef __GNUC__ + #if defined(__GNUC__) && !defined(__clang__) # pragma GCC diagnostic pop #endif }