From 77413e675a7b26e4f7570386d66eaa92face848a Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Mon, 4 Mar 2024 10:50:40 +0100 Subject: [PATCH 001/432] Downgrade Python to more supported version by PM, bring back my kazoo shutdown implementation which works --- docker/compose/Dockerfile | 4 ++-- docker/scripts/nbl/ci/dev/lib/kazoo.py | 18 +++++------------- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/docker/compose/Dockerfile b/docker/compose/Dockerfile index 60637594b9..a3f166eca0 100644 --- a/docker/compose/Dockerfile +++ b/docker/compose/Dockerfile @@ -68,9 +68,9 @@ RUN ` choco install -y strawberryperl --version 5.28.2.1 RUN ` - # Download & install Python 3.11.7 + # Download & install Python 3.11.6 ` - choco install -y python --version 3.11.7 + choco install -y python --version 3.11.6 RUN ` # Donwload debugpy Python module diff --git a/docker/scripts/nbl/ci/dev/lib/kazoo.py b/docker/scripts/nbl/ci/dev/lib/kazoo.py index 47d5b54205..2e67b4edf4 100644 --- a/docker/scripts/nbl/ci/dev/lib/kazoo.py +++ b/docker/scripts/nbl/ci/dev/lib/kazoo.py @@ -62,16 +62,6 @@ def appendKazooAtomic(self, zNodePath, data): pass -def shutdownOs(): - if os.name == 'nt' or os.name == 'java': # For windows and java (in the rare case of running jython) - return os.system('shutdown /s /f 0') - elif os.name == 'posix': # For Unix, Linux, Mac - return os.system('shutdown -h now') - else: - print('Unknown operating system') # Docs for os.name listed only the above three cases - return 1 - - def healthyCheck(host): try: with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: @@ -92,8 +82,10 @@ def healthyCheck(host): if shutdown: print("Requested shutdown...") - if shutdownOs() != 0: - print(f"Could not shutdown container") + try: + subprocess.run(f"shutdown /s /f", check=True) + except subprocess.CalledProcessError as e: + print(f"Could not shutdown container because of: {e.stderr}") return True except (socket.error, socket.timeout): @@ -110,4 +102,4 @@ def healthyCheck(host): if healthyCheck(args.host): sys.exit(0) # healthy else: - sys.exit(1) # not healthy + sys.exit(1) # not healthy \ No newline at end of file From d6ce5d2376f1eb7b08b45074c5272c9cdc4d5934 Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Wed, 6 Mar 2024 20:18:27 +0100 Subject: [PATCH 002/432] Create cmake/cpack/find/nabla.cmake & cmake/cpack/find/compoment/template.cmake, script component install rules has been added creating interface install variables in separate files which can be used with find_package. 
TODO: global configuration Nabla file including the component files if found in package, makes flexible search requests for package parts --- 3rdparty/CMakeLists.txt | 2 +- cmake/cpack/find/compoment/template.cmake | 1 + cmake/cpack/find/nabla.cmake | 61 +++++++++++++++++++++++ cmake/cpack/package.cmake | 3 +- 4 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 cmake/cpack/find/compoment/template.cmake create mode 100644 cmake/cpack/find/nabla.cmake diff --git a/3rdparty/CMakeLists.txt b/3rdparty/CMakeLists.txt index 6a4c33c0a9..7c2bd276ae 100755 --- a/3rdparty/CMakeLists.txt +++ b/3rdparty/CMakeLists.txt @@ -63,8 +63,8 @@ set(_OLD_SKIP_INSTALL_ALL ${SKIP_INSTALL_ALL}) set(BUILD_SHARED_LIBS OFF) set(SKIP_INSTALL_ALL ON) -add_subdirectory(zlib zlib EXCLUDE_FROM_ALL) file(LOCK "${CMAKE_CURRENT_SOURCE_DIR}/zlib" DIRECTORY GUARD PROCESS RESULT_VARIABLE NBL_LOCK TIMEOUT 60) +add_subdirectory(zlib zlib EXCLUDE_FROM_ALL) if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/zlib/zconf.h.included") execute_process(COMMAND "${CMAKE_COMMAND}" -E rename zconf.h.included zconf.h WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/zlib" diff --git a/cmake/cpack/find/compoment/template.cmake b/cmake/cpack/find/compoment/template.cmake new file mode 100644 index 0000000000..ce0c6e9108 --- /dev/null +++ b/cmake/cpack/find/compoment/template.cmake @@ -0,0 +1 @@ +list(TRANSFORM @_NBL_PROXY_@ PREPEND "${CMAKE_CURRENT_LIST_DIR}/") \ No newline at end of file diff --git a/cmake/cpack/find/nabla.cmake b/cmake/cpack/find/nabla.cmake new file mode 100644 index 0000000000..2258263cc5 --- /dev/null +++ b/cmake/cpack/find/nabla.cmake @@ -0,0 +1,61 @@ +function(NBL_GEN_FIND_NABLA_CODE_IMPL _COMPOMENT_ _SPATH_) +string(APPEND NBL_FIND_NABLA_IMPL "set(_COMPOMENT_ ${_COMPOMENT_})\nset(_SPATH_ ${_SPATH_})\n\nset(NBL_ROOT_PATH ${NBL_ROOT_PATH})\n\n") +string(APPEND NBL_FIND_NABLA_IMPL +[=[ +if(CMAKE_INSTALL_CONFIG_NAME MATCHES "^([Dd][Ee][Bb][Uu][Gg])$") + set(NBL_CONFIG_PREFIX_PATH debug) +elseif(CMAKE_INSTALL_CONFIG_NAME MATCHES "^([Rr][Ee][Ll][Ww][Ii][Tt][Hh][Dd][Ee][Bb][Ii][Nn][Ff][Oo])$") + set(NBL_CONFIG_PREFIX_PATH relwithdebinfo) +elseif(CMAKE_INSTALL_CONFIG_NAME MATCHES "^([Rr][Ee][Ll][Ee][Aa][Ss][Ee])$") + unset(NBL_CONFIG_PREFIX_PATH) +else() + message(FATAL_ERROR "Internal error, requested \"${CMAKE_INSTALL_CONFIG_NAME}\" configuration is invalid!") +endif() + +string(REPLACE "${CMAKE_INSTALL_PREFIX}" "" NBL_CMAKE_INSTALL_MANIFEST_CONTENT "${CMAKE_INSTALL_MANIFEST_FILES}") +list(REMOVE_DUPLICATES NBL_CMAKE_INSTALL_MANIFEST_CONTENT) + +set(_NBL_PREFIX_ "${CMAKE_INSTALL_PREFIX}/${NBL_CONFIG_PREFIX_PATH}") +string(TOUPPER "${CMAKE_INSTALL_CONFIG_NAME}" _NBL_CONFIG_) +string(TOUPPER "${_COMPOMENT_}" _Cu_) +string(TOLOWER "${_COMPOMENT_}" _Cl_) + +set(NBL_CMAKE_COMPOMENT_OUTPUT_DIRECTORY "${CMAKE_INSTALL_PREFIX}/${NBL_CONFIG_PREFIX_PATH}/cmake/compoment") +set(NBL_CMAKE_COMPOMENT_OUTPUT_FILE "${NBL_CMAKE_COMPOMENT_OUTPUT_DIRECTORY}/NablaConfig${_COMPOMENT_}.cmake") + +cmake_path(RELATIVE_PATH CMAKE_INSTALL_PREFIX BASE_DIRECTORY "${NBL_CMAKE_COMPOMENT_OUTPUT_DIRECTORY}" OUTPUT_VARIABLE _NBL_REL_TO_PREFIX_) + +foreach(_MANIFEST_INSTALL_REL_FILE_ IN LISTS NBL_CMAKE_INSTALL_MANIFEST_CONTENT) + string(FIND "${_MANIFEST_INSTALL_REL_FILE_}" "/${_SPATH_}/" _NBL_FOUND_) + + if(NOT "${_NBL_FOUND_}" STREQUAL "-1") + set(_X_ "${_NBL_REL_TO_PREFIX_}/${_MANIFEST_INSTALL_REL_FILE_}") + cmake_path(NORMAL_PATH _X_ OUTPUT_VARIABLE _X_) + + list(APPEND NBL_INSTALL_${_Cu_}_${_NBL_CONFIG_} "${_X_}") + endif() +endforeach() + 
+set(_NBL_PROXY_ NBL_INSTALL_${_Cu_}_${_NBL_CONFIG_}) + +string(APPEND NBL_MANIFEST_IMPL "set(${_NBL_PROXY_}\n\t${${_NBL_PROXY_}}\n)") +string(REPLACE ";" "\n\t" NBL_MANIFEST_IMPL "${NBL_MANIFEST_IMPL}") +string(CONFIGURE "${NBL_MANIFEST_IMPL}" NBL_MANIFEST_IMPL_CONF) +file(WRITE "${NBL_CMAKE_COMPOMENT_OUTPUT_FILE}" "${NBL_MANIFEST_IMPL_CONF}") + +# the reason behind this weird looking thing is you cannot nest bracket arguments https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#bracket-argument +# some variables need evaluation but some not and must be literals, to make this code read-able & work we do a small workaround +configure_file("${NBL_ROOT_PATH}/cmake/cpack/find/compoment/template.cmake" "${NBL_CMAKE_COMPOMENT_OUTPUT_FILE}.tmp" @ONLY) +file(READ "${NBL_CMAKE_COMPOMENT_OUTPUT_FILE}.tmp" _NBL_COMPOMENT_INCLUDE_LIST_TRANFORM_) +file(REMOVE "${NBL_CMAKE_COMPOMENT_OUTPUT_FILE}.tmp") +file(APPEND "${NBL_CMAKE_COMPOMENT_OUTPUT_FILE}" "\n${_NBL_COMPOMENT_INCLUDE_LIST_TRANFORM_}") +]=] +) + +install(CODE "${NBL_FIND_NABLA_IMPL}" COMPONENT ${_COMPOMENT_}) +endfunction() + +# Generate compoment configurations +NBL_GEN_FIND_NABLA_CODE_IMPL(Headers include) +NBL_GEN_FIND_NABLA_CODE_IMPL(Libraries lib) +NBL_GEN_FIND_NABLA_CODE_IMPL(Runtimes runtime) \ No newline at end of file diff --git a/cmake/cpack/package.cmake b/cmake/cpack/package.cmake index a72c7aadfe..3991bfeddf 100644 --- a/cmake/cpack/package.cmake +++ b/cmake/cpack/package.cmake @@ -87,4 +87,5 @@ set(CPACK_COMPONENT_HEADERS_DEPENDS Libraries Runtimes) set(CPACK_THREADS 0) # try to use all threads for compression -include(CPack) \ No newline at end of file +include(CPack) +include("${CMAKE_CURRENT_LIST_DIR}/find/nabla.cmake") \ No newline at end of file From 15381bac3a7365231d75b6a000d4064a3ef820cd Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Mon, 18 Mar 2024 09:12:25 +0100 Subject: [PATCH 003/432] begin implementing FindPackageHandleStandardArgs module, save work --- cmake/cpack/find/compoment/template.cmake | 5 ++- cmake/cpack/find/nabla.cmake | 39 +++++++++++++++++------ 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/cmake/cpack/find/compoment/template.cmake b/cmake/cpack/find/compoment/template.cmake index ce0c6e9108..595053d410 100644 --- a/cmake/cpack/find/compoment/template.cmake +++ b/cmake/cpack/find/compoment/template.cmake @@ -1 +1,4 @@ -list(TRANSFORM @_NBL_PROXY_@ PREPEND "${CMAKE_CURRENT_LIST_DIR}/") \ No newline at end of file +list(TRANSFORM @_NBL_PROXY_@ PREPEND "${CMAKE_CURRENT_LIST_DIR}/") + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(@_NBL_PACKAGE_@ DEFAULT_MSG @_NBL_PROXY_@) \ No newline at end of file diff --git a/cmake/cpack/find/nabla.cmake b/cmake/cpack/find/nabla.cmake index 2258263cc5..8fa2938d12 100644 --- a/cmake/cpack/find/nabla.cmake +++ b/cmake/cpack/find/nabla.cmake @@ -1,4 +1,4 @@ -function(NBL_GEN_FIND_NABLA_CODE_IMPL _COMPOMENT_ _SPATH_) +function(NBL_GEN_FIND_NABLA_COMPONENT_CODE_IMPL _COMPOMENT_ _SPATH_) string(APPEND NBL_FIND_NABLA_IMPL "set(_COMPOMENT_ ${_COMPOMENT_})\nset(_SPATH_ ${_SPATH_})\n\nset(NBL_ROOT_PATH ${NBL_ROOT_PATH})\n\n") string(APPEND NBL_FIND_NABLA_IMPL [=[ @@ -12,16 +12,20 @@ else() message(FATAL_ERROR "Internal error, requested \"${CMAKE_INSTALL_CONFIG_NAME}\" configuration is invalid!") endif() +string(TOUPPER "${CMAKE_INSTALL_CONFIG_NAME}" _NBL_CONFIG_) +set(_NBL_PACKAGE_ Nabla${_COMPOMENT_}) + +set(NBL_CMAKE_OUTPUT_DIRECTORY "${CMAKE_INSTALL_PREFIX}/${NBL_CONFIG_PREFIX_PATH}/cmake") 
+set(NBL_CMAKE_COMPOMENT_OUTPUT_DIRECTORY "${NBL_CMAKE_OUTPUT_DIRECTORY}/component") + string(REPLACE "${CMAKE_INSTALL_PREFIX}" "" NBL_CMAKE_INSTALL_MANIFEST_CONTENT "${CMAKE_INSTALL_MANIFEST_FILES}") list(REMOVE_DUPLICATES NBL_CMAKE_INSTALL_MANIFEST_CONTENT) set(_NBL_PREFIX_ "${CMAKE_INSTALL_PREFIX}/${NBL_CONFIG_PREFIX_PATH}") -string(TOUPPER "${CMAKE_INSTALL_CONFIG_NAME}" _NBL_CONFIG_) string(TOUPPER "${_COMPOMENT_}" _Cu_) string(TOLOWER "${_COMPOMENT_}" _Cl_) -set(NBL_CMAKE_COMPOMENT_OUTPUT_DIRECTORY "${CMAKE_INSTALL_PREFIX}/${NBL_CONFIG_PREFIX_PATH}/cmake/compoment") -set(NBL_CMAKE_COMPOMENT_OUTPUT_FILE "${NBL_CMAKE_COMPOMENT_OUTPUT_DIRECTORY}/NablaConfig${_COMPOMENT_}.cmake") +set(NBL_CMAKE_COMPOMENT_OUTPUT_FILE "${NBL_CMAKE_COMPOMENT_OUTPUT_DIRECTORY}/${_NBL_PACKAGE_}Config.cmake") cmake_path(RELATIVE_PATH CMAKE_INSTALL_PREFIX BASE_DIRECTORY "${NBL_CMAKE_COMPOMENT_OUTPUT_DIRECTORY}" OUTPUT_VARIABLE _NBL_REL_TO_PREFIX_) @@ -32,11 +36,11 @@ foreach(_MANIFEST_INSTALL_REL_FILE_ IN LISTS NBL_CMAKE_INSTALL_MANIFEST_CONTENT) set(_X_ "${_NBL_REL_TO_PREFIX_}/${_MANIFEST_INSTALL_REL_FILE_}") cmake_path(NORMAL_PATH _X_ OUTPUT_VARIABLE _X_) - list(APPEND NBL_INSTALL_${_Cu_}_${_NBL_CONFIG_} "${_X_}") + list(APPEND NABLA_INSTALL_${_Cu_}_${_NBL_CONFIG_} "${_X_}") endif() endforeach() -set(_NBL_PROXY_ NBL_INSTALL_${_Cu_}_${_NBL_CONFIG_}) +set(_NBL_PROXY_ NABLA_INSTALL_${_Cu_}_${_NBL_CONFIG_}) string(APPEND NBL_MANIFEST_IMPL "set(${_NBL_PROXY_}\n\t${${_NBL_PROXY_}}\n)") string(REPLACE ";" "\n\t" NBL_MANIFEST_IMPL "${NBL_MANIFEST_IMPL}") @@ -49,13 +53,28 @@ configure_file("${NBL_ROOT_PATH}/cmake/cpack/find/compoment/template.cmake" "${N file(READ "${NBL_CMAKE_COMPOMENT_OUTPUT_FILE}.tmp" _NBL_COMPOMENT_INCLUDE_LIST_TRANFORM_) file(REMOVE "${NBL_CMAKE_COMPOMENT_OUTPUT_FILE}.tmp") file(APPEND "${NBL_CMAKE_COMPOMENT_OUTPUT_FILE}" "\n${_NBL_COMPOMENT_INCLUDE_LIST_TRANFORM_}") + +# Config + ]=] ) install(CODE "${NBL_FIND_NABLA_IMPL}" COMPONENT ${_COMPOMENT_}) endfunction() -# Generate compoment configurations -NBL_GEN_FIND_NABLA_CODE_IMPL(Headers include) -NBL_GEN_FIND_NABLA_CODE_IMPL(Libraries lib) -NBL_GEN_FIND_NABLA_CODE_IMPL(Runtimes runtime) \ No newline at end of file +function(NBL_GEN_FIND_NABLA_CONFIG_CODE_IMPL) +string(APPEND NBL_FIND_NABLA_IMPL +[=[ + +]=] + +install(CODE "${NBL_FIND_NABLA_IMPL}" ALL_COMPONENTS) +endfunction() + +# Generate component configurations +NBL_GEN_FIND_NABLA_COMPONENT_CODE_IMPL(Headers include) +NBL_GEN_FIND_NABLA_COMPONENT_CODE_IMPL(Libraries lib) +NBL_GEN_FIND_NABLA_COMPONENT_CODE_IMPL(Runtimes runtime) + +# Generate config file +NBL_GEN_FIND_NABLA_CONFIG_CODE_IMPL() \ No newline at end of file From 36696ed6af2a8250538c99b64b90c028a31fc53f Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Mon, 18 Mar 2024 14:17:56 +0100 Subject: [PATCH 004/432] Create dynamic & relocatable package generator, add config/template.cmake & licence/template.cmake, update apache zookeeper version --- cmake/cpack/find/config/template.cmake | 15 +++++++ cmake/cpack/find/licence/template.cmake | 3 ++ cmake/cpack/find/nabla.cmake | 54 +++++++++++++++---------- docker/compose/Dockerfile | 4 +- 4 files changed, 53 insertions(+), 23 deletions(-) create mode 100644 cmake/cpack/find/config/template.cmake create mode 100644 cmake/cpack/find/licence/template.cmake diff --git a/cmake/cpack/find/config/template.cmake b/cmake/cpack/find/config/template.cmake new file mode 100644 index 0000000000..a9ba30e354 --- /dev/null +++ b/cmake/cpack/find/config/template.cmake @@ -0,0 +1,15 @@ 
+find_package(@_NBL_PACKAGE_@ + REQUIRED + CONFIG + GLOBAL + PATHS "${CMAKE_CURRENT_LIST_DIR}/compoment" + NO_DEFAULT_PATH + NO_PACKAGE_ROOT_PATH + NO_CMAKE_PATH + NO_CMAKE_ENVIRONMENT_PATH + NO_SYSTEM_ENVIRONMENT_PATH + NO_CMAKE_PACKAGE_REGISTRY + NO_CMAKE_SYSTEM_PATH + NO_CMAKE_INSTALL_PREFIX + NO_CMAKE_SYSTEM_PACKAGE_REGISTRY +) \ No newline at end of file diff --git a/cmake/cpack/find/licence/template.cmake b/cmake/cpack/find/licence/template.cmake new file mode 100644 index 0000000000..f1bd9360ca --- /dev/null +++ b/cmake/cpack/find/licence/template.cmake @@ -0,0 +1,3 @@ +# Copyright (C) 2018-2040 - DevSH Graphics Programming Sp. z O.O. +# This file is part of the "Nabla Engine". +# For conditions of distribution and use, see copyright notice in nabla.h \ No newline at end of file diff --git a/cmake/cpack/find/nabla.cmake b/cmake/cpack/find/nabla.cmake index 8fa2938d12..3c6fcd0b1d 100644 --- a/cmake/cpack/find/nabla.cmake +++ b/cmake/cpack/find/nabla.cmake @@ -1,5 +1,5 @@ function(NBL_GEN_FIND_NABLA_COMPONENT_CODE_IMPL _COMPOMENT_ _SPATH_) -string(APPEND NBL_FIND_NABLA_IMPL "set(_COMPOMENT_ ${_COMPOMENT_})\nset(_SPATH_ ${_SPATH_})\n\nset(NBL_ROOT_PATH ${NBL_ROOT_PATH})\n\n") +string(APPEND NBL_FIND_NABLA_IMPL "set(_COMPOMENT_ ${_COMPOMENT_})\nset(_SPATH_ ${_SPATH_})\nset(NBL_ROOT_PATH ${NBL_ROOT_PATH})\nset(NBL_STATIC_BUILD ${NBL_STATIC_BUILD})\n\n") string(APPEND NBL_FIND_NABLA_IMPL [=[ if(CMAKE_INSTALL_CONFIG_NAME MATCHES "^([Dd][Ee][Bb][Uu][Gg])$") @@ -12,20 +12,34 @@ else() message(FATAL_ERROR "Internal error, requested \"${CMAKE_INSTALL_CONFIG_NAME}\" configuration is invalid!") endif() +string(TOUPPER "${_COMPOMENT_}" _Cu_) +string(TOLOWER "${_COMPOMENT_}" _Cl_) + string(TOUPPER "${CMAKE_INSTALL_CONFIG_NAME}" _NBL_CONFIG_) -set(_NBL_PACKAGE_ Nabla${_COMPOMENT_}) +string(TOLOWER "${CMAKE_INSTALL_CONFIG_NAME}" _NBL_CONFIG_L_) + +if(NBL_STATIC_BUILD) + set(NBL_LIBRARY_TYPE STATIC) +else() + set(NBL_LIBRARY_TYPE DYNAMIC) +endif() + +string(TOUPPER "${NBL_LIBRARY_TYPE}" _LTu_) +string(TOLOWER "${NBL_LIBRARY_TYPE}" _LTl_) + +set(_NBL_PACKAGE_ nabla-${_Cl_}-${_LTl_}-${_NBL_CONFIG_L_}) +set(_NBL_COMPLETE_P_CONFIG_ nabla-${_LTl_}-${_NBL_CONFIG_L_}) set(NBL_CMAKE_OUTPUT_DIRECTORY "${CMAKE_INSTALL_PREFIX}/${NBL_CONFIG_PREFIX_PATH}/cmake") -set(NBL_CMAKE_COMPOMENT_OUTPUT_DIRECTORY "${NBL_CMAKE_OUTPUT_DIRECTORY}/component") +set(NBL_CMAKE_COMPOMENT_OUTPUT_DIRECTORY "${NBL_CMAKE_OUTPUT_DIRECTORY}/compoment") string(REPLACE "${CMAKE_INSTALL_PREFIX}" "" NBL_CMAKE_INSTALL_MANIFEST_CONTENT "${CMAKE_INSTALL_MANIFEST_FILES}") list(REMOVE_DUPLICATES NBL_CMAKE_INSTALL_MANIFEST_CONTENT) set(_NBL_PREFIX_ "${CMAKE_INSTALL_PREFIX}/${NBL_CONFIG_PREFIX_PATH}") -string(TOUPPER "${_COMPOMENT_}" _Cu_) -string(TOLOWER "${_COMPOMENT_}" _Cl_) set(NBL_CMAKE_COMPOMENT_OUTPUT_FILE "${NBL_CMAKE_COMPOMENT_OUTPUT_DIRECTORY}/${_NBL_PACKAGE_}Config.cmake") +set(NBL_CMAKE_CONFIG_OUTPUT_FILE "${NBL_CMAKE_OUTPUT_DIRECTORY}/${_NBL_COMPLETE_P_CONFIG_}Config.cmake") cmake_path(RELATIVE_PATH CMAKE_INSTALL_PREFIX BASE_DIRECTORY "${NBL_CMAKE_COMPOMENT_OUTPUT_DIRECTORY}" OUTPUT_VARIABLE _NBL_REL_TO_PREFIX_) @@ -36,11 +50,11 @@ foreach(_MANIFEST_INSTALL_REL_FILE_ IN LISTS NBL_CMAKE_INSTALL_MANIFEST_CONTENT) set(_X_ "${_NBL_REL_TO_PREFIX_}/${_MANIFEST_INSTALL_REL_FILE_}") cmake_path(NORMAL_PATH _X_ OUTPUT_VARIABLE _X_) - list(APPEND NABLA_INSTALL_${_Cu_}_${_NBL_CONFIG_} "${_X_}") + list(APPEND NABLA_INSTALL_${_Cu_}_${_LTu_}_${_NBL_CONFIG_} "${_X_}") endif() endforeach() -set(_NBL_PROXY_ NABLA_INSTALL_${_Cu_}_${_NBL_CONFIG_}) 
+set(_NBL_PROXY_ NABLA_INSTALL_${_Cu_}_${_LTu_}_${_NBL_CONFIG_}) string(APPEND NBL_MANIFEST_IMPL "set(${_NBL_PROXY_}\n\t${${_NBL_PROXY_}}\n)") string(REPLACE ";" "\n\t" NBL_MANIFEST_IMPL "${NBL_MANIFEST_IMPL}") @@ -49,32 +63,30 @@ file(WRITE "${NBL_CMAKE_COMPOMENT_OUTPUT_FILE}" "${NBL_MANIFEST_IMPL_CONF}") # the reason behind this weird looking thing is you cannot nest bracket arguments https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#bracket-argument # some variables need evaluation but some not and must be literals, to make this code read-able & work we do a small workaround + +# Compoment configure_file("${NBL_ROOT_PATH}/cmake/cpack/find/compoment/template.cmake" "${NBL_CMAKE_COMPOMENT_OUTPUT_FILE}.tmp" @ONLY) file(READ "${NBL_CMAKE_COMPOMENT_OUTPUT_FILE}.tmp" _NBL_COMPOMENT_INCLUDE_LIST_TRANFORM_) file(REMOVE "${NBL_CMAKE_COMPOMENT_OUTPUT_FILE}.tmp") file(APPEND "${NBL_CMAKE_COMPOMENT_OUTPUT_FILE}" "\n${_NBL_COMPOMENT_INCLUDE_LIST_TRANFORM_}") # Config +if(NOT EXISTS "${NBL_CMAKE_CONFIG_OUTPUT_FILE}") + file(READ "${NBL_ROOT_PATH}/cmake/cpack/find/licence/template.cmake" _NBL_LICENCE_) + file(APPEND "${NBL_CMAKE_CONFIG_OUTPUT_FILE}" "${_NBL_LICENCE_}") +endif() +configure_file("${NBL_ROOT_PATH}/cmake/cpack/find/config/template.cmake" "${NBL_CMAKE_CONFIG_OUTPUT_FILE}.tmp" @ONLY) +file(READ "${NBL_CMAKE_CONFIG_OUTPUT_FILE}.tmp" _NBL_CONFIG_FILE_CONTENT_) +file(REMOVE "${NBL_CMAKE_CONFIG_OUTPUT_FILE}.tmp") +file(APPEND "${NBL_CMAKE_CONFIG_OUTPUT_FILE}" "\n\n${_NBL_CONFIG_FILE_CONTENT_}") ]=] ) install(CODE "${NBL_FIND_NABLA_IMPL}" COMPONENT ${_COMPOMENT_}) endfunction() -function(NBL_GEN_FIND_NABLA_CONFIG_CODE_IMPL) -string(APPEND NBL_FIND_NABLA_IMPL -[=[ - -]=] - -install(CODE "${NBL_FIND_NABLA_IMPL}" ALL_COMPONENTS) -endfunction() - -# Generate component configurations +# Generate compoment configurations NBL_GEN_FIND_NABLA_COMPONENT_CODE_IMPL(Headers include) NBL_GEN_FIND_NABLA_COMPONENT_CODE_IMPL(Libraries lib) -NBL_GEN_FIND_NABLA_COMPONENT_CODE_IMPL(Runtimes runtime) - -# Generate config file -NBL_GEN_FIND_NABLA_CONFIG_CODE_IMPL() \ No newline at end of file +NBL_GEN_FIND_NABLA_COMPONENT_CODE_IMPL(Runtimes runtime) \ No newline at end of file diff --git a/docker/compose/Dockerfile b/docker/compose/Dockerfile index a3f166eca0..959c5de7b3 100644 --- a/docker/compose/Dockerfile +++ b/docker/compose/Dockerfile @@ -87,7 +87,7 @@ ARG APACHE_ZOOKEEPER_INSTALL_DIRECTORY RUN ` # Download Apache ZooKeeper ` - curl -SL --output zookeeper.zip https://dlcdn.apache.org/zookeeper/zookeeper-3.8.3/apache-zookeeper-3.8.3-bin.tar.gz ` + curl -SL --output zookeeper.zip https://dlcdn.apache.org/zookeeper/stable/apache-zookeeper-3.8.4-bin.tar.gz ` ` # Create install directory ` @@ -101,7 +101,7 @@ RUN ` ` && del /q zookeeper.zip ` ` - && setx PATH "%PATH%;%APACHE_ZOOKEEPER_INSTALL_DIRECTORY%\apache-zookeeper-3.8.3-bin\bin" /M + && setx PATH "%PATH%;%APACHE_ZOOKEEPER_INSTALL_DIRECTORY%\apache-zookeeper-3.8.4-bin\bin" /M RUN ` # Download kazoo 2.8.0 Python (more recent versions doesn't work well with Windows) module From 7b17a40e1aa6a9e27f077308283d560d2022b1cc Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Mon, 18 Mar 2024 20:05:41 +0100 Subject: [PATCH 005/432] Implement FindNabla.cmake, add NABLA_INSTALL__DIRECTORY__ install package variable, perform tests --- CMakeLists.txt | 3 ++ cmake/FindNabla.cmake | 39 +++++++++++++++++++++++ cmake/cpack/find/compoment/template.cmake | 4 ++- cmake/cpack/find/nabla.cmake | 11 +++++-- 4 files changed, 53 insertions(+), 4 deletions(-) create mode 
100644 cmake/FindNabla.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 7973541830..83f53ac1bf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -362,3 +362,6 @@ option(NBL_CPACK_INCLUDE_EXAMPLES "CPack with examples and media" ON) include(cpack/package) export(TARGETS ${_NBL_3RDPARTY_TARGETS_} Nabla NAMESPACE Nabla:: APPEND FILE ${NBL_ROOT_PATH_BINARY}/NablaExport.cmake) + +#set(NBL_CONFIG_ROOT_DIRECTORY "C:/Users/arekl/Desktop/ci-nabla-amd64-mt-s-15381bac3a7365231d75b6a000d4064a3ef820cd-win64") +#find_package(Nabla REQUIRED) \ No newline at end of file diff --git a/cmake/FindNabla.cmake b/cmake/FindNabla.cmake new file mode 100644 index 0000000000..152933d02d --- /dev/null +++ b/cmake/FindNabla.cmake @@ -0,0 +1,39 @@ +# Define NBL_CONFIG_ROOT_DIRECTORY +# variable to help the module find +# Nabla package + +if(NOT DEFINED CMAKE_CONFIGURATION_TYPES) + set(CMAKE_CONFIGURATION_TYPES Release;RelWithDebInfo;Debug) +endif() + +if(NOT DEFINED NBL_PACKAGE_STATIC) # turn ON NBL_PACKAGE_STATIC to look for package with STATIC library type, turn off to look for DYNAMIC + if(${NBL_STATIC_BUILD}) # internal, if called with Nabla's build system it will get detected autoamtically + set(NBL_PACKAGE_STATIC ON) + else() + message(FATAL_ERROR "NBL_PACKAGE_STATIC must be defined!") + endif() +endif() + +if(NBL_PACKAGE_STATIC) + set(NBL_LIBRARY_TYPE static) +else() + set(NBL_LIBRARY_TYPE dynamic) +endif() + +foreach(X IN LISTS CMAKE_CONFIGURATION_TYPES) + if(NOT "${X}" STREQUAL "") + string(TOLOWER "nabla-${NBL_LIBRARY_TYPE}-${X}" _NBL_TARGET_PACKAGE_) + + if(DEFINED NBL_CONFIG_ROOT_DIRECTORY) + file(GLOB_RECURSE _NBL_G_CONFIG_ROOT_DIRECTORY_ "${NBL_CONFIG_ROOT_DIRECTORY}/*/${_NBL_TARGET_PACKAGE_}Config.cmake") + cmake_path(GET _NBL_G_CONFIG_ROOT_DIRECTORY_ PARENT_PATH _NBL_G_CONFIG_ROOT_DIRECTORY_) + else() + unset(_NBL_G_CONFIG_ROOT_DIRECTORY_) + endif() + + find_package(${_NBL_TARGET_PACKAGE_} QUIET + GLOBAL + PATHS ${_NBL_G_CONFIG_ROOT_DIRECTORY_} + ) + endif() +endforeach() \ No newline at end of file diff --git a/cmake/cpack/find/compoment/template.cmake b/cmake/cpack/find/compoment/template.cmake index 595053d410..f65c1a5233 100644 --- a/cmake/cpack/find/compoment/template.cmake +++ b/cmake/cpack/find/compoment/template.cmake @@ -1,4 +1,6 @@ list(TRANSFORM @_NBL_PROXY_@ PREPEND "${CMAKE_CURRENT_LIST_DIR}/") +set(@_NBL_COMPOMENT_D_@ "${CMAKE_CURRENT_LIST_DIR}/@_NBL_COMPOMENT_D_V_@") + include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(@_NBL_PACKAGE_@ DEFAULT_MSG @_NBL_PROXY_@) \ No newline at end of file +find_package_handle_standard_args(@_NBL_PACKAGE_@ DEFAULT_MSG @_NBL_PROXY_@ @_NBL_COMPOMENT_D_@) \ No newline at end of file diff --git a/cmake/cpack/find/nabla.cmake b/cmake/cpack/find/nabla.cmake index 3c6fcd0b1d..698157a6b7 100644 --- a/cmake/cpack/find/nabla.cmake +++ b/cmake/cpack/find/nabla.cmake @@ -15,6 +15,9 @@ endif() string(TOUPPER "${_COMPOMENT_}" _Cu_) string(TOLOWER "${_COMPOMENT_}" _Cl_) +string(TOUPPER "${_SPATH_}" _Su_) +string(TOLOWER "${_SPATH_}" _Sl_) + string(TOUPPER "${CMAKE_INSTALL_CONFIG_NAME}" _NBL_CONFIG_) string(TOLOWER "${CMAKE_INSTALL_CONFIG_NAME}" _NBL_CONFIG_L_) @@ -43,6 +46,10 @@ set(NBL_CMAKE_CONFIG_OUTPUT_FILE "${NBL_CMAKE_OUTPUT_DIRECTORY}/${_NBL_COMPLETE_ cmake_path(RELATIVE_PATH CMAKE_INSTALL_PREFIX BASE_DIRECTORY "${NBL_CMAKE_COMPOMENT_OUTPUT_DIRECTORY}" OUTPUT_VARIABLE _NBL_REL_TO_PREFIX_) +set(_NBL_PROXY_ NABLA_INSTALL_${_Cu_}_${_LTu_}_${_NBL_CONFIG_}) +set(_NBL_COMPOMENT_D_ 
"NABLA_INSTALL_${_Cu_}_DIRECTORY_${_LTu_}_${_NBL_CONFIG_}") +set(_NBL_COMPOMENT_D_V_ "${_NBL_REL_TO_PREFIX_}") + foreach(_MANIFEST_INSTALL_REL_FILE_ IN LISTS NBL_CMAKE_INSTALL_MANIFEST_CONTENT) string(FIND "${_MANIFEST_INSTALL_REL_FILE_}" "/${_SPATH_}/" _NBL_FOUND_) @@ -50,12 +57,10 @@ foreach(_MANIFEST_INSTALL_REL_FILE_ IN LISTS NBL_CMAKE_INSTALL_MANIFEST_CONTENT) set(_X_ "${_NBL_REL_TO_PREFIX_}/${_MANIFEST_INSTALL_REL_FILE_}") cmake_path(NORMAL_PATH _X_ OUTPUT_VARIABLE _X_) - list(APPEND NABLA_INSTALL_${_Cu_}_${_LTu_}_${_NBL_CONFIG_} "${_X_}") + list(APPEND ${_NBL_PROXY_} "${_X_}") endif() endforeach() -set(_NBL_PROXY_ NABLA_INSTALL_${_Cu_}_${_LTu_}_${_NBL_CONFIG_}) - string(APPEND NBL_MANIFEST_IMPL "set(${_NBL_PROXY_}\n\t${${_NBL_PROXY_}}\n)") string(REPLACE ";" "\n\t" NBL_MANIFEST_IMPL "${NBL_MANIFEST_IMPL}") string(CONFIGURE "${NBL_MANIFEST_IMPL}" NBL_MANIFEST_IMPL_CONF) From 4a1c47a47c1d9c4eaaa78d74a0ae633cb400e333 Mon Sep 17 00:00:00 2001 From: Przemek Date: Mon, 13 May 2024 15:26:55 +0200 Subject: [PATCH 006/432] Implemented emulated float64 type --- .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 129 ++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 include/nbl/builtin/hlsl/emulated_float64_t.hlsl diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl new file mode 100644 index 0000000000..8ebfaf0a48 --- /dev/null +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -0,0 +1,129 @@ +template +T _static_cast(const U); + +using float32_t = float; +//using emulated_float64_t = double; + +namespace emulated +{ + struct emulated_float64_t + { + // TODO: change to `uint64_t` when on the emulation stage + using storage_t = float32_t; + + storage_t data; + + // constructors + // TODO: specializations? 
+ template + static emulated_float64_t create(T val) + { + emulated_float64_t output; + output.data = val; + return output; + } + + // arithmetic operators + emulated_float64_t operator+(const emulated_float64_t rhs) + { + emulated_float64_t retval; + retval.data = data + rhs.data; + return retval; + } + + emulated_float64_t operator-(const emulated_float64_t rhs) + { + emulated_float64_t retval; + retval.data = data - rhs.data; + return retval; + } + + emulated_float64_t operator*(const emulated_float64_t rhs) + { + emulated_float64_t retval; + retval.data = data * rhs.data; + return retval; + } + + emulated_float64_t operator/(const emulated_float64_t rhs) + { + emulated_float64_t retval; + retval.data = data / rhs.data; + return retval; + } + + // relational operators + bool operator==(const emulated_float64_t rhs) { return !(uint64_t(data) ^ uint64_t(rhs.data)); } + bool operator!=(const emulated_float64_t rhs) { return uint64_t(data) ^ uint64_t(rhs.data); } + bool operator<(const emulated_float64_t rhs) { return data < rhs.data; } + bool operator>(const emulated_float64_t rhs) { return data > rhs.data; } + bool operator<=(const emulated_float64_t rhs) { return !operator>(rhs); } + bool operator>=(const emulated_float64_t rhs) { return !operator<(rhs); } + + //logical operators + bool operator&&(const emulated_float64_t rhs) { return bool(data) && bool(rhs.data); } + bool operator||(const emulated_float64_t rhs) { return bool(data) || bool(rhs.data); } + bool operator!() { return !bool(data); } + + // conversion operators + operator bool() { return bool(data); } + operator int() { return int(data); } + operator uint32_t() { return uint32_t(data); } + operator uint64_t() { return uint64_t(data); } + operator float() { return float(data); } +#ifdef __HLSL_VERSION + operator min16int() { return min16int(data);} + operator float64_t() { return float64_t(data); } + operator half() { return half(data); } +#else + operator uint16_t() { return uint16_t(data);} + operator double() { return double(data); } +#endif + + //explicit operator int() const { return int(data); } + + // HERE OMITED OPERATORS + // - not implementing bitwise and modulo operators since floating point types doesn't support them + // - compound operator overload not supported in HLSL + // - access operators (dereference and addressof) not supported in HLSL +#ifndef __HLSL_VERSION + // compound assignment operators + emulated_float64_t operator+=(emulated_float64_t rhs) + { + data = data + rhs.data; + return create(data); + } + + emulated_float64_t operator-=(emulated_float64_t rhs) + { + data = data - rhs.data; + return create(data); + } + + emulated_float64_t operator*=(emulated_float64_t rhs) + { + data = data * rhs.data; + return create(data); + } + + emulated_float64_t operator/=(emulated_float64_t rhs) + { + data = data / rhs.data; + return create(data); + } + + // access operators + emulated_float64_t operator*() { return *this; } + emulated_float64_t* operator&() { return this; } +#endif + }; +} + +// upgrades float to a double +template<> +emulated::emulated_float64_t _static_cast(const float val) +{ + emulated::emulated_float64_t retval; + retval.data = val; // TODO: manually upgrade the `val` IEEE754 32bit pattern to 64bit + return retval; +} From 76ec495a7d5abf304c9d232f91c4df91330f8071 Mon Sep 17 00:00:00 2001 From: Przemek Date: Wed, 15 May 2024 14:24:29 +0200 Subject: [PATCH 007/432] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests 
b/examples_tests index 38b5b08ffe..03077793c2 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 38b5b08ffe470ed7845062f532f470a8fa389bb7 +Subproject commit 03077793c29043f2705d18c2b95e1552780e5c86 From e3974024ae0f867846f6be0033c16e417bfe00e7 Mon Sep 17 00:00:00 2001 From: Przemek Date: Wed, 15 May 2024 15:36:57 +0200 Subject: [PATCH 008/432] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 03077793c2..ca34bfe81d 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 03077793c29043f2705d18c2b95e1552780e5c86 +Subproject commit ca34bfe81dcebdffa2a9da818839319cfff1eaf7 From 1f0a8eb51c9727907fa414fd05ed9b6b43461a5d Mon Sep 17 00:00:00 2001 From: Przemek Date: Wed, 15 May 2024 23:40:51 +0200 Subject: [PATCH 009/432] Modified emulated float file --- examples_tests | 2 +- include/nbl/builtin/hlsl/emulated_float64_t.hlsl | 11 ----------- src/nbl/builtin/CMakeLists.txt | 2 ++ 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/examples_tests b/examples_tests index ca34bfe81d..2e00f2043d 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit ca34bfe81dcebdffa2a9da818839319cfff1eaf7 +Subproject commit 2e00f2043d19ff31c1af3d2852b43ecc915d6a17 diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index 8ebfaf0a48..5cc061d3b9 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -1,5 +1,3 @@ -template -T _static_cast(const U); using float32_t = float; //using emulated_float64_t = double; @@ -118,12 +116,3 @@ namespace emulated #endif }; } - -// upgrades float to a double -template<> -emulated::emulated_float64_t _static_cast(const float val) -{ - emulated::emulated_float64_t retval; - retval.data = val; // TODO: manually upgrade the `val` IEEE754 32bit pattern to 64bit - return retval; -} diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index aad4a589e2..18ee1383ba 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -227,6 +227,8 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/normalization/shared_nor # HLSL LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/macros.h") +#emulated +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated_float64_t.hlsl") #spirv intrinsics LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/spirv_intrinsics/core.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/spirv_intrinsics/fragment_shader_pixel_interlock.hlsl") From 338213c9e9a7b4991b19d5ffea7e7e5d67184309 Mon Sep 17 00:00:00 2001 From: Przemek Date: Thu, 16 May 2024 18:37:17 +0200 Subject: [PATCH 010/432] Fixed GPU values test --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 39 ++----------------- 2 files changed, 4 insertions(+), 37 deletions(-) diff --git a/examples_tests b/examples_tests index 2e00f2043d..2ad6a6ce47 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 2e00f2043d19ff31c1af3d2852b43ecc915d6a17 +Subproject commit 2ad6a6ce47597495eb54a2b1910b192a9057c220 diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index 5cc061d3b9..e4b20e74b3 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -55,8 +55,8 @@ namespace emulated bool operator!=(const 
emulated_float64_t rhs) { return uint64_t(data) ^ uint64_t(rhs.data); } bool operator<(const emulated_float64_t rhs) { return data < rhs.data; } bool operator>(const emulated_float64_t rhs) { return data > rhs.data; } - bool operator<=(const emulated_float64_t rhs) { return !operator>(rhs); } - bool operator>=(const emulated_float64_t rhs) { return !operator<(rhs); } + bool operator<=(const emulated_float64_t rhs) { return data <= rhs.data; } + bool operator>=(const emulated_float64_t rhs) { return data >= rhs.data; } //logical operators bool operator&&(const emulated_float64_t rhs) { return bool(data) && bool(rhs.data); } @@ -73,46 +73,13 @@ namespace emulated operator min16int() { return min16int(data);} operator float64_t() { return float64_t(data); } operator half() { return half(data); } -#else - operator uint16_t() { return uint16_t(data);} - operator double() { return double(data); } #endif //explicit operator int() const { return int(data); } - // HERE OMITED OPERATORS + // OMITED OPERATORS // - not implementing bitwise and modulo operators since floating point types doesn't support them // - compound operator overload not supported in HLSL // - access operators (dereference and addressof) not supported in HLSL -#ifndef __HLSL_VERSION - // compound assignment operators - emulated_float64_t operator+=(emulated_float64_t rhs) - { - data = data + rhs.data; - return create(data); - } - - emulated_float64_t operator-=(emulated_float64_t rhs) - { - data = data - rhs.data; - return create(data); - } - - emulated_float64_t operator*=(emulated_float64_t rhs) - { - data = data * rhs.data; - return create(data); - } - - emulated_float64_t operator/=(emulated_float64_t rhs) - { - data = data / rhs.data; - return create(data); - } - - // access operators - emulated_float64_t operator*() { return *this; } - emulated_float64_t* operator&() { return this; } -#endif }; } From 9c2dc77c1b8f154c325854c8aa0ce6cb603be490 Mon Sep 17 00:00:00 2001 From: Przemek Date: Thu, 23 May 2024 15:19:07 +0200 Subject: [PATCH 011/432] SavingWork --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 118 ++++++++++++++++-- 2 files changed, 109 insertions(+), 11 deletions(-) diff --git a/examples_tests b/examples_tests index 2ad6a6ce47..ea4fbbf556 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 2ad6a6ce47597495eb54a2b1910b192a9057c220 +Subproject commit ea4fbbf55604c654d6d118173df08e34624e1249 diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index e4b20e74b3..025d427d79 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -1,18 +1,89 @@ +#include using float32_t = float; //using emulated_float64_t = double; namespace emulated { + namespace impl + { + nbl::hlsl::uint32_t2 umulExtended(uint32_t lhs, uint32_t rhs) + { + uint64_t product = uint64_t(lhs) * uint64_t(rhs); + nbl::hlsl::uint32_t2 output; + output.x = (product & 0xFFFFFFFF00000000) >> 32; + output.y = product & 0x00000000FFFFFFFFull; + return output; + } + + nbl::hlsl::uint32_t2 add64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) + { + nbl::hlsl::uint32_t2 output; + output.x = a1 + b1; + output.y = a0 + b0 + uint32_t(output.x < a1); + + return output; + } + + nbl::hlsl::uint32_t2 shortShift64Left(uint32_t a0, uint32_t a1, int count) + { + nbl::hlsl::uint32_t2 output; + output.x = a1 << count; + output.y = nbl::hlsl::lerp((a0 << count | (a1 >> ((-count) & 31))), a0, count == 0); 
+ }; + + nbl::hlsl::uint32_t4 mul64to128(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1, uint32_t z0Ptr) + { + uint32_t z0 = 0u; + uint32_t z1 = 0u; + uint32_t z2 = 0u; + uint32_t z3 = 0u; + uint32_t more1 = 0u; + uint32_t more2 = 0u; + + nbl::hlsl::uint32_t2 z2z3 = umulExtended(a0, b1); + z2 = z2z3.x; + z3 = z2z3.y; + nbl::hlsl::uint32_t2 z1more2 = umulExtended(a1, b0); + z1 = z1more2.x; + more2 = z1more2.y; + nbl::hlsl::uint32_t2 z1z2 = add64(z1, more2, 0u, z2); + z1 = z1z2.x; + z2 = z1z2.y; + nbl::hlsl::uint32_t2 z0more1 = umulExtended(a0, b0); + z0 = z0more1.x; + more1 = z0more1.y; + nbl::hlsl::uint32_t2 z0z1 = add64(z0, more1, 0u, z1); + z0 = z0z1.x; + z1 = z0z1.y; + nbl::hlsl::uint32_t2 more1more2 = umulExtended(a0, b1); + more1 = more1more2.x; + more2 = more1more2.y; + nbl::hlsl::uint32_t2 more1z2 = add64(more1, more2, 0u, z2); + more1 = more1z2.x; + z2 = more1z2.y; + nbl::hlsl::uint32_t2 z0z12 = add64(z0, z1, 0u, more1); + z0 = z0z12.x; + z1 = z0z12.y; + + + nbl::hlsl::uint32_t4 output; + output.x = z0; + output.y = z1; + output.z = z2; + output.w = z3; + return output; + } + } + struct emulated_float64_t { - // TODO: change to `uint64_t` when on the emulation stage - using storage_t = float32_t; + using storage_t = uint64_t; storage_t data; // constructors - // TODO: specializations? + // TODO: specializations template static emulated_float64_t create(T val) { @@ -39,8 +110,37 @@ namespace emulated emulated_float64_t operator*(const emulated_float64_t rhs) { emulated_float64_t retval; - retval.data = data * rhs.data; - return retval; + + uint32_t lhsLow = uint32_t(data); + uint32_t rhsLow = uint32_t(rhs.data); + uint32_t lhsHigh = uint32_t((data & 0x000FFFFF00000000ull) >> 32); + uint32_t rhsHigh = uint32_t((rhs.data & 0x000FFFFF00000000ull) >> 32); + uint32_t lhsExp = uint32_t((data >> 52) & 0x7FFull); + uint32_t rhsExp = uint32_t((rhs.data >> 52) & 0x7FFull); + + int32_t exp = lhsExp + rhsExp - 0x400ull; + uint64_t sign = (data ^ rhs.data) & 0x8000000000000000ull; + + + lhsHigh |= 0x00100000u; + nbl::hlsl::uint32_t2 shifted = emulated::impl::shortShift64Left(rhsHigh, rhsLow, 12); + rhsHigh = shifted.x; + rhsLow = shifted.y; + + nbl::hlsl::uint64_t4 product = emulated::impl::mul64To128(lhsHigh, lhsLow, rhsHigh, rhsLow); + product.xy = emulated::impl::add64(product.x, product.y, lhsHigh, aLow); + product.z |= uint32_t(product.w != 0u); + if (0x00200000u <= product.x) + { + //__shift64ExtraRightJamming( + // zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2); + ++zExp; + } + //return __roundAndPackFloat64(zSign, zExp, zFrac0, zFrac1, zFrac2); + + //uint32_t frac = 1; + + return emulated_float64_t::create(sign | ((uint64_t(exp) + 1023ull) << 52) | (uint64_t(frac) & 0x000FFFFFFFFFFFFull)); } emulated_float64_t operator/(const emulated_float64_t rhs) @@ -69,11 +169,9 @@ namespace emulated operator uint32_t() { return uint32_t(data); } operator uint64_t() { return uint64_t(data); } operator float() { return float(data); } -#ifdef __HLSL_VERSION - operator min16int() { return min16int(data);} - operator float64_t() { return float64_t(data); } - operator half() { return half(data); } -#endif + //operator min16int() { return min16int(data);} + //operator float64_t() { return float64_t(data); } + //operator half() { return half(data); } //explicit operator int() const { return int(data); } From 9043b292196a43f73a1894c388b8b26923d89b21 Mon Sep 17 00:00:00 2001 From: Przemek Date: Mon, 27 May 2024 23:00:32 +0200 Subject: [PATCH 012/432] Impliminted primitive multiplication for 
emulated_float64_t --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 178 +++++++++++++++--- 2 files changed, 157 insertions(+), 23 deletions(-) diff --git a/examples_tests b/examples_tests index ea4fbbf556..2a878a000c 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit ea4fbbf55604c654d6d118173df08e34624e1249 +Subproject commit 2a878a000c9898fe98d707ae965b5ba2122b0049 diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index 025d427d79..2cb5d303b8 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -19,8 +19,8 @@ namespace emulated nbl::hlsl::uint32_t2 add64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) { nbl::hlsl::uint32_t2 output; - output.x = a1 + b1; - output.y = a0 + b0 + uint32_t(output.x < a1); + output.y = a1 + b1; + output.x = a0 + b0 + uint32_t(output.y < a1); return output; } @@ -28,11 +28,13 @@ namespace emulated nbl::hlsl::uint32_t2 shortShift64Left(uint32_t a0, uint32_t a1, int count) { nbl::hlsl::uint32_t2 output; - output.x = a1 << count; - output.y = nbl::hlsl::lerp((a0 << count | (a1 >> ((-count) & 31))), a0, count == 0); + output.y = a1 << count; + output.x = nbl::hlsl::lerp((a0 << count | (a1 >> ((-count) & 31))), a0, count == 0); + + return output; }; - nbl::hlsl::uint32_t4 mul64to128(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1, uint32_t z0Ptr) + nbl::hlsl::uint32_t4 mul64to128(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) { uint32_t z0 = 0u; uint32_t z1 = 0u; @@ -74,6 +76,133 @@ namespace emulated output.w = z3; return output; } + + nbl::hlsl::uint32_t3 shift64ExtraRightJamming(uint32_t a0, uint32_t a1, uint32_t a2, int count) + { + nbl::hlsl::uint32_t3 output; + output.x = 0u; + + int negCount = (-count) & 31; + + output.z = nbl::hlsl::lerp(uint32_t(a0 != 0u), a0, count == 64); + output.z = nbl::hlsl::lerp(output.z, a0 << negCount, count < 64); + output.z = nbl::hlsl::lerp(output.z, a1 << negCount, count < 32); + + output.y = nbl::hlsl::lerp(0u, (a0 >> (count & 31)), count < 64); + output.y = nbl::hlsl::lerp(output.y, (a0<>count), count < 32); + + a2 = nbl::hlsl::lerp(a2 | a1, a2, count < 32); + output.x = nbl::hlsl::lerp(output.x, a0 >> count, count < 32); + output.z |= uint32_t(a2 != 0u); + + output.x = nbl::hlsl::lerp(output.x, 0u, (count == 32)); + output.y = nbl::hlsl::lerp(output.y, a0, (count == 32)); + output.z = nbl::hlsl::lerp(output.z, a1, (count == 32)); + output.x = nbl::hlsl::lerp(output.x, a0, (count == 0)); + output.y = nbl::hlsl::lerp(output.y, a1, (count == 0)); + output.z = nbl::hlsl::lerp(output.z, a2, (count == 0)); + + return output; + } + + + uint64_t packFloat64(uint32_t zSign, int zExp, uint32_t zFrac0, uint32_t zFrac1) + { + nbl::hlsl::uint32_t2 z; + + z.x = zSign + (uint32_t(zExp) << 20) + zFrac0; + z.y = zFrac1; + + uint64_t output = 0u; + output |= (uint64_t(z.x) << 32) & 0xFFFFFFFF00000000ull; + output |= uint64_t(z.y); + return output; + } + + + uint64_t roundAndPackFloat64(uint32_t zSign, int zExp, uint32_t zFrac0, uint32_t zFrac1, uint32_t zFrac2) + { + bool roundNearestEven; + bool increment; + + roundNearestEven = true; + increment = int(zFrac2) < 0; + if (!roundNearestEven) + { + if (false) //(FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) + { + increment = false; + } + else + { + if (false) //(zSign != 0u) + { + //increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && + // (zFrac2 != 0u); + } + else + { + //increment = 
(FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && + // (zFrac2 != 0u); + } + } + } + if (0x7FD <= zExp) + { + if ((0x7FD < zExp) || ((zExp == 0x7FD) && (0x001FFFFFu == zFrac0 && 0xFFFFFFFFu == zFrac1) && increment)) + { + if (false) // ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) || + // ((zSign != 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP)) || + // ((zSign == 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN))) + { + return packFloat64(zSign, 0x7FE, 0x000FFFFFu, 0xFFFFFFFFu); + } + + return packFloat64(zSign, 0x7FF, 0u, 0u); + } + } + + if (zExp < 0) + { + nbl::hlsl::uint32_t3 shifted = shift64ExtraRightJamming(zFrac0, zFrac1, zFrac2, -zExp); + zFrac0 = shifted.x; + zFrac1 = shifted.y; + zFrac2 = shifted.z; + zExp = 0; + + if (roundNearestEven) + { + increment = zFrac2 < 0u; + } + else + { + if (zSign != 0u) + { + //increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && (zFrac2 != 0u); + } + else + { + //increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && (zFrac2 != 0u); + } + } + } + + if (increment) + { + nbl::hlsl::uint32_t2 added = add64(zFrac0, zFrac1, 0u, 1u); + zFrac0 = added.x; + zFrac1 = added.y; + zFrac1 &= ~((zFrac2 + uint32_t(zFrac2 == 0u)) & uint32_t(roundNearestEven)); + } + else + { + zExp = nbl::hlsl::lerp(zExp, 0, (zFrac0 | zFrac1) == 0u); + } + + return packFloat64(zSign, zExp, zFrac0, zFrac1); + } + + } struct emulated_float64_t @@ -91,6 +220,14 @@ namespace emulated output.data = val; return output; } + + template<> + static emulated_float64_t create(double val) + { + emulated_float64_t output; + output.data = reinterpret_cast(val); + return output; + } // arithmetic operators emulated_float64_t operator+(const emulated_float64_t rhs) @@ -109,16 +246,17 @@ namespace emulated emulated_float64_t operator*(const emulated_float64_t rhs) { - emulated_float64_t retval; - - uint32_t lhsLow = uint32_t(data); - uint32_t rhsLow = uint32_t(rhs.data); + emulated_float64_t retval = emulated_float64_t::create(0u); + + + uint32_t lhsLow = uint32_t(data & 0x00000000FFFFFFFFull); + uint32_t rhsLow = uint32_t(rhs.data & 0x00000000FFFFFFFFull); uint32_t lhsHigh = uint32_t((data & 0x000FFFFF00000000ull) >> 32); uint32_t rhsHigh = uint32_t((rhs.data & 0x000FFFFF00000000ull) >> 32); uint32_t lhsExp = uint32_t((data >> 52) & 0x7FFull); uint32_t rhsExp = uint32_t((rhs.data >> 52) & 0x7FFull); - int32_t exp = lhsExp + rhsExp - 0x400ull; + int32_t exp = int32_t(lhsExp + rhsExp) - 0x400u; uint64_t sign = (data ^ rhs.data) & 0x8000000000000000ull; @@ -127,20 +265,16 @@ namespace emulated rhsHigh = shifted.x; rhsLow = shifted.y; - nbl::hlsl::uint64_t4 product = emulated::impl::mul64To128(lhsHigh, lhsLow, rhsHigh, rhsLow); - product.xy = emulated::impl::add64(product.x, product.y, lhsHigh, aLow); - product.z |= uint32_t(product.w != 0u); - if (0x00200000u <= product.x) + nbl::hlsl::uint32_t4 fracUnpacked = impl::mul64to128(lhsHigh, lhsLow, rhsHigh, rhsLow); + fracUnpacked.xy = emulated::impl::add64(fracUnpacked.x, fracUnpacked.y, lhsHigh, lhsLow); + fracUnpacked.z |= uint32_t(fracUnpacked.w != 0u); + if (0x00200000u <= fracUnpacked.x) { - //__shift64ExtraRightJamming( - // zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2); - ++zExp; + fracUnpacked = nbl::hlsl::uint32_t4(impl::shift64ExtraRightJamming(fracUnpacked.x, fracUnpacked.y, fracUnpacked.z, 1), 0u); + ++exp; } - //return __roundAndPackFloat64(zSign, zExp, zFrac0, zFrac1, zFrac2); - - //uint32_t frac = 1; - - return emulated_float64_t::create(sign | ((uint64_t(exp) + 1023ull) << 52) | (uint64_t(frac) & 0x000FFFFFFFFFFFFull)); + + return 
emulated_float64_t::create(impl::roundAndPackFloat64(sign, exp, fracUnpacked.x, fracUnpacked.y, fracUnpacked.z)); } emulated_float64_t operator/(const emulated_float64_t rhs) From b20de536558e4cddacd02b176eca0ced55a78908 Mon Sep 17 00:00:00 2001 From: Przemek Date: Fri, 31 May 2024 18:55:50 +0200 Subject: [PATCH 013/432] Implemented add sub i mul operators --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 336 ++++++++++++++++-- 2 files changed, 304 insertions(+), 34 deletions(-) diff --git a/examples_tests b/examples_tests index 2a878a000c..1169a1301f 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 2a878a000c9898fe98d707ae965b5ba2122b0049 +Subproject commit 1169a1301f8e21957ea962a2f6abf502b58d6591 diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index 2cb5d303b8..65210835d6 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -1,7 +1,30 @@ #include -using float32_t = float; -//using emulated_float64_t = double; +#ifdef __HLSL_VERSION +#define LERP lerp +#else +#define LERP nbl::hlsl::lerp +#endif + +#ifdef __HLSL_VERSION +#define ABS abs +#else +#define ABS std::abs +#endif + +// TODO: inline function +#define EXCHANGE(a, b) \ + do { \ + a ^= b; \ + b ^= a; \ + a ^= b; \ + } while (false) + +#define FLOAT_ROUND_NEAREST_EVEN 0 +#define FLOAT_ROUND_TO_ZERO 1 +#define FLOAT_ROUND_DOWN 2 +#define FLOAT_ROUND_UP 3 +#define FLOAT_ROUNDING_MODE FLOAT_ROUND_NEAREST_EVEN namespace emulated { @@ -11,8 +34,8 @@ namespace emulated { uint64_t product = uint64_t(lhs) * uint64_t(rhs); nbl::hlsl::uint32_t2 output; - output.x = (product & 0xFFFFFFFF00000000) >> 32; - output.y = product & 0x00000000FFFFFFFFull; + output.x = uint32_t((product & 0xFFFFFFFF00000000) >> 32); + output.y = uint32_t(product & 0x00000000FFFFFFFFull); return output; } @@ -24,16 +47,56 @@ namespace emulated return output; } + + nbl::hlsl::uint32_t2 sub64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) + { + nbl::hlsl::uint32_t2 output; + output.y = a1 - b1; + output.x = a0 - b0 - uint32_t(a1 < b1); + + return output; + } + + // TODO: test + int countLeadingZeros32(uint32_t val) + { +#ifndef __HLSL_VERSION + return 31 - nbl::hlsl::findMSB(val); +#else + return 31 - firstbithigh(val); +#endif + } + nbl::hlsl::uint32_t2 shortShift64Left(uint32_t a0, uint32_t a1, int count) { nbl::hlsl::uint32_t2 output; output.y = a1 << count; - output.x = nbl::hlsl::lerp((a0 << count | (a1 >> ((-count) & 31))), a0, count == 0); + output.x = LERP((a0 << count | (a1 >> ((-count) & 31))), a0, count == 0); return output; }; + nbl::hlsl::uint32_t2 shift64RightJamming(uint32_t a0, uint32_t a1, int count) + { + nbl::hlsl::uint32_t2 output; + const int negCount = (-count) & 31; + + output.x = LERP(0u, a0, count == 0); + output.x = LERP(output.x, (a0 >> count), count < 32); + + output.y = uint32_t((a0 | a1) != 0u); /* count >= 64 */ + uint32_t z1_lt64 = (a0>>(count & 31)) | uint32_t(((a0<>count) | uint32_t ((a1<> (count & 31)), count < 64); - output.y = nbl::hlsl::lerp(output.y, (a0<>count), count < 32); + output.y = LERP(0u, (a0 >> (count & 31)), count < 64); + output.y = LERP(output.y, (a0<>count), count < 32); - a2 = nbl::hlsl::lerp(a2 | a1, a2, count < 32); - output.x = nbl::hlsl::lerp(output.x, a0 >> count, count < 32); + a2 = LERP(a2 | a1, a2, count < 32); + output.x = LERP(output.x, a0 >> count, count < 32); output.z |= uint32_t(a2 != 0u); - output.x = 
nbl::hlsl::lerp(output.x, 0u, (count == 32)); - output.y = nbl::hlsl::lerp(output.y, a0, (count == 32)); - output.z = nbl::hlsl::lerp(output.z, a1, (count == 32)); - output.x = nbl::hlsl::lerp(output.x, a0, (count == 0)); - output.y = nbl::hlsl::lerp(output.y, a1, (count == 0)); - output.z = nbl::hlsl::lerp(output.z, a2, (count == 0)); + output.x = LERP(output.x, 0u, (count == 32)); + output.y = LERP(output.y, a0, (count == 32)); + output.z = LERP(output.z, a1, (count == 32)); + output.x = LERP(output.x, a0, (count == 0)); + output.y = LERP(output.y, a1, (count == 0)); + output.z = LERP(output.z, a2, (count == 0)); return output; } @@ -178,11 +241,11 @@ namespace emulated { if (zSign != 0u) { - //increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && (zFrac2 != 0u); + increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && (zFrac2 != 0u); } else { - //increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && (zFrac2 != 0u); + increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && (zFrac2 != 0u); } } } @@ -196,12 +259,36 @@ namespace emulated } else { - zExp = nbl::hlsl::lerp(zExp, 0, (zFrac0 | zFrac1) == 0u); + zExp = LERP(zExp, 0, (zFrac0 | zFrac1) == 0u); } return packFloat64(zSign, zExp, zFrac0, zFrac1); } - + + uint64_t normalizeRoundAndPackFloat64(uint32_t sign, int exp, uint32_t frac0, uint32_t frac1) + { + int shiftCount; + nbl::hlsl::uint32_t3 frac = nbl::hlsl::uint32_t3(frac0, frac1, 0u); + + if (frac.x == 0u) + { + exp -= 32; + frac.x = frac.y; + frac.y = 0u; + } + + shiftCount = countLeadingZeros32(frac.x) - 11; + if (0 <= shiftCount) + { + frac.xy = shortShift64Left(frac.x, frac.y, shiftCount); + } + else + { + frac.xyz = shift64ExtraRightJamming(frac.x, frac.y, 0u, -shiftCount); + } + exp -= shiftCount; + return roundAndPackFloat64(sign, exp, frac.x, frac.y, frac.z); + } } @@ -215,12 +302,37 @@ namespace emulated // TODO: specializations template static emulated_float64_t create(T val) - { + { + emulated_float64_t output; + output.data = val; + return output; + } + + static emulated_float64_t createEmulatedFloat64PreserveBitPattern(uint64_t val) + { emulated_float64_t output; output.data = val; return output; } + // TODO: won't not work for uints with msb of index > 52 + template<> + static emulated_float64_t create(uint64_t val) + { +#ifndef __HLSL_VERSION + const uint64_t msbIndex = nbl::hlsl::findMSB(val); +#else + const uint64_t msbIndex = firstbithigh(val); +#endif + uint64_t exp = ((msbIndex + 1023) << 52) & 0x7FF0000000000000; + uint64_t mantissa = (val << (52 - msbIndex)) & 0x000FFFFFFFFFFFFFull; + emulated_float64_t output; + output.data = exp | mantissa; + return output; + } + + // TODO: temporary, remove +#ifndef __HLSL_VERSION template<> static emulated_float64_t create(double val) { @@ -228,36 +340,184 @@ namespace emulated output.data = reinterpret_cast(val); return output; } +#endif // arithmetic operators emulated_float64_t operator+(const emulated_float64_t rhs) { - emulated_float64_t retval; - retval.data = data + rhs.data; - return retval; + emulated_float64_t retval = createEmulatedFloat64PreserveBitPattern(0u); + + uint32_t lhsSign = uint32_t((data & 0x8000000000000000ull) >> 32); + uint32_t rhsSign = uint32_t((rhs.data & 0x8000000000000000ull) >> 32); + + uint32_t lhsLow = uint32_t(data & 0x00000000FFFFFFFFull); + uint32_t rhsLow = uint32_t(rhs.data & 0x00000000FFFFFFFFull); + uint32_t lhsHigh = uint32_t((data & 0x000FFFFF00000000ull) >> 32); + uint32_t rhsHigh = uint32_t((rhs.data & 0x000FFFFF00000000ull) >> 32); + + int lhsExp = int((data >> 52) & 
0x7FFull); + int rhsExp = int((rhs.data >> 52) & 0x7FFull); + + int expDiff = lhsExp - rhsExp; + + if (lhsSign == rhsSign) + { + nbl::hlsl::uint32_t3 frac; + int exp; + + if (expDiff == 0) + { + //if (lhsExp == 0x7FF) + //{ + // bool propagate = ((lhsHigh | rhsHigh) | (lhsLow| rhsLow)) != 0u; + // return nbl::hlsl::lerp(a, propagateFloat64NaN(a, b), propagate); + //} + frac.xy = impl::add64(lhsHigh, lhsLow, rhsHigh, rhsLow); + if (lhsExp == 0) + return createEmulatedFloat64PreserveBitPattern(impl::packFloat64(lhsSign, 0, frac.x, frac.y)); + frac.z = 0u; + frac.x |= 0x00200000u; + exp = lhsExp; + frac = impl::shift64ExtraRightJamming(frac.x, frac.y, frac.z, 1); + } + else + { + if (expDiff < 0) + { + EXCHANGE(lhsHigh, rhsHigh); + EXCHANGE(lhsLow, rhsLow); + EXCHANGE(lhsExp, rhsExp); + } + + //if (lhsExp == 0x7FF) + //{ + // bool propagate = (lhsHigh | lhsLow) != 0u; + // return nbl::hlsl::lerp(__packFloat64(lhsSign, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate); + //} + + expDiff = LERP(ABS(expDiff), ABS(expDiff) - 1, rhsExp == 0); + rhsHigh = LERP(rhsHigh | 0x00100000u, rhsHigh, rhsExp == 0); + nbl::hlsl::float32_t3 shifted = impl::shift64ExtraRightJamming(rhsHigh, rhsLow, 0u, expDiff); + rhsHigh = shifted.x; + rhsLow = shifted.y; + frac.z = shifted.z; + exp = lhsExp; + + lhsHigh |= 0x00100000u; + frac.xy = impl::add64(lhsHigh, lhsLow, rhsHigh, rhsLow); + --exp; + if (!(frac.x < 0x00200000u)) + { + frac = impl::shift64ExtraRightJamming(frac.x, frac.y, frac.z, 1); + ++exp; + } + + return createEmulatedFloat64PreserveBitPattern(impl::roundAndPackFloat64(lhsSign, exp, frac.x, frac.y, frac.z)); + } + + // cannot happen but compiler cries about not every path returning value + return createEmulatedFloat64PreserveBitPattern(0xdeadbeefbadcaffeull); + } + else + { + int exp; + + nbl::hlsl::uint32_t2 lhsShifted = impl::shortShift64Left(lhsHigh, lhsLow, 10); + lhsHigh = lhsShifted.x; + lhsLow = lhsShifted.y; + nbl::hlsl::uint32_t2 rhsShifted = impl::shortShift64Left(rhsHigh, rhsLow, 10); + rhsHigh = rhsShifted.x; + rhsLow = rhsShifted.y; + + if (expDiff != 0) + { + nbl::hlsl::uint32_t2 frac; + + if (expDiff < 0) + { + EXCHANGE(lhsHigh, rhsHigh); + EXCHANGE(lhsLow, rhsLow); + EXCHANGE(lhsExp, rhsExp); + lhsSign ^= 0x80000000u; // smth not right about that + } + + //if (lhsExp == 0x7FF) + //{ + // bool propagate = (lhsHigh | lhsLow) != 0u; + // return nbl::hlsl::lerp(__packFloat64(lhsSign, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate); + //} + + expDiff = LERP(ABS(expDiff), ABS(expDiff) - 1, rhsExp == 0); + rhsHigh = LERP(rhsHigh | 0x40000000u, rhsHigh, rhsExp == 0); + nbl::hlsl::uint32_t2 shifted = impl::shift64RightJamming(rhsHigh, rhsLow, expDiff); + rhsHigh = shifted.x; + rhsLow = shifted.y; + lhsHigh |= 0x40000000u; + frac.xy = impl::sub64(lhsHigh, lhsLow, rhsHigh, rhsLow); + exp = lhsExp; + --exp; + return createEmulatedFloat64PreserveBitPattern(impl::normalizeRoundAndPackFloat64(lhsSign, exp - 10, frac.x, frac.y)); + } + //if (lhsExp == 0x7FF) + //{ + // bool propagate = ((lhsHigh | rhsHigh) | (lhsLow | rhsLow)) != 0u; + // return nbl::hlsl::lerp(0xFFFFFFFFFFFFFFFFUL, __propagateFloat64NaN(a, b), propagate); + //} + rhsExp = LERP(rhsExp, 1, lhsExp == 0); + lhsExp = LERP(lhsExp, 1, lhsExp == 0); + + nbl::hlsl::uint32_t2 frac; + uint32_t signOfDifference = 0; + if (rhsHigh < lhsHigh) + { + frac.xy = impl::sub64(lhsHigh, lhsLow, rhsHigh, rhsLow); + } + else if (lhsHigh < rhsHigh) + { + frac.xy = impl::sub64(rhsHigh, rhsLow, lhsHigh, lhsLow); + signOfDifference = 0x80000000; + } 
+ else if (rhsLow <= lhsLow) + { + /* It is possible that frac.x and frac.y may be zero after this. */ + frac.xy = impl::sub64(lhsHigh, lhsLow, rhsHigh, rhsLow); + } + else + { + frac.xy = impl::sub64(rhsHigh, rhsLow, lhsHigh, lhsLow); + signOfDifference = 0x80000000; + } + + exp = LERP(rhsExp, lhsExp, signOfDifference == 0u); + lhsSign ^= signOfDifference; + uint64_t retval_0 = impl::packFloat64(uint32_t(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) << 31, 0, 0u, 0u); + uint64_t retval_1 = impl::normalizeRoundAndPackFloat64(lhsSign, exp - 11, frac.x, frac.y); + return createEmulatedFloat64PreserveBitPattern(LERP(retval_0, retval_1, frac.x != 0u || frac.y != 0u)); + } } - emulated_float64_t operator-(const emulated_float64_t rhs) + emulated_float64_t operator-(emulated_float64_t rhs) { - emulated_float64_t retval; - retval.data = data - rhs.data; - return retval; + emulated_float64_t lhs = createEmulatedFloat64PreserveBitPattern(data); + emulated_float64_t rhsFlipped = rhs.flipSign(); + + return lhs + rhsFlipped; } emulated_float64_t operator*(const emulated_float64_t rhs) { emulated_float64_t retval = emulated_float64_t::create(0u); - uint32_t lhsLow = uint32_t(data & 0x00000000FFFFFFFFull); uint32_t rhsLow = uint32_t(rhs.data & 0x00000000FFFFFFFFull); uint32_t lhsHigh = uint32_t((data & 0x000FFFFF00000000ull) >> 32); uint32_t rhsHigh = uint32_t((rhs.data & 0x000FFFFF00000000ull) >> 32); + uint32_t lhsExp = uint32_t((data >> 52) & 0x7FFull); uint32_t rhsExp = uint32_t((rhs.data >> 52) & 0x7FFull); int32_t exp = int32_t(lhsExp + rhsExp) - 0x400u; - uint64_t sign = (data ^ rhs.data) & 0x8000000000000000ull; + uint64_t sign = uint32_t(((data ^ rhs.data) & 0x8000000000000000ull) >> 32); lhsHigh |= 0x00100000u; @@ -274,9 +534,10 @@ namespace emulated ++exp; } - return emulated_float64_t::create(impl::roundAndPackFloat64(sign, exp, fracUnpacked.x, fracUnpacked.y, fracUnpacked.z)); + return createEmulatedFloat64PreserveBitPattern(impl::roundAndPackFloat64(sign, exp, fracUnpacked.x, fracUnpacked.y, fracUnpacked.z)); } + // TODO emulated_float64_t operator/(const emulated_float64_t rhs) { emulated_float64_t retval; @@ -313,5 +574,14 @@ namespace emulated // - not implementing bitwise and modulo operators since floating point types doesn't support them // - compound operator overload not supported in HLSL // - access operators (dereference and addressof) not supported in HLSL + + // TODO: should modify self? 
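+        // returns a copy with only the sign bit inverted; operator- uses this to negate its right-hand side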
+ emulated_float64_t flipSign() + { + const uint64_t flippedSign = ((~data) & 0x8000000000000000ull); + return createEmulatedFloat64PreserveBitPattern(flippedSign | (data & 0x7FFFFFFFFFFFFFFFull)); + } }; + + //_NBL_STATIC_INLINE_CONSTEXPR emulated_float64_t EMULATED_FLOAT64_NAN = emulated_float64_t::create(0.0 / 0.0); } From c5a635267e6ee21195b1a9fa9098fbba3187ff07 Mon Sep 17 00:00:00 2001 From: Przemek Date: Wed, 5 Jun 2024 15:16:06 +0200 Subject: [PATCH 014/432] All operators (except div) works on both CPU and GPU --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 25 +++++++++++++------ 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/examples_tests b/examples_tests index 1169a1301f..2d5d301138 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 1169a1301f8e21957ea962a2f6abf502b58d6591 +Subproject commit 2d5d3011384474aec6e9527abafc707214f427bd diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index 65210835d6..217ac2b14b 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -303,9 +303,19 @@ namespace emulated template static emulated_float64_t create(T val) { +#ifndef __HLSL_VERSION emulated_float64_t output; - output.data = val; + output.data = reinterpret_cast(val); return output; +#else + uint32_t lowBits; + uint32_t highBits; + asuint(val, lowBits, highBits); + + emulated_float64_t output; + output.data = (uint64_t(highBits) << 32) | uint64_t(lowBits); + return output; +#endif } static emulated_float64_t createEmulatedFloat64PreserveBitPattern(uint64_t val) @@ -316,6 +326,7 @@ namespace emulated } // TODO: won't not work for uints with msb of index > 52 +#ifndef __HLSL_VERSION template<> static emulated_float64_t create(uint64_t val) { @@ -330,7 +341,7 @@ namespace emulated output.data = exp | mantissa; return output; } - +#endif // TODO: temporary, remove #ifndef __HLSL_VERSION template<> @@ -438,7 +449,7 @@ namespace emulated EXCHANGE(lhsHigh, rhsHigh); EXCHANGE(lhsLow, rhsLow); EXCHANGE(lhsExp, rhsExp); - lhsSign ^= 0x80000000u; // smth not right about that + lhsSign ^= 0x80000000u; // TODO: smth not right about that } //if (lhsExp == 0x7FF) @@ -540,14 +551,12 @@ namespace emulated // TODO emulated_float64_t operator/(const emulated_float64_t rhs) { - emulated_float64_t retval; - retval.data = data / rhs.data; - return retval; + return createEmulatedFloat64PreserveBitPattern(0xdeadbeefbadcaffeull); } // relational operators - bool operator==(const emulated_float64_t rhs) { return !(uint64_t(data) ^ uint64_t(rhs.data)); } - bool operator!=(const emulated_float64_t rhs) { return uint64_t(data) ^ uint64_t(rhs.data); } + bool operator==(const emulated_float64_t rhs) { return !(data ^ rhs.data); } + bool operator!=(const emulated_float64_t rhs) { return data ^ rhs.data; } bool operator<(const emulated_float64_t rhs) { return data < rhs.data; } bool operator>(const emulated_float64_t rhs) { return data > rhs.data; } bool operator<=(const emulated_float64_t rhs) { return data <= rhs.data; } From 9de77db3e4b052a535812f252d312f838b4d35aa Mon Sep 17 00:00:00 2001 From: Przemek Date: Mon, 10 Jun 2024 15:02:49 +0200 Subject: [PATCH 015/432] Ported ieee754.glsl --- include/nbl/builtin/hlsl/bit.hlsl | 8 +- include/nbl/builtin/hlsl/cpp_compat.hlsl | 2 +- .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 56 +++++++++--- include/nbl/builtin/hlsl/ieee754.hlsl | 88 +++++++++++++++++++ 4 files changed, 138 
insertions(+), 16 deletions(-) create mode 100644 include/nbl/builtin/hlsl/ieee754.hlsl diff --git a/include/nbl/builtin/hlsl/bit.hlsl b/include/nbl/builtin/hlsl/bit.hlsl index fd6ff0c167..e1f1117af6 100644 --- a/include/nbl/builtin/hlsl/bit.hlsl +++ b/include/nbl/builtin/hlsl/bit.hlsl @@ -33,11 +33,11 @@ namespace nbl namespace hlsl { -template -T bit_cast(U val) +template +To bit_cast(From val) { - static_assert(sizeof(T) <= sizeof(U)); - return spirv::bitcast(val); + static_assert(sizeof(To) <= sizeof(From)); + return spirv::bitcast(val); } template diff --git a/include/nbl/builtin/hlsl/cpp_compat.hlsl b/include/nbl/builtin/hlsl/cpp_compat.hlsl index ee615ba5b7..a22e8cc0c5 100644 --- a/include/nbl/builtin/hlsl/cpp_compat.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat.hlsl @@ -41,7 +41,7 @@ using add_pointer = std::add_pointer; #define ARROW .arrow(). #define NBL_CONSTEXPR const static #define NBL_CONSTEXPR_STATIC_INLINE const static -#define NBL_CONST_MEMBER_FUNC +#define NBL_CONST_MEMBER_FUNC namespace nbl { diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index 217ac2b14b..175e610914 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -38,6 +38,11 @@ namespace emulated output.y = uint32_t(product & 0x00000000FFFFFFFFull); return output; } + + bool isNaN64(uint64_t val) + { + return bool((0x7FF0000000000000ull & val) && (0x000FFFFFFFFFFFFFull & val)); + } nbl::hlsl::uint32_t2 add64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) { @@ -47,7 +52,8 @@ namespace emulated return output; } - + + nbl::hlsl::uint32_t2 sub64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) { nbl::hlsl::uint32_t2 output; @@ -66,6 +72,24 @@ namespace emulated return 31 - firstbithigh(val); #endif } + + uint64_t propagateFloat64NaN(uint64_t a, uint64_t b) + { + #if defined RELAXED_NAN_PROPAGATION + return a | b; + #else + + bool aIsNaN = isNaN64(a); + bool bIsNaN = isNaN64(b); + a |= 0x0008000000000000ull; + b |= 0x0008000000000000ull; + + // TODO: + //return LERP(b, LERP(a, b, nbl::hlsl::float32_t2(bIsNaN, bIsNaN)), nbl::hlsl::float32_t2(aIsNaN, aIsNaN)); + return 0xdeadbeefbadcaffeull; + #endif + } + nbl::hlsl::uint32_t2 shortShift64Left(uint32_t a0, uint32_t a1, int count) @@ -289,7 +313,10 @@ namespace emulated exp -= shiftCount; return roundAndPackFloat64(sign, exp, frac.x, frac.y, frac.z); } - + + static const uint64_t SIGN_MASK = 0x8000000000000000ull; + static const uint64_t EXP_MASK = 0x7FF0000000000000ull; + static const uint64_t MANTISA_MASK = 0x000FFFFFFFFFFFFFull; } struct emulated_float64_t @@ -357,7 +384,7 @@ namespace emulated emulated_float64_t operator+(const emulated_float64_t rhs) { emulated_float64_t retval = createEmulatedFloat64PreserveBitPattern(0u); - + uint32_t lhsSign = uint32_t((data & 0x8000000000000000ull) >> 32); uint32_t rhsSign = uint32_t((rhs.data & 0x8000000000000000ull) >> 32); @@ -380,9 +407,10 @@ namespace emulated { //if (lhsExp == 0x7FF) //{ - // bool propagate = ((lhsHigh | rhsHigh) | (lhsLow| rhsLow)) != 0u; - // return nbl::hlsl::lerp(a, propagateFloat64NaN(a, b), propagate); + // bool propagate = (lhsMantissa | rhsMantissa) != 0u; + // return createEmulatedFloat64PreserveBitPattern(LERP(data, impl::propagateFloat64NaN(data, rhs.data), propagate)); //} + frac.xy = impl::add64(lhsHigh, lhsLow, rhsHigh, rhsLow); if (lhsExp == 0) return createEmulatedFloat64PreserveBitPattern(impl::packFloat64(lhsSign, 0, frac.x, frac.y)); @@ -398,13 
+426,14 @@ namespace emulated EXCHANGE(lhsHigh, rhsHigh); EXCHANGE(lhsLow, rhsLow); EXCHANGE(lhsExp, rhsExp); + EXCHANGE(lhsExp, rhsExp); } - //if (lhsExp == 0x7FF) - //{ - // bool propagate = (lhsHigh | lhsLow) != 0u; - // return nbl::hlsl::lerp(__packFloat64(lhsSign, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate); - //} + if (lhsExp == 0x7FF) + { + bool propagate = (lhsHigh | lhsLow) != 0u; + return createEmulatedFloat64PreserveBitPattern(LERP(0x7FF0000000000000ull | (uint64_t(lhsSign) << 32), impl::propagateFloat64NaN(data, rhs.data), propagate)); + } expDiff = LERP(ABS(expDiff), ABS(expDiff) - 1, rhsExp == 0); rhsHigh = LERP(rhsHigh | 0x00100000u, rhsHigh, rhsExp == 0); @@ -449,7 +478,7 @@ namespace emulated EXCHANGE(lhsHigh, rhsHigh); EXCHANGE(lhsLow, rhsLow); EXCHANGE(lhsExp, rhsExp); - lhsSign ^= 0x80000000u; // TODO: smth not right about that + lhsSign ^= 0x80000000u; } //if (lhsExp == 0x7FF) @@ -590,6 +619,11 @@ namespace emulated const uint64_t flippedSign = ((~data) & 0x8000000000000000ull); return createEmulatedFloat64PreserveBitPattern(flippedSign | (data & 0x7FFFFFFFFFFFFFFFull)); } + + bool isNaN() + { + return impl::isNaN64(data); + } }; //_NBL_STATIC_INLINE_CONSTEXPR emulated_float64_t EMULATED_FLOAT64_NAN = emulated_float64_t::create(0.0 / 0.0); diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl new file mode 100644 index 0000000000..481d7678f7 --- /dev/null +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -0,0 +1,88 @@ +#ifndef _NBL_BUILTIN_HLSL_IEE754_H_INCLUDED_ +#define _NBL_BUILTIN_HLSL_IEE754_H_INCLUDED_ + +#include + +namespace nbl::hlsl::ieee754 +{ + template + int getExponentBitCnt() { return 0xdeadbeefu; } + template<> int getExponentBitCnt() { return 5; } + template<> int getExponentBitCnt() { return 5; } + template<> int getExponentBitCnt() { return 8; } + template<> int getExponentBitCnt() { return 8; } + template<> int getExponentBitCnt() { return 11; } + template<> int getExponentBitCnt() { return 11; } + + template + int getMantissaBitCnt() { return 0xdeadbeefu; } + template<> int getMantissaBitCnt() { return 10; } + template<> int getMantissaBitCnt() { return 10; } + template<> int getMantissaBitCnt() { return 23; } + template<> int getMantissaBitCnt() { return 23; } + template<> int getMantissaBitCnt() { return 52; } + template<> int getMantissaBitCnt() { return 52; } + + template + int getExponentBias() { return 0xdeadbeefu; } + template<> int getExponentBias() { return 15; } + template<> int getExponentBias() { return 15; } + template<> int getExponentBias() { return 127; } + template<> int getExponentBias() { return 127; } + template<> int getExponentBias() { return 1023; } + template<> int getExponentBias() { return 1023; } + + template + unsigned_integer_of_size::type getExponentMask() { return 0xdeadbeefu; } + template<> unsigned_integer_of_size<2>::type getExponentMask() { return 0x7C00; } + template<> unsigned_integer_of_size<2>::type getExponentMask() { return 0x7C00; } + template<> unsigned_integer_of_size<4>::type getExponentMask() { return 0x7F800000u; } + template<> unsigned_integer_of_size<4>::type getExponentMask() { return 0x7F800000u; } + template<> unsigned_integer_of_size<8>::type getExponentMask() { return 0x7FF0000000000000ull; } + template<> unsigned_integer_of_size<8>::type getExponentMask() { return 0x7FF0000000000000ull; } + + template + unsigned_integer_of_size::type getMantissaMask() { return 0xdeadbeefu; } + template<> unsigned_integer_of_size<2>::type getMantissaMask() { return 
0x03FF; } + template<> unsigned_integer_of_size<2>::type getMantissaMask() { return 0x03FF; } + template<> unsigned_integer_of_size<4>::type getMantissaMask() { return 0x007FFFFFu; } + template<> unsigned_integer_of_size<4>::type getMantissaMask() { return 0x007FFFFFu; } + template<> unsigned_integer_of_size<8>::type getMantissaMask() { return 0x000FFFFFFFFFFFFFull; } + template<> unsigned_integer_of_size<8>::type getMantissaMask() { return 0x000FFFFFFFFFFFFFull; } + + template + uint32_t extractBiasedExponent(T x) + { + using AsUint = typename unsigned_integer_of_size::type; + return bitfieldExtract(bit_cast(x), getMantissaBitCnt(), getExponentBitCnt()); + } + + template + int extractExponent(T x) + { + return int(extractBiasedExponent(x) - getExponentBias()); + } + + template + T replaceBiasedExponent(T x, uint32_t biasedExp) + { + using AsUint = typename unsigned_integer_of_size::type; + return bitCast(uintBitsToFloat(bitfieldInsert(bit_cast(x), biasedExp, getMantissaBitCnt(), getExponentBitCnt()))); + } + + // performs no overflow tests, returns x*exp2(n) + template + T fastMulExp2(T x, int n) + { + return replaceBiasedExponent(x, extractBiasedExponent(x) + uint32_t(n)); + } + + template + unsigned_integer_of_size::type extractMantissa(T x) + { + using AsUint = typename unsigned_integer_of_size::type; + return (bit_cast(x) & getMantissaMask()); + } +} + +#endif \ No newline at end of file From aba21cef477aa123ac08bae332ddf2193e38b886 Mon Sep 17 00:00:00 2001 From: Przemek Date: Thu, 13 Jun 2024 23:25:59 +0200 Subject: [PATCH 016/432] Implemented ieee754.hlsl --- include/nbl/builtin/hlsl/bit.hlsl | 14 +++++ include/nbl/builtin/hlsl/ieee754.hlsl | 64 ++++++++++++++++++----- include/nbl/builtin/hlsl/type_traits.hlsl | 24 +++++++++ 3 files changed, 90 insertions(+), 12 deletions(-) diff --git a/include/nbl/builtin/hlsl/bit.hlsl b/include/nbl/builtin/hlsl/bit.hlsl index e1f1117af6..d9bf8b0f42 100644 --- a/include/nbl/builtin/hlsl/bit.hlsl +++ b/include/nbl/builtin/hlsl/bit.hlsl @@ -19,6 +19,20 @@ constexpr To bit_cast(const From& from) return std::bit_cast(from); } +template +NBL_FORCE_INLINE constexpr T bitfield_insert(T base, T insert, int32_t offset, int32_t bits) +{ + NBL_CONSTEXPR T one = static_cast(1); + const T mask = (one << bits) - one; + const T shifted_mask = mask << offset; + + insert &= mask; + base &= (~shifted_mask); + base |= (insert << offset); + + return base; +} + NBL_ALIAS_TEMPLATE_FUNCTION(std::rotl, rotl); NBL_ALIAS_TEMPLATE_FUNCTION(std::rotr, rotr); NBL_ALIAS_TEMPLATE_FUNCTION(std::countl_zero, countl_zero); diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index 481d7678f7..55cfb5214f 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -5,22 +5,63 @@ namespace nbl::hlsl::ieee754 { + namespace impl + { + template + unsigned_integer_of_size::type castToUintType(T x) + { + using AsUint = typename unsigned_integer_of_size::type; + return bit_cast(x); + } + + // to avoid bit cast from uintN_t to uintN_t + template <> unsigned_integer_of_size<2>::type castToUintType(uint16_t x) { return x; } + template <> unsigned_integer_of_size<4>::type castToUintType(uint32_t x) { return x; } + template <> unsigned_integer_of_size<8>::type castToUintType(uint64_t x) { return x; } + + template + T replaceBiasedExponent(T x, uint32_t biasedExp) + { + static_assert(is_same::value || is_same::value || is_same::value, + "Invalid type! 
Only floating point or unsigned integer types are allowed."); + return bitfield_insert(x, T(biasedExp), getMantissaBitCnt(), getExponentBitCnt()); + } + + template<> + float16_t replaceBiasedExponent(float16_t x, uint32_t biasedExp) + { + return bit_cast(replaceBiasedExponent(bit_cast(x), biasedExp)); + } + + template<> + float32_t replaceBiasedExponent(float32_t x, uint32_t biasedExp) + { + return bit_cast(replaceBiasedExponent(bit_cast(x), biasedExp)); + } + + template<> + float64_t replaceBiasedExponent(float64_t x, uint32_t biasedExp) + { + return bit_cast(replaceBiasedExponent(bit_cast(x), biasedExp)); + } + } + template int getExponentBitCnt() { return 0xdeadbeefu; } template<> int getExponentBitCnt() { return 5; } template<> int getExponentBitCnt() { return 5; } - template<> int getExponentBitCnt() { return 8; } + template<> int getExponentBitCnt() { return 8; } template<> int getExponentBitCnt() { return 8; } - template<> int getExponentBitCnt() { return 11; } + template<> int getExponentBitCnt() { return 11; } template<> int getExponentBitCnt() { return 11; } template int getMantissaBitCnt() { return 0xdeadbeefu; } template<> int getMantissaBitCnt() { return 10; } template<> int getMantissaBitCnt() { return 10; } - template<> int getMantissaBitCnt() { return 23; } + template<> int getMantissaBitCnt() { return 23; } template<> int getMantissaBitCnt() { return 23; } - template<> int getMantissaBitCnt() { return 52; } + template<> int getMantissaBitCnt() { return 52; } template<> int getMantissaBitCnt() { return 52; } template @@ -36,25 +77,25 @@ namespace nbl::hlsl::ieee754 unsigned_integer_of_size::type getExponentMask() { return 0xdeadbeefu; } template<> unsigned_integer_of_size<2>::type getExponentMask() { return 0x7C00; } template<> unsigned_integer_of_size<2>::type getExponentMask() { return 0x7C00; } - template<> unsigned_integer_of_size<4>::type getExponentMask() { return 0x7F800000u; } + template<> unsigned_integer_of_size<4>::type getExponentMask() { return 0x7F800000u; } template<> unsigned_integer_of_size<4>::type getExponentMask() { return 0x7F800000u; } - template<> unsigned_integer_of_size<8>::type getExponentMask() { return 0x7FF0000000000000ull; } + template<> unsigned_integer_of_size<8>::type getExponentMask() { return 0x7FF0000000000000ull; } template<> unsigned_integer_of_size<8>::type getExponentMask() { return 0x7FF0000000000000ull; } template unsigned_integer_of_size::type getMantissaMask() { return 0xdeadbeefu; } template<> unsigned_integer_of_size<2>::type getMantissaMask() { return 0x03FF; } template<> unsigned_integer_of_size<2>::type getMantissaMask() { return 0x03FF; } - template<> unsigned_integer_of_size<4>::type getMantissaMask() { return 0x007FFFFFu; } + template<> unsigned_integer_of_size<4>::type getMantissaMask() { return 0x007FFFFFu; } template<> unsigned_integer_of_size<4>::type getMantissaMask() { return 0x007FFFFFu; } - template<> unsigned_integer_of_size<8>::type getMantissaMask() { return 0x000FFFFFFFFFFFFFull; } + template<> unsigned_integer_of_size<8>::type getMantissaMask() { return 0x000FFFFFFFFFFFFFull; } template<> unsigned_integer_of_size<8>::type getMantissaMask() { return 0x000FFFFFFFFFFFFFull; } template uint32_t extractBiasedExponent(T x) { using AsUint = typename unsigned_integer_of_size::type; - return bitfieldExtract(bit_cast(x), getMantissaBitCnt(), getExponentBitCnt()); + return bitfieldExtract(impl::castToUintType(x), getMantissaBitCnt(), getExponentBitCnt()); } template @@ -66,8 +107,7 @@ namespace nbl::hlsl::ieee754 
template T replaceBiasedExponent(T x, uint32_t biasedExp) { - using AsUint = typename unsigned_integer_of_size::type; - return bitCast(uintBitsToFloat(bitfieldInsert(bit_cast(x), biasedExp, getMantissaBitCnt(), getExponentBitCnt()))); + return impl::replaceBiasedExponent(x, biasedExp); } // performs no overflow tests, returns x*exp2(n) @@ -81,7 +121,7 @@ namespace nbl::hlsl::ieee754 unsigned_integer_of_size::type extractMantissa(T x) { using AsUint = typename unsigned_integer_of_size::type; - return (bit_cast(x) & getMantissaMask()); + return (impl::castToUintType(x) & getMantissaMask()); } } diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index 6a58c157a1..f1b383f4bc 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -661,6 +661,30 @@ struct unsigned_integer_of_size<8> using type = uint64_t; }; +template +struct float_of_size +{ + using type = void; +}; + +template<> +struct float_of_size<2> +{ + using type = float16_t; +}; + +template<> +struct float_of_size<4> +{ + using type = float32_t; +}; + +template<> +struct float_of_size<8> +{ + using type = uint64_t; +}; + } } From d54ac4d3f13c6972f2731ed57ea4c7199a3476d4 Mon Sep 17 00:00:00 2001 From: Przemek Date: Tue, 18 Jun 2024 22:10:41 +0200 Subject: [PATCH 017/432] Saving work --- examples_tests | 2 +- include/nbl/builtin/hlsl/bit.hlsl | 14 - .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 702 ++++++++++-------- .../nbl/builtin/hlsl/glsl_compat/core.hlsl | 117 +-- include/nbl/builtin/hlsl/ieee754.hlsl | 127 ++-- src/nbl/builtin/CMakeLists.txt | 2 + 6 files changed, 520 insertions(+), 444 deletions(-) diff --git a/examples_tests b/examples_tests index 2d5d301138..9874800e41 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 2d5d3011384474aec6e9527abafc707214f427bd +Subproject commit 9874800e41d75327724b5ed6cf4b11cabc7eedd7 diff --git a/include/nbl/builtin/hlsl/bit.hlsl b/include/nbl/builtin/hlsl/bit.hlsl index d9bf8b0f42..e1f1117af6 100644 --- a/include/nbl/builtin/hlsl/bit.hlsl +++ b/include/nbl/builtin/hlsl/bit.hlsl @@ -19,20 +19,6 @@ constexpr To bit_cast(const From& from) return std::bit_cast(from); } -template -NBL_FORCE_INLINE constexpr T bitfield_insert(T base, T insert, int32_t offset, int32_t bits) -{ - NBL_CONSTEXPR T one = static_cast(1); - const T mask = (one << bits) - one; - const T shifted_mask = mask << offset; - - insert &= mask; - base &= (~shifted_mask); - base |= (insert << offset); - - return base; -} - NBL_ALIAS_TEMPLATE_FUNCTION(std::rotl, rotl); NBL_ALIAS_TEMPLATE_FUNCTION(std::rotr, rotr); NBL_ALIAS_TEMPLATE_FUNCTION(std::countl_zero, countl_zero); diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index 175e610914..fe37f001be 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -1,4 +1,8 @@ +#ifndef _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_INCLUDED_ +#define _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_INCLUDED_ + #include +#include #ifdef __HLSL_VERSION #define LERP lerp @@ -25,298 +29,312 @@ #define FLOAT_ROUND_DOWN 2 #define FLOAT_ROUND_UP 3 #define FLOAT_ROUNDING_MODE FLOAT_ROUND_NEAREST_EVEN - -namespace emulated +namespace nbl +{ +namespace hlsl { - namespace impl +namespace impl +{ + template + uint64_t promoteToUint64(T val) { - nbl::hlsl::uint32_t2 umulExtended(uint32_t lhs, uint32_t rhs) - { - uint64_t product = uint64_t(lhs) * uint64_t(rhs); - 
nbl::hlsl::uint32_t2 output; - output.x = uint32_t((product & 0xFFFFFFFF00000000) >> 32); - output.y = uint32_t(product & 0x00000000FFFFFFFFull); - return output; - } - - bool isNaN64(uint64_t val) - { - return bool((0x7FF0000000000000ull & val) && (0x000FFFFFFFFFFFFFull & val)); - } + using AsFloat = unsigned_integer_of_size; + uint64_t asUint = ieee754::impl::castToUintType(val); - nbl::hlsl::uint32_t2 add64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) - { - nbl::hlsl::uint32_t2 output; - output.y = a1 + b1; - output.x = a0 + b0 + uint32_t(output.y < a1); + const uint64_t sign = (uint64_t(ieee754::getSignMask()) | asUint) << (sizeof(uint64_t) - sizeof(T) - 2); + const uint64_t exp = (uint64_t(ieee754::getExponentMask()) | asUint) << (ieee754::getMantissaBitCnt() - ieee754::getMantissaBitCnt() + ieee754::getExponentBitCnt()); + const uint64_t mantissa = (uint64_t(ieee754::getMantissaMask()) | asUint) << (ieee754::getMantissaBitCnt() - ieee754::getMantissaBitCnt()); - return output; - } + return sign | exp | mantissa; + }; + template<> uint64_t promoteToUint64(float64_t val) { return bit_cast(val); } - nbl::hlsl::uint32_t2 sub64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) - { - nbl::hlsl::uint32_t2 output; - output.y = a1 - b1; - output.x = a0 - b0 - uint32_t(a1 < b1); - - return output; - } + nbl::hlsl::uint32_t2 umulExtended(uint32_t lhs, uint32_t rhs) + { + uint64_t product = uint64_t(lhs) * uint64_t(rhs); + nbl::hlsl::uint32_t2 output; + output.x = uint32_t((product & 0xFFFFFFFF00000000) >> 32); + output.y = uint32_t(product & 0x00000000FFFFFFFFull); + return output; + } + + bool isNaN64(uint64_t val) + { + return bool((0x7FF0000000000000ull & val) && (0x000FFFFFFFFFFFFFull & val)); + } - // TODO: test - int countLeadingZeros32(uint32_t val) - { + nbl::hlsl::uint32_t2 add64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) + { + nbl::hlsl::uint32_t2 output; + output.y = a1 + b1; + output.x = a0 + b0 + uint32_t(output.y < a1); + + return output; + } + + + nbl::hlsl::uint32_t2 sub64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) + { + nbl::hlsl::uint32_t2 output; + output.y = a1 - b1; + output.x = a0 - b0 - uint32_t(a1 < b1); + + return output; + } + + // TODO: test + int countLeadingZeros32(uint32_t val) + { #ifndef __HLSL_VERSION - return 31 - nbl::hlsl::findMSB(val); + return 31 - nbl::hlsl::findMSB(val); #else - return 31 - firstbithigh(val); + return 31 - firstbithigh(val); #endif - } - - uint64_t propagateFloat64NaN(uint64_t a, uint64_t b) - { - #if defined RELAXED_NAN_PROPAGATION - return a | b; - #else + } + + uint64_t propagateFloat64NaN(uint64_t a, uint64_t b) + { + #if defined RELAXED_NAN_PROPAGATION + return a | b; + #else + + bool aIsNaN = isNaN64(a); + bool bIsNaN = isNaN64(b); + a |= 0x0008000000000000ull; + b |= 0x0008000000000000ull; + + // TODO: + //return LERP(b, LERP(a, b, nbl::hlsl::float32_t2(bIsNaN, bIsNaN)), nbl::hlsl::float32_t2(aIsNaN, aIsNaN)); + return 0xdeadbeefbadcaffeull; + #endif + } + + nbl::hlsl::uint32_t2 shortShift64Left(uint32_t a0, uint32_t a1, int count) + { + nbl::hlsl::uint32_t2 output; + output.y = a1 << count; + output.x = LERP((a0 << count | (a1 >> ((-count) & 31))), a0, count == 0); - bool aIsNaN = isNaN64(a); - bool bIsNaN = isNaN64(b); - a |= 0x0008000000000000ull; - b |= 0x0008000000000000ull; + return output; + }; + + nbl::hlsl::uint32_t2 shift64RightJamming(uint32_t a0, uint32_t a1, int count) + { + nbl::hlsl::uint32_t2 output; + const int negCount = (-count) & 31; + + output.x = LERP(0u, a0, count == 0); + output.x 
= LERP(output.x, (a0 >> count), count < 32); + + output.y = uint32_t((a0 | a1) != 0u); /* count >= 64 */ + uint32_t z1_lt64 = (a0>>(count & 31)) | uint32_t(((a0<>count) | uint32_t ((a1<> (count & 31)), count < 64); + output.y = LERP(output.y, (a0<>count), count < 32); + + a2 = LERP(a2 | a1, a2, count < 32); + output.x = LERP(output.x, a0 >> count, count < 32); + output.z |= uint32_t(a2 != 0u); + + output.x = LERP(output.x, 0u, (count == 32)); + output.y = LERP(output.y, a0, (count == 32)); + output.z = LERP(output.z, a1, (count == 32)); + output.x = LERP(output.x, a0, (count == 0)); + output.y = LERP(output.y, a1, (count == 0)); + output.z = LERP(output.z, a2, (count == 0)); + + return output; + } + + + uint64_t packFloat64(uint32_t zSign, int zExp, uint32_t zFrac0, uint32_t zFrac1) + { + nbl::hlsl::uint32_t2 z; + + z.x = zSign + (uint32_t(zExp) << 20) + zFrac0; + z.y = zFrac1; + + uint64_t output = 0u; + output |= (uint64_t(z.x) << 32) & 0xFFFFFFFF00000000ull; + output |= uint64_t(z.y); + return output; + } + + + uint64_t roundAndPackFloat64(uint32_t zSign, int zExp, uint32_t zFrac0, uint32_t zFrac1, uint32_t zFrac2) + { + bool roundNearestEven; + bool increment; + + roundNearestEven = true; + increment = int(zFrac2) < 0; + if (!roundNearestEven) + { + if (false) //(FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) + { + increment = false; + } + else + { + if (false) //(zSign != 0u) + { + //increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && + // (zFrac2 != 0u); } - - - - nbl::hlsl::uint32_t2 shortShift64Left(uint32_t a0, uint32_t a1, int count) + else { - nbl::hlsl::uint32_t2 output; - output.y = a1 << count; - output.x = LERP((a0 << count | (a1 >> ((-count) & 31))), a0, count == 0); - - return output; - }; - - nbl::hlsl::uint32_t2 shift64RightJamming(uint32_t a0, uint32_t a1, int count) + //increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && + // (zFrac2 != 0u); + } + } + } + if (0x7FD <= zExp) { - nbl::hlsl::uint32_t2 output; - const int negCount = (-count) & 31; - - output.x = LERP(0u, a0, count == 0); - output.x = LERP(output.x, (a0 >> count), count < 32); - - output.y = uint32_t((a0 | a1) != 0u); /* count >= 64 */ - uint32_t z1_lt64 = (a0>>(count & 31)) | uint32_t(((a0<>count) | uint32_t ((a1<> (count & 31)), count < 64); - output.y = LERP(output.y, (a0<>count), count < 32); - - a2 = LERP(a2 | a1, a2, count < 32); - output.x = LERP(output.x, a0 >> count, count < 32); - output.z |= uint32_t(a2 != 0u); - - output.x = LERP(output.x, 0u, (count == 32)); - output.y = LERP(output.y, a0, (count == 32)); - output.z = LERP(output.z, a1, (count == 32)); - output.x = LERP(output.x, a0, (count == 0)); - output.y = LERP(output.y, a1, (count == 0)); - output.z = LERP(output.z, a2, (count == 0)); - - return output; + if (zSign != 0u) + { + increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && (zFrac2 != 0u); } - - - uint64_t packFloat64(uint32_t zSign, int zExp, uint32_t zFrac0, uint32_t zFrac1) + else { - nbl::hlsl::uint32_t2 z; - - z.x = zSign + (uint32_t(zExp) << 20) + zFrac0; - z.y = zFrac1; - - uint64_t output = 0u; - output |= (uint64_t(z.x) << 32) & 0xFFFFFFFF00000000ull; - output |= uint64_t(z.y); - return output; + increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && (zFrac2 != 0u); } - - - uint64_t roundAndPackFloat64(uint32_t zSign, int zExp, uint32_t zFrac0, uint32_t zFrac1, uint32_t zFrac2) + } + } + + if (increment) { - bool roundNearestEven; - bool increment; - - roundNearestEven = true; - increment = int(zFrac2) < 0; - if (!roundNearestEven) - { - if (false) 
//(FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) - { - increment = false; - } - else - { - if (false) //(zSign != 0u) - { - //increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && - // (zFrac2 != 0u); - } - else - { - //increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && - // (zFrac2 != 0u); - } - } - } - if (0x7FD <= zExp) - { - if ((0x7FD < zExp) || ((zExp == 0x7FD) && (0x001FFFFFu == zFrac0 && 0xFFFFFFFFu == zFrac1) && increment)) - { - if (false) // ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) || - // ((zSign != 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP)) || - // ((zSign == 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN))) - { - return packFloat64(zSign, 0x7FE, 0x000FFFFFu, 0xFFFFFFFFu); - } - - return packFloat64(zSign, 0x7FF, 0u, 0u); - } - } - - if (zExp < 0) - { - nbl::hlsl::uint32_t3 shifted = shift64ExtraRightJamming(zFrac0, zFrac1, zFrac2, -zExp); - zFrac0 = shifted.x; - zFrac1 = shifted.y; - zFrac2 = shifted.z; - zExp = 0; - - if (roundNearestEven) - { - increment = zFrac2 < 0u; - } - else - { - if (zSign != 0u) - { - increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && (zFrac2 != 0u); - } - else - { - increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && (zFrac2 != 0u); - } - } - } - - if (increment) - { - nbl::hlsl::uint32_t2 added = add64(zFrac0, zFrac1, 0u, 1u); - zFrac0 = added.x; - zFrac1 = added.y; - zFrac1 &= ~((zFrac2 + uint32_t(zFrac2 == 0u)) & uint32_t(roundNearestEven)); - } - else - { - zExp = LERP(zExp, 0, (zFrac0 | zFrac1) == 0u); - } - - return packFloat64(zSign, zExp, zFrac0, zFrac1); + nbl::hlsl::uint32_t2 added = add64(zFrac0, zFrac1, 0u, 1u); + zFrac0 = added.x; + zFrac1 = added.y; + zFrac1 &= ~((zFrac2 + uint32_t(zFrac2 == 0u)) & uint32_t(roundNearestEven)); } - - uint64_t normalizeRoundAndPackFloat64(uint32_t sign, int exp, uint32_t frac0, uint32_t frac1) + else { - int shiftCount; - nbl::hlsl::uint32_t3 frac = nbl::hlsl::uint32_t3(frac0, frac1, 0u); - - if (frac.x == 0u) - { - exp -= 32; - frac.x = frac.y; - frac.y = 0u; - } - - shiftCount = countLeadingZeros32(frac.x) - 11; - if (0 <= shiftCount) - { - frac.xy = shortShift64Left(frac.x, frac.y, shiftCount); - } - else - { - frac.xyz = shift64ExtraRightJamming(frac.x, frac.y, 0u, -shiftCount); - } - exp -= shiftCount; - return roundAndPackFloat64(sign, exp, frac.x, frac.y, frac.z); + zExp = LERP(zExp, 0, (zFrac0 | zFrac1) == 0u); } - - static const uint64_t SIGN_MASK = 0x8000000000000000ull; - static const uint64_t EXP_MASK = 0x7FF0000000000000ull; - static const uint64_t MANTISA_MASK = 0x000FFFFFFFFFFFFFull; + + return packFloat64(zSign, zExp, zFrac0, zFrac1); + } + + uint64_t normalizeRoundAndPackFloat64(uint32_t sign, int exp, uint32_t frac0, uint32_t frac1) + { + int shiftCount; + nbl::hlsl::uint32_t3 frac = nbl::hlsl::uint32_t3(frac0, frac1, 0u); + + if (frac.x == 0u) + { + exp -= 32; + frac.x = frac.y; + frac.y = 0u; + } + + shiftCount = countLeadingZeros32(frac.x) - 11; + if (0 <= shiftCount) + { + frac.xy = shortShift64Left(frac.x, frac.y, shiftCount); + } + else + { + frac.xyz = shift64ExtraRightJamming(frac.x, frac.y, 0u, -shiftCount); + } + exp -= shiftCount; + return roundAndPackFloat64(sign, exp, frac.x, frac.y, frac.z); + } + + static const uint64_t SIGN_MASK = 0x8000000000000000ull; + static const uint64_t EXP_MASK = 0x7FF0000000000000ull; + static const uint64_t MANTISA_MASK = 0x000FFFFFFFFFFFFFull; } struct emulated_float64_t @@ -326,59 +344,61 @@ namespace emulated storage_t data; // constructors - // TODO: specializations - template - static emulated_float64_t create(T 
val) + static emulated_float64_t create(uint64_t val) { -#ifndef __HLSL_VERSION - emulated_float64_t output; - output.data = reinterpret_cast(val); - return output; -#else - uint32_t lowBits; - uint32_t highBits; - asuint(val, lowBits, highBits); - - emulated_float64_t output; - output.data = (uint64_t(highBits) << 32) | uint64_t(lowBits); - return output; -#endif + return emulated_float64_t(val); } - - static emulated_float64_t createEmulatedFloat64PreserveBitPattern(uint64_t val) + + static emulated_float64_t create(uint32_t val) { - emulated_float64_t output; - output.data = val; - return output; + return emulated_float64_t(impl::promoteToUint64(val)); + } + + static emulated_float64_t create(uint16_t val) + { + return emulated_float64_t(impl::promoteToUint64(val)); + } + + static emulated_float64_t create(float64_t val) + { + return emulated_float64_t(bit_cast(val)); } - // TODO: won't not work for uints with msb of index > 52 -#ifndef __HLSL_VERSION - template<> - static emulated_float64_t create(uint64_t val) + static emulated_float64_t create(float16_t val) { -#ifndef __HLSL_VERSION - const uint64_t msbIndex = nbl::hlsl::findMSB(val); -#else - const uint64_t msbIndex = firstbithigh(val); -#endif - uint64_t exp = ((msbIndex + 1023) << 52) & 0x7FF0000000000000; - uint64_t mantissa = (val << (52 - msbIndex)) & 0x000FFFFFFFFFFFFFull; - emulated_float64_t output; - output.data = exp | mantissa; - return output; + return emulated_float64_t(impl::promoteToUint64(val)); } -#endif - // TODO: temporary, remove -#ifndef __HLSL_VERSION - template<> - static emulated_float64_t create(double val) + + static emulated_float64_t create(float32_t val) + { + return emulated_float64_t(impl::promoteToUint64(val)); + } + + //TODO do i need that? + static emulated_float64_t createEmulatedFloat64PreserveBitPattern(uint64_t val) { emulated_float64_t output; - output.data = reinterpret_cast(val); + output.data = val; return output; } -#endif + + // TODO: won't not work for uints with msb of index > 52 +//#ifndef __HLSL_VERSION +// template<> +// static emulated_float64_t create(uint64_t val) +// { +//#ifndef __HLSL_VERSION +// const uint64_t msbIndex = nbl::hlsl::findMSB(val); +//#else +// const uint64_t msbIndex = firstbithigh(val); +//#endif +// uint64_t exp = ((msbIndex + 1023) << 52) & 0x7FF0000000000000; +// uint64_t mantissa = (val << (52 - msbIndex)) & 0x000FFFFFFFFFFFFFull; +// emulated_float64_t output; +// output.data = exp | mantissa; +// return output; +// } +//#endif // arithmetic operators emulated_float64_t operator+(const emulated_float64_t rhs) @@ -561,12 +581,12 @@ namespace emulated lhsHigh |= 0x00100000u; - nbl::hlsl::uint32_t2 shifted = emulated::impl::shortShift64Left(rhsHigh, rhsLow, 12); + nbl::hlsl::uint32_t2 shifted = impl::shortShift64Left(rhsHigh, rhsLow, 12); rhsHigh = shifted.x; rhsLow = shifted.y; nbl::hlsl::uint32_t4 fracUnpacked = impl::mul64to128(lhsHigh, lhsLow, rhsHigh, rhsLow); - fracUnpacked.xy = emulated::impl::add64(fracUnpacked.x, fracUnpacked.y, lhsHigh, lhsLow); + fracUnpacked.xy = impl::add64(fracUnpacked.x, fracUnpacked.y, lhsHigh, lhsLow); fracUnpacked.z |= uint32_t(fracUnpacked.w != 0u); if (0x00200000u <= fracUnpacked.x) { @@ -596,18 +616,6 @@ namespace emulated bool operator||(const emulated_float64_t rhs) { return bool(data) || bool(rhs.data); } bool operator!() { return !bool(data); } - // conversion operators - operator bool() { return bool(data); } - operator int() { return int(data); } - operator uint32_t() { return uint32_t(data); } - operator uint64_t() 
{ return uint64_t(data); } - operator float() { return float(data); } - //operator min16int() { return min16int(data);} - //operator float64_t() { return float64_t(data); } - //operator half() { return half(data); } - - //explicit operator int() const { return int(data); } - // OMITED OPERATORS // - not implementing bitwise and modulo operators since floating point types doesn't support them // - compound operator overload not supported in HLSL @@ -627,4 +635,54 @@ namespace emulated }; //_NBL_STATIC_INLINE_CONSTEXPR emulated_float64_t EMULATED_FLOAT64_NAN = emulated_float64_t::create(0.0 / 0.0); + +namespace ieee754 +{ + template<> int getExponentBitCnt() { return getExponentBitCnt(); } + template<> int getMantissaBitCnt() { return getMantissaBitCnt(); } + template<> int getExponentBias() { return getExponentBias(); } + template<> unsigned_integer_of_size<8>::type getExponentMask() { return getExponentMask(); } + template<> unsigned_integer_of_size<8>::type getMantissaMask() { return getMantissaMask(); } + template<> + unsigned_integer_of_size<8>::type getSignMask() + { + using AsUint = typename unsigned_integer_of_size::type; + return AsUint(0x1) << (sizeof(float64_t) * 4 - 1); + } + + template <> + uint32_t extractBiasedExponent(emulated_float64_t x) + { + return extractBiasedExponent(x.data); + } + + template <> + int extractExponent(emulated_float64_t x) + { + return extractExponent(x.data); + } + + template <> + emulated_float64_t replaceBiasedExponent(emulated_float64_t x, typename unsigned_integer_of_size::type biasedExp) + { + return emulated_float64_t(replaceBiasedExponent(x.data, biasedExp)); + } + + //// performs no overflow tests, returns x*exp2(n) + template <> + emulated_float64_t fastMulExp2(emulated_float64_t x, int n) + { + return emulated_float64_t(replaceBiasedExponent(x.data, extractBiasedExponent(x) + uint32_t(n))); + } + + template <> + unsigned_integer_of_size::type extractMantissa(emulated_float64_t x) + { + return extractMantissa(x.data); + } } + +} +} + +#endif diff --git a/include/nbl/builtin/hlsl/glsl_compat/core.hlsl b/include/nbl/builtin/hlsl/glsl_compat/core.hlsl index 1fbeb337dd..2f540916e5 100644 --- a/include/nbl/builtin/hlsl/glsl_compat/core.hlsl +++ b/include/nbl/builtin/hlsl/glsl_compat/core.hlsl @@ -15,6 +15,81 @@ namespace hlsl namespace glsl { +template +T bitfieldInsert(T base, T insert, int32_t offset, int32_t bits) +{ + NBL_CONSTEXPR T one = typename unsigned_integer_of_size::type(1); + const T mask = (one << bits) - one; + const T shifted_mask = mask << offset; + + insert &= mask; + base &= (~shifted_mask); + base |= (insert << offset); + + return base; +} + +namespace impl +{ + + template + struct bitfieldExtract {}; + + template + struct bitfieldExtract + { + static T __call(T val, uint32_t offsetBits, uint32_t numBits) + { + static_assert(is_integral::value, "T is not an integral type!"); + return val; + } + }; + +#ifndef __HLSL_VERSION + template + T _bitfieldExtract(T val, uint32_t offsetBits, uint32_t numBits) + { + const T mask = (T(0x1) << numBits) - 1; + const T shiftedMask = mask << offsetBits; + return (val & shiftedMask) >> offsetBits; + } +#endif + + template + struct bitfieldExtract + { + static T __call(T val, uint32_t offsetBits, uint32_t numBits) + { +#ifdef __HLSL_VERSION + return spirv::bitFieldSExtract(val, offsetBits, numBits); +#else + return _bitfieldExtract(val, offsetBits, numBits); +#endif + } + }; + + template + struct bitfieldExtract + { + static T __call(T val, uint32_t offsetBits, uint32_t numBits) + { + +#ifdef 
__HLSL_VERSION + return spirv::bitFieldUExtract(val, offsetBits, numBits); +#else + return _bitfieldExtract(val, offsetBits, numBits); +#endif + } + }; + +} + +template +T bitfieldExtract(T val, uint32_t offsetBits, uint32_t numBits) +{ + return impl::bitfieldExtract::value, is_integral::value>::template __call(val, offsetBits, numBits); +} + #ifdef __HLSL_VERSION /** * Generic SPIR-V @@ -112,48 +187,6 @@ void memoryBarrierShared() { spirv::memoryBarrier(spv::ScopeDevice, spv::MemorySemanticsAcquireReleaseMask | spv::MemorySemanticsWorkgroupMemoryMask); } -namespace impl -{ - -template -struct bitfieldExtract {}; - -template -struct bitfieldExtract -{ - static T __call( T val, uint32_t offsetBits, uint32_t numBits ) - { - static_assert( is_integral::value, "T is not an integral type!" ); - return val; - } -}; - -template -struct bitfieldExtract -{ - static T __call( T val, uint32_t offsetBits, uint32_t numBits ) - { - return spirv::bitFieldSExtract( val, offsetBits, numBits ); - } -}; - -template -struct bitfieldExtract -{ - static T __call( T val, uint32_t offsetBits, uint32_t numBits ) - { - return spirv::bitFieldUExtract( val, offsetBits, numBits ); - } -}; - -} - -template -T bitfieldExtract( T val, uint32_t offsetBits, uint32_t numBits ) -{ - return impl::bitfieldExtract::value, is_integral::value>::template __call(val,offsetBits,numBits); -} - #endif } diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index 55cfb5214f..1951fb33a9 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -2,100 +2,94 @@ #define _NBL_BUILTIN_HLSL_IEE754_H_INCLUDED_ #include +#include +#include -namespace nbl::hlsl::ieee754 +namespace nbl { - namespace impl +namespace hlsl +{ +namespace ieee754 +{ +namespace impl +{ + template + NBL_CONSTEXPR_STATIC_INLINE bool isTypeAllowed() + { + return is_same::value || + is_same::value || + is_same::value || + is_same::value || + is_same::value || + is_same::value; + } + + template + typename unsigned_integer_of_size::type castToUintType(T x) + { + using AsUint = typename unsigned_integer_of_size::type; + return bit_cast(x); + } + // to avoid bit cast from uintN_t to uintN_t + template <> unsigned_integer_of_size<2>::type castToUintType(uint16_t x) { return x; } + template <> unsigned_integer_of_size<4>::type castToUintType(uint32_t x) { return x; } + template <> unsigned_integer_of_size<8>::type castToUintType(uint64_t x) { return x; } + + template + T castBackToFloatType(T x) { - template - unsigned_integer_of_size::type castToUintType(T x) - { - using AsUint = typename unsigned_integer_of_size::type; - return bit_cast(x); - } - - // to avoid bit cast from uintN_t to uintN_t - template <> unsigned_integer_of_size<2>::type castToUintType(uint16_t x) { return x; } - template <> unsigned_integer_of_size<4>::type castToUintType(uint32_t x) { return x; } - template <> unsigned_integer_of_size<8>::type castToUintType(uint64_t x) { return x; } - - template - T replaceBiasedExponent(T x, uint32_t biasedExp) - { - static_assert(is_same::value || is_same::value || is_same::value, - "Invalid type! 
Only floating point or unsigned integer types are allowed."); - return bitfield_insert(x, T(biasedExp), getMantissaBitCnt(), getExponentBitCnt()); - } - - template<> - float16_t replaceBiasedExponent(float16_t x, uint32_t biasedExp) - { - return bit_cast(replaceBiasedExponent(bit_cast(x), biasedExp)); - } - - template<> - float32_t replaceBiasedExponent(float32_t x, uint32_t biasedExp) - { - return bit_cast(replaceBiasedExponent(bit_cast(x), biasedExp)); - } - - template<> - float64_t replaceBiasedExponent(float64_t x, uint32_t biasedExp) - { - return bit_cast(replaceBiasedExponent(bit_cast(x), biasedExp)); - } + using AsFloat = typename float_of_size::type; + return bit_cast(x); } + template<> uint16_t castBackToFloatType(uint16_t x) { return x; } + template<> uint32_t castBackToFloatType(uint32_t x) { return x; } + template<> uint64_t castBackToFloatType(uint64_t x) { return x; } +} template int getExponentBitCnt() { return 0xdeadbeefu; } template<> int getExponentBitCnt() { return 5; } - template<> int getExponentBitCnt() { return 5; } template<> int getExponentBitCnt() { return 8; } - template<> int getExponentBitCnt() { return 8; } template<> int getExponentBitCnt() { return 11; } - template<> int getExponentBitCnt() { return 11; } template int getMantissaBitCnt() { return 0xdeadbeefu; } template<> int getMantissaBitCnt() { return 10; } - template<> int getMantissaBitCnt() { return 10; } template<> int getMantissaBitCnt() { return 23; } - template<> int getMantissaBitCnt() { return 23; } template<> int getMantissaBitCnt() { return 52; } - template<> int getMantissaBitCnt() { return 52; } template - int getExponentBias() { return 0xdeadbeefu; } - template<> int getExponentBias() { return 15; } - template<> int getExponentBias() { return 15; } - template<> int getExponentBias() { return 127; } - template<> int getExponentBias() { return 127; } - template<> int getExponentBias() { return 1023; } - template<> int getExponentBias() { return 1023; } + int getExponentBias() + { + //static_assert(impl::isTypeAllowed(), "Invalid type! Only floating point or unsigned integer types are allowed."); + return (0x1 << (getExponentBitCnt::type>() - 1)) - 1; + } + + template + typename unsigned_integer_of_size::type getSignMask() + { + //static_assert(impl::isTypeAllowed(), "Invalid type! 
Only floating point or unsigned integer types are allowed."); + using AsUint = typename unsigned_integer_of_size::type; + return AsUint(0x1) << (sizeof(T) * 4 - 1); + } template - unsigned_integer_of_size::type getExponentMask() { return 0xdeadbeefu; } + typename unsigned_integer_of_size::type getExponentMask() { return 0xdeadbeefu; } template<> unsigned_integer_of_size<2>::type getExponentMask() { return 0x7C00; } - template<> unsigned_integer_of_size<2>::type getExponentMask() { return 0x7C00; } template<> unsigned_integer_of_size<4>::type getExponentMask() { return 0x7F800000u; } - template<> unsigned_integer_of_size<4>::type getExponentMask() { return 0x7F800000u; } template<> unsigned_integer_of_size<8>::type getExponentMask() { return 0x7FF0000000000000ull; } - template<> unsigned_integer_of_size<8>::type getExponentMask() { return 0x7FF0000000000000ull; } template - unsigned_integer_of_size::type getMantissaMask() { return 0xdeadbeefu; } + typename unsigned_integer_of_size::type getMantissaMask() { return 0xdeadbeefu; } template<> unsigned_integer_of_size<2>::type getMantissaMask() { return 0x03FF; } - template<> unsigned_integer_of_size<2>::type getMantissaMask() { return 0x03FF; } template<> unsigned_integer_of_size<4>::type getMantissaMask() { return 0x007FFFFFu; } - template<> unsigned_integer_of_size<4>::type getMantissaMask() { return 0x007FFFFFu; } template<> unsigned_integer_of_size<8>::type getMantissaMask() { return 0x000FFFFFFFFFFFFFull; } - template<> unsigned_integer_of_size<8>::type getMantissaMask() { return 0x000FFFFFFFFFFFFFull; } template uint32_t extractBiasedExponent(T x) { using AsUint = typename unsigned_integer_of_size::type; - return bitfieldExtract(impl::castToUintType(x), getMantissaBitCnt(), getExponentBitCnt()); + return glsl::bitfieldExtract(impl::castToUintType(x), getMantissaBitCnt::type>(), getExponentBitCnt::type>()); } template @@ -105,9 +99,10 @@ namespace nbl::hlsl::ieee754 } template - T replaceBiasedExponent(T x, uint32_t biasedExp) + T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp) { - return impl::replaceBiasedExponent(x, biasedExp); + //static_assert(impl::isTypeAllowed(), "Invalid type! 
Only floating point or unsigned integer types are allowed."); + return impl::castBackToFloatType(glsl::bitfieldInsert(impl::castToUintType(x), biasedExp, getMantissaBitCnt(), getExponentBitCnt())); } // performs no overflow tests, returns x*exp2(n) @@ -118,11 +113,13 @@ namespace nbl::hlsl::ieee754 } template - unsigned_integer_of_size::type extractMantissa(T x) + typename unsigned_integer_of_size::type extractMantissa(T x) { using AsUint = typename unsigned_integer_of_size::type; - return (impl::castToUintType(x) & getMantissaMask()); + return impl::castToUintType(x) & getMantissaMask::type>(); } } +} +} #endif \ No newline at end of file diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 18ee1383ba..fe9c8d4abf 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -229,6 +229,8 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/normalization/shared_nor LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/macros.h") #emulated LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated_float64_t.hlsl") +#utility +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ieee754.hlsl") #spirv intrinsics LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/spirv_intrinsics/core.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/spirv_intrinsics/fragment_shader_pixel_interlock.hlsl") From 4a937b958d16ee7609bc23aa2b8512ca921362b1 Mon Sep 17 00:00:00 2001 From: Przemek Date: Fri, 21 Jun 2024 20:03:51 +0200 Subject: [PATCH 018/432] Saving work --- examples_tests | 2 +- include/nbl/builtin/hlsl/bit.hlsl | 3 +- .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 46 +++++++++---------- .../nbl/builtin/hlsl/glsl_compat/core.hlsl | 3 +- include/nbl/builtin/hlsl/ieee754.hlsl | 28 +++++++---- include/nbl/builtin/hlsl/type_traits.hlsl | 2 +- 6 files changed, 47 insertions(+), 37 deletions(-) diff --git a/examples_tests b/examples_tests index 9874800e41..f17208a703 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 9874800e41d75327724b5ed6cf4b11cabc7eedd7 +Subproject commit f17208a7039e30af359d0e99cf896f2d013670ff diff --git a/include/nbl/builtin/hlsl/bit.hlsl b/include/nbl/builtin/hlsl/bit.hlsl index e1f1117af6..a94103e515 100644 --- a/include/nbl/builtin/hlsl/bit.hlsl +++ b/include/nbl/builtin/hlsl/bit.hlsl @@ -36,7 +36,8 @@ namespace hlsl template To bit_cast(From val) { - static_assert(sizeof(To) <= sizeof(From)); + // TODO: fix + //static_assert(sizeof(To) <= sizeof(From)); return spirv::bitcast(val); } diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index fe37f001be..f8f3b202a2 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -38,12 +38,14 @@ namespace impl template uint64_t promoteToUint64(T val) { - using AsFloat = unsigned_integer_of_size; + using AsFloat = typename float_of_size::type; uint64_t asUint = ieee754::impl::castToUintType(val); - const uint64_t sign = (uint64_t(ieee754::getSignMask()) | asUint) << (sizeof(uint64_t) - sizeof(T) - 2); - const uint64_t exp = (uint64_t(ieee754::getExponentMask()) | asUint) << (ieee754::getMantissaBitCnt() - ieee754::getMantissaBitCnt() + ieee754::getExponentBitCnt()); - const uint64_t mantissa = (uint64_t(ieee754::getMantissaMask()) | asUint) << (ieee754::getMantissaBitCnt() - ieee754::getMantissaBitCnt()); + const uint64_t sign = (uint64_t(ieee754::getSignMask()) & asUint) << (sizeof(float64_t) - sizeof(T)); + const int64_t 
newExponent = ieee754::extractExponent(val) + ieee754::getExponentBias(); + + const uint64_t exp = (uint64_t(ieee754::extractExponent(val)) + ieee754::getExponentBias()) << (ieee754::getMantissaBitCnt()); + const uint64_t mantissa = (uint64_t(ieee754::getMantissaMask()) & asUint) << (ieee754::getMantissaBitCnt() - ieee754::getMantissaBitCnt()); return sign | exp | mantissa; }; @@ -351,6 +353,9 @@ namespace impl static emulated_float64_t create(uint32_t val) { +#ifndef __HLSL_VERSION + std::cout << val; +#endif return emulated_float64_t(impl::promoteToUint64(val)); } @@ -373,14 +378,6 @@ namespace impl { return emulated_float64_t(impl::promoteToUint64(val)); } - - //TODO do i need that? - static emulated_float64_t createEmulatedFloat64PreserveBitPattern(uint64_t val) - { - emulated_float64_t output; - output.data = val; - return output; - } // TODO: won't not work for uints with msb of index > 52 //#ifndef __HLSL_VERSION @@ -403,7 +400,7 @@ namespace impl // arithmetic operators emulated_float64_t operator+(const emulated_float64_t rhs) { - emulated_float64_t retval = createEmulatedFloat64PreserveBitPattern(0u); + emulated_float64_t retval = create(0u); uint32_t lhsSign = uint32_t((data & 0x8000000000000000ull) >> 32); uint32_t rhsSign = uint32_t((rhs.data & 0x8000000000000000ull) >> 32); @@ -428,12 +425,12 @@ namespace impl //if (lhsExp == 0x7FF) //{ // bool propagate = (lhsMantissa | rhsMantissa) != 0u; - // return createEmulatedFloat64PreserveBitPattern(LERP(data, impl::propagateFloat64NaN(data, rhs.data), propagate)); + // return create(LERP(data, impl::propagateFloat64NaN(data, rhs.data), propagate)); //} frac.xy = impl::add64(lhsHigh, lhsLow, rhsHigh, rhsLow); if (lhsExp == 0) - return createEmulatedFloat64PreserveBitPattern(impl::packFloat64(lhsSign, 0, frac.x, frac.y)); + return create(impl::packFloat64(lhsSign, 0, frac.x, frac.y)); frac.z = 0u; frac.x |= 0x00200000u; exp = lhsExp; @@ -446,13 +443,12 @@ namespace impl EXCHANGE(lhsHigh, rhsHigh); EXCHANGE(lhsLow, rhsLow); EXCHANGE(lhsExp, rhsExp); - EXCHANGE(lhsExp, rhsExp); } if (lhsExp == 0x7FF) { bool propagate = (lhsHigh | lhsLow) != 0u; - return createEmulatedFloat64PreserveBitPattern(LERP(0x7FF0000000000000ull | (uint64_t(lhsSign) << 32), impl::propagateFloat64NaN(data, rhs.data), propagate)); + return create(LERP(0x7FF0000000000000ull | (uint64_t(lhsSign) << 32), impl::propagateFloat64NaN(data, rhs.data), propagate)); } expDiff = LERP(ABS(expDiff), ABS(expDiff) - 1, rhsExp == 0); @@ -472,11 +468,11 @@ namespace impl ++exp; } - return createEmulatedFloat64PreserveBitPattern(impl::roundAndPackFloat64(lhsSign, exp, frac.x, frac.y, frac.z)); + return create(impl::roundAndPackFloat64(lhsSign, exp, frac.x, frac.y, frac.z)); } // cannot happen but compiler cries about not every path returning value - return createEmulatedFloat64PreserveBitPattern(0xdeadbeefbadcaffeull); + return create(0xdeadbeefbadcaffeull); } else { @@ -516,7 +512,7 @@ namespace impl frac.xy = impl::sub64(lhsHigh, lhsLow, rhsHigh, rhsLow); exp = lhsExp; --exp; - return createEmulatedFloat64PreserveBitPattern(impl::normalizeRoundAndPackFloat64(lhsSign, exp - 10, frac.x, frac.y)); + return create(impl::normalizeRoundAndPackFloat64(lhsSign, exp - 10, frac.x, frac.y)); } //if (lhsExp == 0x7FF) //{ @@ -552,13 +548,13 @@ namespace impl lhsSign ^= signOfDifference; uint64_t retval_0 = impl::packFloat64(uint32_t(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) << 31, 0, 0u, 0u); uint64_t retval_1 = impl::normalizeRoundAndPackFloat64(lhsSign, exp - 11, frac.x, frac.y); - return 
createEmulatedFloat64PreserveBitPattern(LERP(retval_0, retval_1, frac.x != 0u || frac.y != 0u)); + return create(LERP(retval_0, retval_1, frac.x != 0u || frac.y != 0u)); } } emulated_float64_t operator-(emulated_float64_t rhs) { - emulated_float64_t lhs = createEmulatedFloat64PreserveBitPattern(data); + emulated_float64_t lhs = create(data); emulated_float64_t rhsFlipped = rhs.flipSign(); return lhs + rhsFlipped; @@ -594,13 +590,13 @@ namespace impl ++exp; } - return createEmulatedFloat64PreserveBitPattern(impl::roundAndPackFloat64(sign, exp, fracUnpacked.x, fracUnpacked.y, fracUnpacked.z)); + return create(impl::roundAndPackFloat64(sign, exp, fracUnpacked.x, fracUnpacked.y, fracUnpacked.z)); } // TODO emulated_float64_t operator/(const emulated_float64_t rhs) { - return createEmulatedFloat64PreserveBitPattern(0xdeadbeefbadcaffeull); + return create(0xdeadbeefbadcaffeull); } // relational operators @@ -625,7 +621,7 @@ namespace impl emulated_float64_t flipSign() { const uint64_t flippedSign = ((~data) & 0x8000000000000000ull); - return createEmulatedFloat64PreserveBitPattern(flippedSign | (data & 0x7FFFFFFFFFFFFFFFull)); + return create(flippedSign | (data & 0x7FFFFFFFFFFFFFFFull)); } bool isNaN() diff --git a/include/nbl/builtin/hlsl/glsl_compat/core.hlsl b/include/nbl/builtin/hlsl/glsl_compat/core.hlsl index 2337618df1..b9f24e65be 100644 --- a/include/nbl/builtin/hlsl/glsl_compat/core.hlsl +++ b/include/nbl/builtin/hlsl/glsl_compat/core.hlsl @@ -87,7 +87,8 @@ namespace impl template T bitfieldExtract(T val, uint32_t offsetBits, uint32_t numBits) { - return impl::bitfieldExtract::value, is_integral::value>::template __call(val, offsetBits, numBits); + impl::bitfieldExtract::value, is_integral::value> extractStruct; + return extractStruct.__call(val, offsetBits, numBits); } #ifdef __HLSL_VERSION diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index 1951fb33a9..2cd74caa54 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -5,6 +5,18 @@ #include #include +// TODO: delete +#ifdef __HLSL_VERSION +#define staticAssertTmp(...) ; +#else +void dbgBreakIf(bool condition) +{ + if (!condition) + __debugbreak(); +} +#define staticAssertTmp(x, ...) dbgBreakIf(x); +#endif + namespace nbl { namespace hlsl @@ -47,13 +59,13 @@ namespace impl } template - int getExponentBitCnt() { return 0xdeadbeefu; } + int getExponentBitCnt() { staticAssertTmp(false); return 0xdeadbeefu; } template<> int getExponentBitCnt() { return 5; } template<> int getExponentBitCnt() { return 8; } template<> int getExponentBitCnt() { return 11; } template - int getMantissaBitCnt() { return 0xdeadbeefu; } + int getMantissaBitCnt() { staticAssertTmp(false); return 0xdeadbeefu; /*TODO: add static assert fail to every 0xdeadbeef, fix all static asserts*/ } template<> int getMantissaBitCnt() { return 10; } template<> int getMantissaBitCnt() { return 23; } template<> int getMantissaBitCnt() { return 52; } @@ -61,26 +73,26 @@ namespace impl template int getExponentBias() { - //static_assert(impl::isTypeAllowed(), "Invalid type! Only floating point or unsigned integer types are allowed."); + staticAssertTmp(impl::isTypeAllowed(), "Invalid type! Only floating point or unsigned integer types are allowed."); return (0x1 << (getExponentBitCnt::type>() - 1)) - 1; } template typename unsigned_integer_of_size::type getSignMask() { - //static_assert(impl::isTypeAllowed(), "Invalid type! 
Only floating point or unsigned integer types are allowed."); + staticAssertTmp(impl::isTypeAllowed(), "Invalid type! Only floating point or unsigned integer types are allowed."); using AsUint = typename unsigned_integer_of_size::type; return AsUint(0x1) << (sizeof(T) * 4 - 1); } template - typename unsigned_integer_of_size::type getExponentMask() { return 0xdeadbeefu; } + typename unsigned_integer_of_size::type getExponentMask() { staticAssertTmp(false); return 0xdeadbeefu; } template<> unsigned_integer_of_size<2>::type getExponentMask() { return 0x7C00; } template<> unsigned_integer_of_size<4>::type getExponentMask() { return 0x7F800000u; } template<> unsigned_integer_of_size<8>::type getExponentMask() { return 0x7FF0000000000000ull; } template - typename unsigned_integer_of_size::type getMantissaMask() { return 0xdeadbeefu; } + typename unsigned_integer_of_size::type getMantissaMask() { staticAssertTmp(false); return 0xdeadbeefu; } template<> unsigned_integer_of_size<2>::type getMantissaMask() { return 0x03FF; } template<> unsigned_integer_of_size<4>::type getMantissaMask() { return 0x007FFFFFu; } template<> unsigned_integer_of_size<8>::type getMantissaMask() { return 0x000FFFFFFFFFFFFFull; } @@ -95,13 +107,13 @@ namespace impl template int extractExponent(T x) { - return int(extractBiasedExponent(x) - getExponentBias()); + return int(extractBiasedExponent(x)) - int(getExponentBias()); } template T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp) { - //static_assert(impl::isTypeAllowed(), "Invalid type! Only floating point or unsigned integer types are allowed."); + staticAssertTmp(impl::isTypeAllowed(), "Invalid type! Only floating point or unsigned integer types are allowed."); return impl::castBackToFloatType(glsl::bitfieldInsert(impl::castToUintType(x), biasedExp, getMantissaBitCnt(), getExponentBitCnt())); } diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index 286f86d3a0..8dab4bcb1c 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -698,7 +698,7 @@ struct float_of_size<4> template<> struct float_of_size<8> { - using type = uint64_t; + using type = float64_t; }; } From 669983fddc8cf281f434a80ea21e4d9a60f5cc38 Mon Sep 17 00:00:00 2001 From: Przemek Date: Mon, 8 Jul 2024 16:58:32 +0200 Subject: [PATCH 019/432] Improvements --- include/nbl/builtin/hlsl/algorithm.hlsl | 79 ++ .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 827 ++++++++++-------- include/nbl/builtin/hlsl/ieee754.hlsl | 152 ++-- include/nbl/builtin/hlsl/tgmath.hlsl | 25 + src/nbl/builtin/CMakeLists.txt | 1 + 5 files changed, 661 insertions(+), 423 deletions(-) create mode 100644 include/nbl/builtin/hlsl/tgmath.hlsl diff --git a/include/nbl/builtin/hlsl/algorithm.hlsl b/include/nbl/builtin/hlsl/algorithm.hlsl index 757105081b..564ef59f20 100644 --- a/include/nbl/builtin/hlsl/algorithm.hlsl +++ b/include/nbl/builtin/hlsl/algorithm.hlsl @@ -11,6 +11,84 @@ namespace nbl namespace hlsl { +namespace impl +{ +#ifdef __HLSL_VERSION + + // TODO: use structs + + template + NBL_CONSTEXPR_STATIC_INLINE void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) + { + T tmp = lhs; + lhs = rhs; + rhs = tmp; + } + + template<> + NBL_CONSTEXPR_STATIC_INLINE void swap(NBL_REF_ARG(uint16_t) lhs, NBL_REF_ARG(uint16_t) rhs) + { + lhs ^= rhs; + rhs ^= lhs; + lhs ^= rhs; + } + + template<> + NBL_CONSTEXPR_STATIC_INLINE void swap(NBL_REF_ARG(uint32_t) lhs, NBL_REF_ARG(uint32_t) rhs) + { + lhs ^= rhs; + rhs ^= lhs; + lhs 
^= rhs; + } + + template<> + NBL_CONSTEXPR_STATIC_INLINE void swap(NBL_REF_ARG(uint64_t) lhs, NBL_REF_ARG(uint64_t) rhs) + { + lhs ^= rhs; + rhs ^= lhs; + lhs ^= rhs; + } + + template<> + NBL_CONSTEXPR_STATIC_INLINE void swap(NBL_REF_ARG(int16_t) lhs, NBL_REF_ARG(int16_t) rhs) + { + lhs ^= rhs; + rhs ^= lhs; + lhs ^= rhs; + } + + template<> + NBL_CONSTEXPR_STATIC_INLINE void swap(NBL_REF_ARG(int32_t) lhs, NBL_REF_ARG(int32_t) rhs) + { + lhs ^= rhs; + rhs ^= lhs; + lhs ^= rhs; + } + + template<> + NBL_CONSTEXPR_STATIC_INLINE void swap(NBL_REF_ARG(int64_t) lhs, NBL_REF_ARG(int64_t) rhs) + { + lhs ^= rhs; + rhs ^= lhs; + lhs ^= rhs; + } +#else + template + NBL_CONSTEXPR_STATIC_INLINE void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) + { + std::swap(lhs, rhs); + } +#endif +} + +template +NBL_CONSTEXPR_STATIC_INLINE void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) +{ + impl::swap(lhs, rhs); +} + + +#ifdef __HLSL_VERSION namespace impl { @@ -146,6 +224,7 @@ uint upper_bound(inout Accessor accessor, const uint begin, const uint end, cons return impl::upper_bound(accessor,begin,end,value); } +#endif } } diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index f8f3b202a2..a631b0ff03 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -3,32 +3,44 @@ #include #include - -#ifdef __HLSL_VERSION -#define LERP lerp -#else -#define LERP nbl::hlsl::lerp -#endif - -#ifdef __HLSL_VERSION -#define ABS abs -#else -#define ABS std::abs -#endif - -// TODO: inline function -#define EXCHANGE(a, b) \ - do { \ - a ^= b; \ - b ^= a; \ - a ^= b; \ - } while (false) +#include +#include + +// TODO: when it will be possible, use this unions wherever they fit: +/* +* union Mantissa +* { +* struct +* { +* uint32_t highBits; +* uint64_t lowBits; +* }; +* +* uint32_t2 packed; +* }; +* +*/ + +/* +* union Mantissa +* { +* struct +* { +* uint64_t lhs; +* uint64_t rhs; +* }; +* +* uint32_t4 packed; +* }; +* +*/ #define FLOAT_ROUND_NEAREST_EVEN 0 #define FLOAT_ROUND_TO_ZERO 1 #define FLOAT_ROUND_DOWN 2 #define FLOAT_ROUND_UP 3 #define FLOAT_ROUNDING_MODE FLOAT_ROUND_NEAREST_EVEN + namespace nbl { namespace hlsl @@ -41,11 +53,11 @@ namespace impl using AsFloat = typename float_of_size::type; uint64_t asUint = ieee754::impl::castToUintType(val); - const uint64_t sign = (uint64_t(ieee754::getSignMask()) & asUint) << (sizeof(float64_t) - sizeof(T)); - const int64_t newExponent = ieee754::extractExponent(val) + ieee754::getExponentBias(); + const uint64_t sign = (uint64_t(ieee754::traits::signMask) & asUint) << (sizeof(float64_t) - sizeof(T)); + const int64_t newExponent = ieee754::extractExponent(val) + ieee754::traits::exponentBias; - const uint64_t exp = (uint64_t(ieee754::extractExponent(val)) + ieee754::getExponentBias()) << (ieee754::getMantissaBitCnt()); - const uint64_t mantissa = (uint64_t(ieee754::getMantissaMask()) & asUint) << (ieee754::getMantissaBitCnt() - ieee754::getMantissaBitCnt()); + const uint64_t exp = (uint64_t(ieee754::extractExponent(val)) + ieee754::traits::exponentBias) << (ieee754::traits::mantissaBitCnt); + const uint64_t mantissa = (uint64_t(ieee754::traits::mantissaMask) & asUint) << (ieee754::traits::exponentBias - ieee754::traits::mantissaBitCnt); return sign | exp | mantissa; }; @@ -60,10 +72,42 @@ namespace impl output.y = uint32_t(product & 0x00000000FFFFFFFFull); return output; } - - bool isNaN64(uint64_t val) + + uint64_t propagateFloat64NaN(uint64_t lhs, uint64_t rhs) 
{ - return bool((0x7FF0000000000000ull & val) && (0x000FFFFFFFFFFFFFull & val)); +#if defined RELAXED_NAN_PROPAGATION + return lhs | rhs; +#else + const bool lhsIsNaN = isnan(bit_cast(lhs)); + const bool rhsIsNaN = isnan(bit_cast(rhs)); + lhs |= 0x0000000000080000ull; + rhs |= 0x0000000000080000ull; + + return lerp(rhs, lerp(lhs, rhs, rhsIsNaN), lhsIsNaN); +#endif + } + + uint64_t packFloat64(uint32_t zSign, int zExp, uint32_t zFrac0, uint32_t zFrac1) + { + nbl::hlsl::uint32_t2 z; + + z.x = zSign + (uint32_t(zExp) << 20) + zFrac0; + z.y = zFrac1; + + uint64_t output = 0u; + output |= (uint64_t(z.x) << 32) & 0xFFFFFFFF00000000ull; + output |= uint64_t(z.y); + return output; + } + + uint32_t2 packUint64(uint64_t val) + { + return uint32_t2((val & 0xFFFFFFFF00000000ull) >> 32, val & 0x00000000FFFFFFFFull); + } + + uint64_t unpackUint64(uint32_t2 val) + { + return ((uint64_t(val.x) & 0x00000000FFFFFFFFull) << 32) | uint64_t(val.y); } nbl::hlsl::uint32_t2 add64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) @@ -95,248 +139,233 @@ namespace impl #endif } - uint64_t propagateFloat64NaN(uint64_t a, uint64_t b) - { - #if defined RELAXED_NAN_PROPAGATION - return a | b; - #else - - bool aIsNaN = isNaN64(a); - bool bIsNaN = isNaN64(b); - a |= 0x0008000000000000ull; - b |= 0x0008000000000000ull; - - // TODO: - //return LERP(b, LERP(a, b, nbl::hlsl::float32_t2(bIsNaN, bIsNaN)), nbl::hlsl::float32_t2(aIsNaN, aIsNaN)); - return 0xdeadbeefbadcaffeull; - #endif - } - - nbl::hlsl::uint32_t2 shortShift64Left(uint32_t a0, uint32_t a1, int count) - { - nbl::hlsl::uint32_t2 output; - output.y = a1 << count; - output.x = LERP((a0 << count | (a1 >> ((-count) & 31))), a0, count == 0); - - return output; - }; - - nbl::hlsl::uint32_t2 shift64RightJamming(uint32_t a0, uint32_t a1, int count) + uint32_t2 shift64RightJamming(uint32_t2 val, int count) { - nbl::hlsl::uint32_t2 output; + uint32_t2 output; const int negCount = (-count) & 31; - output.x = LERP(0u, a0, count == 0); - output.x = LERP(output.x, (a0 >> count), count < 32); + output.x = lerp(0u, val.x, count == 0); + output.x = lerp(output.x, (val.x >> count), count < 32); - output.y = uint32_t((a0 | a1) != 0u); /* count >= 64 */ - uint32_t z1_lt64 = (a0>>(count & 31)) | uint32_t(((a0<>count) | uint32_t ((a1<= 64 */ + uint32_t z1_lt64 = (val.x>>(count & 31)) | uint32_t(((val.x<>count) | uint32_t((val.y<> (count & 31)), count < 64); - output.y = LERP(output.y, (a0<>count), count < 32); + output.y = lerp(0u, (val.x >> (count & 31)), count < 64); + output.y = lerp(output.y, (val.x << negCount) | (val.y >> count), count < 32); - a2 = LERP(a2 | a1, a2, count < 32); - output.x = LERP(output.x, a0 >> count, count < 32); - output.z |= uint32_t(a2 != 0u); + val.z = lerp(val.z | val.y, val.z, count < 32); + output.x = lerp(output.x, val.x >> count, count < 32); + output.z |= uint32_t(val.z != 0u); - output.x = LERP(output.x, 0u, (count == 32)); - output.y = LERP(output.y, a0, (count == 32)); - output.z = LERP(output.z, a1, (count == 32)); - output.x = LERP(output.x, a0, (count == 0)); - output.y = LERP(output.y, a1, (count == 0)); - output.z = LERP(output.z, a2, (count == 0)); + output.x = lerp(output.x, 0u, (count == 32)); + output.y = lerp(output.y, val.x, (count == 32)); + output.z = lerp(output.z, val.y, (count == 32)); + output.x = lerp(output.x, val.x, (count == 0)); + output.y = lerp(output.y, val.y, (count == 0)); + output.z = lerp(output.z, val.z, (count == 0)); return output; } - - - uint64_t packFloat64(uint32_t zSign, int zExp, uint32_t zFrac0, uint32_t 
zFrac1) + + uint64_t shortShift64Left(uint64_t val, int count) { - nbl::hlsl::uint32_t2 z; - - z.x = zSign + (uint32_t(zExp) << 20) + zFrac0; - z.y = zFrac1; - - uint64_t output = 0u; - output |= (uint64_t(z.x) << 32) & 0xFFFFFFFF00000000ull; - output |= uint64_t(z.y); - return output; + const uint32_t2 packed = packUint64(val); + + nbl::hlsl::uint32_t2 output; + output.y = packed.y << count; + // TODO: fix + output.x = lerp((packed.x << count | (packed.y >> ((-count) & 31))), packed.x, count == 0); + + // y = 3092377600 + // x = 2119009566 + return unpackUint64(output); + }; + + uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) + { + return signShifted + expShifted + mantissa; } - - uint64_t roundAndPackFloat64(uint32_t zSign, int zExp, uint32_t zFrac0, uint32_t zFrac1, uint32_t zFrac2) + uint64_t roundAndPackFloat64(uint64_t zSign, int zExp, uint32_t3 mantissaExtended) { bool roundNearestEven; bool increment; roundNearestEven = true; - increment = int(zFrac2) < 0; + increment = int(mantissaExtended.z) < 0; if (!roundNearestEven) { - if (false) //(FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) - { - increment = false; - } - else - { - if (false) //(zSign != 0u) - { - //increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && - // (zFrac2 != 0u); - } - else - { - //increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && - // (zFrac2 != 0u); - } - } + if (false) //(FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) + { + increment = false; + } + else + { + if (false) //(zSign != 0u) + { + //increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && + // (zFrac2 != 0u); + } + else + { + //increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && + // (zFrac2 != 0u); + } + } } if (0x7FD <= zExp) { - if ((0x7FD < zExp) || ((zExp == 0x7FD) && (0x001FFFFFu == zFrac0 && 0xFFFFFFFFu == zFrac1) && increment)) - { - if (false) // ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) || - // ((zSign != 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP)) || - // ((zSign == 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN))) - { - return packFloat64(zSign, 0x7FE, 0x000FFFFFu, 0xFFFFFFFFu); - } - - return packFloat64(zSign, 0x7FF, 0u, 0u); + if ((0x7FD < zExp) || ((zExp == 0x7FD) && (0x001FFFFFu == mantissaExtended.x && 0xFFFFFFFFu == mantissaExtended.y) && increment)) + { + if (false) // ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) || + // ((zSign != 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP)) || + // ((zSign == 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN))) + { + return packFloat64(zSign, 0x7FE, 0x000FFFFFu, 0xFFFFFFFFu); + } + + return packFloat64(zSign, 0x7FF, 0u, 0u); } } if (zExp < 0) { - nbl::hlsl::uint32_t3 shifted = shift64ExtraRightJamming(zFrac0, zFrac1, zFrac2, -zExp); - zFrac0 = shifted.x; - zFrac1 = shifted.y; - zFrac2 = shifted.z; - zExp = 0; - - if (roundNearestEven) - { - increment = zFrac2 < 0u; - } - else - { - if (zSign != 0u) - { - increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && (zFrac2 != 0u); - } - else - { - increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && (zFrac2 != 0u); - } - } + mantissaExtended = shift64ExtraRightJamming(mantissaExtended, -zExp); + zExp = 0; + + if (roundNearestEven) + { + increment = mantissaExtended.z < 0u; + } + else + { + if (zSign != 0u) + { + increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && (mantissaExtended.z != 0u); + } + else + { + increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && (mantissaExtended.z != 0u); + } + } } if (increment) { - nbl::hlsl::uint32_t2 added = add64(zFrac0, zFrac1, 0u, 1u); - zFrac0 = 
added.x; - zFrac1 = added.y; - zFrac1 &= ~((zFrac2 + uint32_t(zFrac2 == 0u)) & uint32_t(roundNearestEven)); + const uint64_t added = impl::unpackUint64(uint32_t2(mantissaExtended.xy)) + 1ull; + mantissaExtended.xy = packUint64(added); + mantissaExtended.y &= ~((mantissaExtended.z + uint32_t(mantissaExtended.z == 0u)) & uint32_t(roundNearestEven)); } else { - zExp = LERP(zExp, 0, (zFrac0 | zFrac1) == 0u); + zExp = lerp(zExp, 0, (mantissaExtended.x | mantissaExtended.y) == 0u); } - return packFloat64(zSign, zExp, zFrac0, zFrac1); + return assembleFloat64(zSign, uint64_t(zExp) << ieee754::traits::mantissaBitCnt, unpackUint64(mantissaExtended.xy)); } - uint64_t normalizeRoundAndPackFloat64(uint32_t sign, int exp, uint32_t frac0, uint32_t frac1) + uint64_t normalizeRoundAndPackFloat64(uint64_t sign, int exp, uint32_t frac0, uint32_t frac1) { int shiftCount; nbl::hlsl::uint32_t3 frac = nbl::hlsl::uint32_t3(frac0, frac1, 0u); if (frac.x == 0u) { - exp -= 32; - frac.x = frac.y; - frac.y = 0u; + exp -= 32; + frac.x = frac.y; + frac.y = 0u; } shiftCount = countLeadingZeros32(frac.x) - 11; if (0 <= shiftCount) { - frac.xy = shortShift64Left(frac.x, frac.y, shiftCount); + frac.xy = shortShift64Left(unpackUint64(frac.xy), shiftCount); } else { - frac.xyz = shift64ExtraRightJamming(frac.x, frac.y, 0u, -shiftCount); + frac.xyz = shift64ExtraRightJamming(uint32_t3(frac.xy, 0), -shiftCount); } exp -= shiftCount; - return roundAndPackFloat64(sign, exp, frac.x, frac.y, frac.z); + return roundAndPackFloat64(sign, exp, frac); } - - static const uint64_t SIGN_MASK = 0x8000000000000000ull; - static const uint64_t EXP_MASK = 0x7FF0000000000000ull; - static const uint64_t MANTISA_MASK = 0x000FFFFFFFFFFFFFull; + + void normalizeFloat64Subnormal(uint64_t mantissa, + NBL_REF_ARG(int) outExp, + NBL_REF_ARG(uint64_t) outMantissa) + { + uint32_t2 mantissaPacked = packUint64(mantissa); + int shiftCount; + uint32_t2 temp; + shiftCount = countLeadingZeros32(lerp(mantissaPacked.x, mantissaPacked.y, mantissaPacked.x == 0u)) - 11; + outExp = lerp(1 - shiftCount, -shiftCount - 31, mantissaPacked.x == 0u); + + temp.x = lerp(mantissaPacked.y << shiftCount, mantissaPacked.y >> (-shiftCount), shiftCount < 0); + temp.y = lerp(0u, mantissaPacked.y << (shiftCount & 31), shiftCount < 0); + + shortShift64Left(impl::unpackUint64(mantissaPacked), shiftCount); + + outMantissa = lerp(outMantissa, unpackUint64(temp), mantissaPacked.x == 0); + } + } struct emulated_float64_t @@ -346,144 +375,130 @@ namespace impl storage_t data; // constructors - static emulated_float64_t create(uint64_t val) + /*static emulated_float64_t create(uint16_t val) { - return emulated_float64_t(val); + return emulated_float64_t(bit_cast(float64_t(val))); + }*/ + + static emulated_float64_t create(int32_t val) + { + return emulated_float64_t(bit_cast(float64_t(val))); + } + + static emulated_float64_t create(int64_t val) + { + return emulated_float64_t(bit_cast(float64_t(val))); } static emulated_float64_t create(uint32_t val) { -#ifndef __HLSL_VERSION - std::cout << val; -#endif - return emulated_float64_t(impl::promoteToUint64(val)); + return emulated_float64_t(bit_cast(float64_t(val))); } - static emulated_float64_t create(uint16_t val) + static emulated_float64_t create(uint64_t val) { - return emulated_float64_t(impl::promoteToUint64(val)); + return emulated_float64_t(bit_cast(float64_t(val))); } static emulated_float64_t create(float64_t val) { - return emulated_float64_t(bit_cast(val)); + return emulated_float64_t(bit_cast(val)); } - static 
emulated_float64_t create(float16_t val) + // TODO: unresolved external symbol imath_half_to_float_table + /*static emulated_float64_t create(float16_t val) { - return emulated_float64_t(impl::promoteToUint64(val)); - } + return emulated_float64_t(bit_cast(float64_t(val))); + }*/ static emulated_float64_t create(float32_t val) { - return emulated_float64_t(impl::promoteToUint64(val)); + return emulated_float64_t(bit_cast(float64_t(val))); + } + + static emulated_float64_t createPreserveBitPattern(uint64_t val) + { + return emulated_float64_t(val); } - - // TODO: won't not work for uints with msb of index > 52 -//#ifndef __HLSL_VERSION -// template<> -// static emulated_float64_t create(uint64_t val) -// { -//#ifndef __HLSL_VERSION -// const uint64_t msbIndex = nbl::hlsl::findMSB(val); -//#else -// const uint64_t msbIndex = firstbithigh(val); -//#endif -// uint64_t exp = ((msbIndex + 1023) << 52) & 0x7FF0000000000000; -// uint64_t mantissa = (val << (52 - msbIndex)) & 0x000FFFFFFFFFFFFFull; -// emulated_float64_t output; -// output.data = exp | mantissa; -// return output; -// } -//#endif // arithmetic operators emulated_float64_t operator+(const emulated_float64_t rhs) { - emulated_float64_t retval = create(0u); - - uint32_t lhsSign = uint32_t((data & 0x8000000000000000ull) >> 32); - uint32_t rhsSign = uint32_t((rhs.data & 0x8000000000000000ull) >> 32); - - uint32_t lhsLow = uint32_t(data & 0x00000000FFFFFFFFull); - uint32_t rhsLow = uint32_t(rhs.data & 0x00000000FFFFFFFFull); - uint32_t lhsHigh = uint32_t((data & 0x000FFFFF00000000ull) >> 32); - uint32_t rhsHigh = uint32_t((rhs.data & 0x000FFFFF00000000ull) >> 32); - - int lhsExp = int((data >> 52) & 0x7FFull); - int rhsExp = int((rhs.data >> 52) & 0x7FFull); + emulated_float64_t retval = createPreserveBitPattern(0u); + + uint64_t mantissa; + uint32_t3 mantissaExtended; + int biasedExp; + + uint64_t lhsSign = data & ieee754::traits::signMask; + uint64_t rhsSign = rhs.data & ieee754::traits::signMask; + uint64_t lhsMantissa = ieee754::extractMantissa(data); + uint64_t rhsMantissa = ieee754::extractMantissa(rhs.data); + int lhsBiasedExp = ieee754::extractBiasedExponent(data); + int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); - int expDiff = lhsExp - rhsExp; + int expDiff = lhsBiasedExp - rhsBiasedExp; if (lhsSign == rhsSign) { - nbl::hlsl::uint32_t3 frac; - int exp; - if (expDiff == 0) { - //if (lhsExp == 0x7FF) - //{ - // bool propagate = (lhsMantissa | rhsMantissa) != 0u; - // return create(LERP(data, impl::propagateFloat64NaN(data, rhs.data), propagate)); - //} + //if (lhsExp == 0x7FF) + //{ + // bool propagate = (lhsMantissa | rhsMantissa) != 0u; + // return createPreserveBitPattern(lerp(data, impl::propagateFloat64NaN(data, rhs.data), propagate)); + //} - frac.xy = impl::add64(lhsHigh, lhsLow, rhsHigh, rhsLow); - if (lhsExp == 0) - return create(impl::packFloat64(lhsSign, 0, frac.x, frac.y)); - frac.z = 0u; - frac.x |= 0x00200000u; - exp = lhsExp; - frac = impl::shift64ExtraRightJamming(frac.x, frac.y, frac.z, 1); + mantissa = lhsMantissa + rhsMantissa; + if (lhsBiasedExp == 0) + return createPreserveBitPattern(impl::assembleFloat64(lhsSign, 0, mantissa)); + mantissaExtended.xy = impl::packUint64(mantissa); + mantissaExtended.x |= 0x00200000u; + mantissaExtended.z = 0u; + biasedExp = lhsBiasedExp; + + mantissaExtended = impl::shift64ExtraRightJamming(mantissaExtended, 1); } else { if (expDiff < 0) { - EXCHANGE(lhsHigh, rhsHigh); - EXCHANGE(lhsLow, rhsLow); - EXCHANGE(lhsExp, rhsExp); + swap(lhsMantissa, rhsMantissa); + 
swap(lhsBiasedExp, rhsBiasedExp); } - if (lhsExp == 0x7FF) + if (lhsBiasedExp == 0x7FF) { - bool propagate = (lhsHigh | lhsLow) != 0u; - return create(LERP(0x7FF0000000000000ull | (uint64_t(lhsSign) << 32), impl::propagateFloat64NaN(data, rhs.data), propagate)); + const bool propagate = (lhsMantissa) != 0u; + return createPreserveBitPattern(lerp(ieee754::traits::exponentMask | lhsSign, impl::propagateFloat64NaN(data, rhs.data), propagate)); } - expDiff = LERP(ABS(expDiff), ABS(expDiff) - 1, rhsExp == 0); - rhsHigh = LERP(rhsHigh | 0x00100000u, rhsHigh, rhsExp == 0); - nbl::hlsl::float32_t3 shifted = impl::shift64ExtraRightJamming(rhsHigh, rhsLow, 0u, expDiff); - rhsHigh = shifted.x; - rhsLow = shifted.y; - frac.z = shifted.z; - exp = lhsExp; - - lhsHigh |= 0x00100000u; - frac.xy = impl::add64(lhsHigh, lhsLow, rhsHigh, rhsLow); - --exp; - if (!(frac.x < 0x00200000u)) + expDiff = lerp(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); + rhsMantissa = lerp(rhsMantissa | 0x0010000000000000ull, rhsMantissa, rhsBiasedExp == 0); + const uint32_t3 shifted = impl::shift64ExtraRightJamming(uint32_t3(impl::packUint64(rhsMantissa), 0u), expDiff); + rhsMantissa = impl::unpackUint64(shifted.xy); + mantissaExtended.z = shifted.z; + biasedExp = lhsBiasedExp; + + lhsMantissa |= 0x0010000000000000ull; + mantissaExtended.xy = impl::packUint64(lhsMantissa + rhsMantissa); + --biasedExp; + if (!(mantissaExtended.x < 0x00200000u)) { - frac = impl::shift64ExtraRightJamming(frac.x, frac.y, frac.z, 1); - ++exp; + mantissaExtended = impl::shift64ExtraRightJamming(mantissaExtended, 1); + ++biasedExp; } - return create(impl::roundAndPackFloat64(lhsSign, exp, frac.x, frac.y, frac.z)); + return createPreserveBitPattern(impl::roundAndPackFloat64(lhsSign, biasedExp, mantissaExtended.xyz)); } // cannot happen but compiler cries about not every path returning value - return create(0xdeadbeefbadcaffeull); + return createPreserveBitPattern(0xdeadbeefbadcaffeull); } else { - int exp; - - nbl::hlsl::uint32_t2 lhsShifted = impl::shortShift64Left(lhsHigh, lhsLow, 10); - lhsHigh = lhsShifted.x; - lhsLow = lhsShifted.y; - nbl::hlsl::uint32_t2 rhsShifted = impl::shortShift64Left(rhsHigh, rhsLow, 10); - rhsHigh = rhsShifted.x; - rhsLow = rhsShifted.y; + lhsMantissa = impl::shortShift64Left(lhsMantissa, 10); + rhsMantissa = impl::shortShift64Left(rhsMantissa, 10); if (expDiff != 0) { @@ -491,10 +506,9 @@ namespace impl if (expDiff < 0) { - EXCHANGE(lhsHigh, rhsHigh); - EXCHANGE(lhsLow, rhsLow); - EXCHANGE(lhsExp, rhsExp); - lhsSign ^= 0x80000000u; + swap(lhsMantissa, rhsMantissa); + swap(lhsBiasedExp, rhsBiasedExp); + lhsSign ^= ieee754::traits::signMask; } //if (lhsExp == 0x7FF) @@ -503,113 +517,206 @@ namespace impl // return nbl::hlsl::lerp(__packFloat64(lhsSign, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate); //} - expDiff = LERP(ABS(expDiff), ABS(expDiff) - 1, rhsExp == 0); - rhsHigh = LERP(rhsHigh | 0x40000000u, rhsHigh, rhsExp == 0); - nbl::hlsl::uint32_t2 shifted = impl::shift64RightJamming(rhsHigh, rhsLow, expDiff); - rhsHigh = shifted.x; - rhsLow = shifted.y; - lhsHigh |= 0x40000000u; - frac.xy = impl::sub64(lhsHigh, lhsLow, rhsHigh, rhsLow); - exp = lhsExp; - --exp; - return create(impl::normalizeRoundAndPackFloat64(lhsSign, exp - 10, frac.x, frac.y)); + expDiff = lerp(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); + rhsMantissa = lerp(rhsMantissa | 0x4000000000000000ull, rhsMantissa, rhsBiasedExp == 0); + rhsMantissa = impl::unpackUint64(impl::shift64RightJamming(impl::packUint64(rhsMantissa), expDiff)); + 
lhsMantissa |= 0x4000000000000000ull; + frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); + biasedExp = lhsBiasedExp; + --biasedExp; + return createPreserveBitPattern(impl::normalizeRoundAndPackFloat64(lhsSign, biasedExp - 10, frac.x, frac.y)); } //if (lhsExp == 0x7FF) //{ // bool propagate = ((lhsHigh | rhsHigh) | (lhsLow | rhsLow)) != 0u; // return nbl::hlsl::lerp(0xFFFFFFFFFFFFFFFFUL, __propagateFloat64NaN(a, b), propagate); //} - rhsExp = LERP(rhsExp, 1, lhsExp == 0); - lhsExp = LERP(lhsExp, 1, lhsExp == 0); + rhsBiasedExp = lerp(rhsBiasedExp, 1, lhsBiasedExp == 0); + lhsBiasedExp = lerp(lhsBiasedExp, 1, lhsBiasedExp == 0); - nbl::hlsl::uint32_t2 frac; - uint32_t signOfDifference = 0; - if (rhsHigh < lhsHigh) + + const uint32_t2 lhsMantissaPacked = impl::packUint64(lhsMantissa); + const uint32_t2 rhsMantissaPacked = impl::packUint64(rhsMantissa); + + uint32_t2 frac; + uint64_t signOfDifference = 0; + if (rhsMantissaPacked.x < lhsMantissaPacked.x) { - frac.xy = impl::sub64(lhsHigh, lhsLow, rhsHigh, rhsLow); + frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); } - else if (lhsHigh < rhsHigh) + else if (lhsMantissaPacked.x < rhsMantissaPacked.x) { - frac.xy = impl::sub64(rhsHigh, rhsLow, lhsHigh, lhsLow); - signOfDifference = 0x80000000; + frac.xy = impl::packUint64(rhsMantissa - lhsMantissa); + signOfDifference = ieee754::traits::signMask; } - else if (rhsLow <= lhsLow) + else if (rhsMantissaPacked.y <= lhsMantissaPacked.y) { - /* It is possible that frac.x and frac.y may be zero after this. */ - frac.xy = impl::sub64(lhsHigh, lhsLow, rhsHigh, rhsLow); + /* It is possible that frac.x and frac.y may be zero after this. */ + frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); } else { - frac.xy = impl::sub64(rhsHigh, rhsLow, lhsHigh, lhsLow); - signOfDifference = 0x80000000; + frac.xy = impl::packUint64(rhsMantissa - lhsMantissa); + signOfDifference = ieee754::traits::signMask; } - exp = LERP(rhsExp, lhsExp, signOfDifference == 0u); + biasedExp = lerp(rhsBiasedExp, lhsBiasedExp, signOfDifference == 0u); lhsSign ^= signOfDifference; uint64_t retval_0 = impl::packFloat64(uint32_t(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) << 31, 0, 0u, 0u); - uint64_t retval_1 = impl::normalizeRoundAndPackFloat64(lhsSign, exp - 11, frac.x, frac.y); - return create(LERP(retval_0, retval_1, frac.x != 0u || frac.y != 0u)); + uint64_t retval_1 = impl::normalizeRoundAndPackFloat64(lhsSign, biasedExp - 11, frac.x, frac.y); + return createPreserveBitPattern(lerp(retval_0, retval_1, frac.x != 0u || frac.y != 0u)); } } emulated_float64_t operator-(emulated_float64_t rhs) { - emulated_float64_t lhs = create(data); + emulated_float64_t lhs = createPreserveBitPattern(data); emulated_float64_t rhsFlipped = rhs.flipSign(); return lhs + rhsFlipped; } - emulated_float64_t operator*(const emulated_float64_t rhs) + emulated_float64_t operator*(emulated_float64_t rhs) { - emulated_float64_t retval = emulated_float64_t::create(0u); - - uint32_t lhsLow = uint32_t(data & 0x00000000FFFFFFFFull); - uint32_t rhsLow = uint32_t(rhs.data & 0x00000000FFFFFFFFull); - uint32_t lhsHigh = uint32_t((data & 0x000FFFFF00000000ull) >> 32); - uint32_t rhsHigh = uint32_t((rhs.data & 0x000FFFFF00000000ull) >> 32); + emulated_float64_t retval = emulated_float64_t::createPreserveBitPattern(0u); - uint32_t lhsExp = uint32_t((data >> 52) & 0x7FFull); - uint32_t rhsExp = uint32_t((rhs.data >> 52) & 0x7FFull); + uint64_t lhsSign = data & ieee754::traits::signMask; + uint64_t rhsSign = rhs.data & ieee754::traits::signMask; + uint64_t 
lhsMantissa = ieee754::extractMantissa(data); + uint64_t rhsMantissa = ieee754::extractMantissa(rhs.data); + int lhsBiasedExp = ieee754::extractBiasedExponent(data); + int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); - int32_t exp = int32_t(lhsExp + rhsExp) - 0x400u; - uint64_t sign = uint32_t(((data ^ rhs.data) & 0x8000000000000000ull) >> 32); + int exp = int(lhsBiasedExp + rhsBiasedExp) - 0x400; + uint64_t sign = (data ^ rhs.data) & ieee754::traits::signMask; + if (lhsBiasedExp == 0x7FF) + { + if ((lhsMantissa != 0u) || + ((rhsBiasedExp == 0x7FF) && (rhsMantissa != 0u))) { + return createPreserveBitPattern(impl::propagateFloat64NaN(data, rhs.data)); + } + if ((uint64_t(rhsBiasedExp) | rhsMantissa) == 0u) + return createPreserveBitPattern(0xFFFFFFFFFFFFFFFFull); - lhsHigh |= 0x00100000u; - nbl::hlsl::uint32_t2 shifted = impl::shortShift64Left(rhsHigh, rhsLow, 12); - rhsHigh = shifted.x; - rhsLow = shifted.y; + return createPreserveBitPattern(impl::assembleFloat64(sign, ieee754::traits::exponentMask, 0ull)); + } + if (rhsBiasedExp == 0x7FF) + { + /* a cannot be NaN, but is b NaN? */ + if (rhsMantissa != 0u) +#ifdef RELAXED_NAN_PROPAGATION + return rhs.data; +#else + return createPreserveBitPattern(impl::propagateFloat64NaN(data, rhs.data)); +#endif + if ((uint64_t(lhsBiasedExp) | lhsMantissa) == 0u) + return createPreserveBitPattern(0xFFFFFFFFFFFFFFFFull); - nbl::hlsl::uint32_t4 fracUnpacked = impl::mul64to128(lhsHigh, lhsLow, rhsHigh, rhsLow); - fracUnpacked.xy = impl::add64(fracUnpacked.x, fracUnpacked.y, lhsHigh, lhsLow); - fracUnpacked.z |= uint32_t(fracUnpacked.w != 0u); - if (0x00200000u <= fracUnpacked.x) + return createPreserveBitPattern(sign | ieee754::traits::exponentMask); + } + if (lhsBiasedExp == 0) { - fracUnpacked = nbl::hlsl::uint32_t4(impl::shift64ExtraRightJamming(fracUnpacked.x, fracUnpacked.y, fracUnpacked.z, 1), 0u); - ++exp; + if (lhsMantissa == 0u) + return createPreserveBitPattern(sign); + impl::normalizeFloat64Subnormal(lhsMantissa, lhsBiasedExp, lhsMantissa); } - - return create(impl::roundAndPackFloat64(sign, exp, fracUnpacked.x, fracUnpacked.y, fracUnpacked.z)); + if (rhsBiasedExp == 0) + { + if (rhsMantissa == 0u) + return createPreserveBitPattern(sign); + impl::normalizeFloat64Subnormal(rhsMantissa, rhsBiasedExp, rhsMantissa); + } + + lhsMantissa |= 0x0010000000000000ull; + rhsMantissa = impl::shortShift64Left(rhsMantissa, 12); + + uint32_t4 mantissasPacked; + mantissasPacked.xy = impl::packUint64(lhsMantissa); + mantissasPacked.zw = impl::packUint64(rhsMantissa); + + mantissasPacked = impl::mul64to128(mantissasPacked); + + mantissasPacked.xy = impl::packUint64(impl::unpackUint64(mantissasPacked.xy) + lhsMantissa); + mantissasPacked.z |= uint32_t(mantissasPacked.w != 0u); + if (0x00200000u <= mantissasPacked.x) + { + mantissasPacked = uint32_t4(impl::shift64ExtraRightJamming(mantissasPacked.xyz, 1), 0u); + ++exp; + } + + return createPreserveBitPattern(impl::roundAndPackFloat64(sign, exp, mantissasPacked.xyz)); } // TODO emulated_float64_t operator/(const emulated_float64_t rhs) { - return create(0xdeadbeefbadcaffeull); + return createPreserveBitPattern(0xdeadbeefbadcaffeull); } // relational operators - bool operator==(const emulated_float64_t rhs) { return !(data ^ rhs.data); } - bool operator!=(const emulated_float64_t rhs) { return data ^ rhs.data; } - bool operator<(const emulated_float64_t rhs) { return data < rhs.data; } - bool operator>(const emulated_float64_t rhs) { return data > rhs.data; } - bool operator<=(const emulated_float64_t rhs) 
{ return data <= rhs.data; } - bool operator>=(const emulated_float64_t rhs) { return data >= rhs.data; } + bool operator==(emulated_float64_t rhs) + { + if (isnan(data) || isnan(rhs.data)) + return false; + + const emulated_float64_t xored = emulated_float64_t::createPreserveBitPattern(data ^ rhs.data); + // TODO: check what fast math returns for -0 == 0 + if ((xored.data & 0x7FFFFFFFFFFFFFFFull) == 0ull) + return true; + + return !(xored.data); + } + bool operator!=(emulated_float64_t rhs) + { + if (isnan(data) || isnan(rhs.data)) + return true; + + const emulated_float64_t xored = emulated_float64_t::createPreserveBitPattern(data ^ rhs.data); + + // TODO: check what fast math returns for -0 == 0 + if ((xored.data & 0x7FFFFFFFFFFFFFFFull) == 0ull) + return false; + + return xored.data; + } + bool operator<(emulated_float64_t rhs) + { + const uint64_t lhsSign = ieee754::extractSign(data); + const uint64_t rhsSign = ieee754::extractSign(rhs.data); + + // flip bits of negative numbers and flip signs of all numbers + uint64_t lhsFlipped = data ^ ((0x7FFFFFFFFFFFFFFFull * lhsSign) | ieee754::traits::signMask); + uint64_t rhsFlipped = rhs.data ^ ((0x7FFFFFFFFFFFFFFFull * rhsSign) | ieee754::traits::signMask); + + uint64_t diffBits = lhsFlipped ^ rhsFlipped; + + return (lhsFlipped & diffBits) < (rhsFlipped & diffBits); + } + bool operator>(emulated_float64_t rhs) + { +#ifndef __HLSL_VERSION + std::cout << reinterpret_cast(data) << std::endl; + std::cout << reinterpret_cast(rhs.data) << std::endl; +#endif + + const uint64_t lhsSign = ieee754::extractSign(data); + const uint64_t rhsSign = ieee754::extractSign(rhs.data); + + // flip bits of negative numbers and flip signs of all numbers + uint64_t lhsFlipped = data ^ ((0x7FFFFFFFFFFFFFFFull * lhsSign) | ieee754::traits::signMask); + uint64_t rhsFlipped = rhs.data ^ ((0x7FFFFFFFFFFFFFFFull * rhsSign) | ieee754::traits::signMask); + + uint64_t diffBits = lhsFlipped ^ rhsFlipped; + + return (lhsFlipped & diffBits) > (rhsFlipped & diffBits); + } + bool operator<=(emulated_float64_t rhs) { return !(emulated_float64_t::createPreserveBitPattern(data) > emulated_float64_t::createPreserveBitPattern(rhs.data)); } + bool operator>=(emulated_float64_t rhs) { return !(emulated_float64_t::createPreserveBitPattern(data) < emulated_float64_t::createPreserveBitPattern(rhs.data)); } //logical operators - bool operator&&(const emulated_float64_t rhs) { return bool(data) && bool(rhs.data); } - bool operator||(const emulated_float64_t rhs) { return bool(data) || bool(rhs.data); } + bool operator&&(emulated_float64_t rhs) { return bool(data) && bool(rhs.data); } + bool operator||(emulated_float64_t rhs) { return bool(data) || bool(rhs.data); } bool operator!() { return !bool(data); } // OMITED OPERATORS @@ -620,45 +727,37 @@ namespace impl // TODO: should modify self? 
emulated_float64_t flipSign() { - const uint64_t flippedSign = ((~data) & 0x8000000000000000ull); - return create(flippedSign | (data & 0x7FFFFFFFFFFFFFFFull)); + return createPreserveBitPattern(data ^ ieee754::traits::signMask); } bool isNaN() { - return impl::isNaN64(data); + return isnan(bit_cast(data)); } }; - - //_NBL_STATIC_INLINE_CONSTEXPR emulated_float64_t EMULATED_FLOAT64_NAN = emulated_float64_t::create(0.0 / 0.0); namespace ieee754 { - template<> int getExponentBitCnt() { return getExponentBitCnt(); } - template<> int getMantissaBitCnt() { return getMantissaBitCnt(); } - template<> int getExponentBias() { return getExponentBias(); } - template<> unsigned_integer_of_size<8>::type getExponentMask() { return getExponentMask(); } - template<> unsigned_integer_of_size<8>::type getMantissaMask() { return getMantissaMask(); } template<> - unsigned_integer_of_size<8>::type getSignMask() + struct traits_base { - using AsUint = typename unsigned_integer_of_size::type; - return AsUint(0x1) << (sizeof(float64_t) * 4 - 1); - } + NBL_CONSTEXPR_STATIC_INLINE int16_t exponentBitCnt = 11; + NBL_CONSTEXPR_STATIC_INLINE int16_t mantissaBitCnt = 52; + }; - template <> + template<> uint32_t extractBiasedExponent(emulated_float64_t x) { return extractBiasedExponent(x.data); } - template <> + template<> int extractExponent(emulated_float64_t x) { return extractExponent(x.data); } - template <> + template<> emulated_float64_t replaceBiasedExponent(emulated_float64_t x, typename unsigned_integer_of_size::type biasedExp) { return emulated_float64_t(replaceBiasedExponent(x.data, biasedExp)); @@ -681,4 +780,10 @@ namespace ieee754 } } +#undef FLOAT_ROUND_NEAREST_EVEN +#undef FLOAT_ROUND_TO_ZERO +#undef FLOAT_ROUND_DOWN +#undef FLOAT_ROUND_UP +#undef FLOAT_ROUNDING_MODE + #endif diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index 2cd74caa54..0a4ff1aa7b 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -58,78 +58,106 @@ namespace impl template<> uint64_t castBackToFloatType(uint64_t x) { return x; } } - template - int getExponentBitCnt() { staticAssertTmp(false); return 0xdeadbeefu; } - template<> int getExponentBitCnt() { return 5; } - template<> int getExponentBitCnt() { return 8; } - template<> int getExponentBitCnt() { return 11; } +template +struct traits_base +{ + NBL_CONSTEXPR_STATIC_INLINE int16_t exponentBitCnt = 0xbeef; + NBL_CONSTEXPR_STATIC_INLINE int16_t mantissaBitCnt = 0xbeef; +}; - template - int getMantissaBitCnt() { staticAssertTmp(false); return 0xdeadbeefu; /*TODO: add static assert fail to every 0xdeadbeef, fix all static asserts*/ } - template<> int getMantissaBitCnt() { return 10; } - template<> int getMantissaBitCnt() { return 23; } - template<> int getMantissaBitCnt() { return 52; } +template<> +struct traits_base +{ + NBL_CONSTEXPR_STATIC_INLINE int16_t exponentBitCnt = 5; + NBL_CONSTEXPR_STATIC_INLINE int16_t mantissaBitCnt = 10; +}; - template - int getExponentBias() - { - staticAssertTmp(impl::isTypeAllowed(), "Invalid type! Only floating point or unsigned integer types are allowed."); - return (0x1 << (getExponentBitCnt::type>() - 1)) - 1; - } +template<> +struct traits_base +{ + NBL_CONSTEXPR_STATIC_INLINE int16_t exponentBitCnt = 8; + NBL_CONSTEXPR_STATIC_INLINE int16_t mantissaBitCnt = 23; +}; - template - typename unsigned_integer_of_size::type getSignMask() - { - staticAssertTmp(impl::isTypeAllowed(), "Invalid type! 
Only floating point or unsigned integer types are allowed."); - using AsUint = typename unsigned_integer_of_size::type; - return AsUint(0x1) << (sizeof(T) * 4 - 1); - } +template<> +struct traits_base +{ + NBL_CONSTEXPR_STATIC_INLINE int16_t exponentBitCnt = 11; + NBL_CONSTEXPR_STATIC_INLINE int16_t mantissaBitCnt = 52; +}; - template - typename unsigned_integer_of_size::type getExponentMask() { staticAssertTmp(false); return 0xdeadbeefu; } - template<> unsigned_integer_of_size<2>::type getExponentMask() { return 0x7C00; } - template<> unsigned_integer_of_size<4>::type getExponentMask() { return 0x7F800000u; } - template<> unsigned_integer_of_size<8>::type getExponentMask() { return 0x7FF0000000000000ull; } +template +struct traits : traits_base +{ + using bit_rep_t = typename unsigned_integer_of_size::type; + using base_t = traits_base; - template - typename unsigned_integer_of_size::type getMantissaMask() { staticAssertTmp(false); return 0xdeadbeefu; } - template<> unsigned_integer_of_size<2>::type getMantissaMask() { return 0x03FF; } - template<> unsigned_integer_of_size<4>::type getMantissaMask() { return 0x007FFFFFu; } - template<> unsigned_integer_of_size<8>::type getMantissaMask() { return 0x000FFFFFFFFFFFFFull; } + NBL_CONSTEXPR_STATIC_INLINE bit_rep_t signMask = bit_rep_t(0x1u) << (sizeof(Float) * 8 - 1); + NBL_CONSTEXPR_STATIC_INLINE bit_rep_t exponentMask = ((~bit_rep_t(0)) << base_t::mantissaBitCnt) ^ signMask; + NBL_CONSTEXPR_STATIC_INLINE bit_rep_t mantissaMask = (bit_rep_t(0x1u) << base_t::mantissaBitCnt) - 1; + NBL_CONSTEXPR_STATIC_INLINE bit_rep_t exponentBias = (int(0x1) << (base_t::exponentBitCnt - 1)) - 1; +}; - template - uint32_t extractBiasedExponent(T x) - { - using AsUint = typename unsigned_integer_of_size::type; - return glsl::bitfieldExtract(impl::castToUintType(x), getMantissaBitCnt::type>(), getExponentBitCnt::type>()); - } +template +uint32_t extractBiasedExponent(T x) +{ + using AsUint = typename unsigned_integer_of_size::type; + return glsl::bitfieldExtract(impl::castToUintType(x), traits::type>::mantissaBitCnt, traits::type>::exponentBitCnt); +} - template - int extractExponent(T x) - { - return int(extractBiasedExponent(x)) - int(getExponentBias()); - } +template<> +uint32_t extractBiasedExponent(uint64_t x) +{ + const uint32_t highBits = uint32_t(x >> 32); + return glsl::bitfieldExtract(highBits, traits::mantissaBitCnt - 32, traits::exponentBitCnt); +} - template - T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp) - { - staticAssertTmp(impl::isTypeAllowed(), "Invalid type! Only floating point or unsigned integer types are allowed."); - return impl::castBackToFloatType(glsl::bitfieldInsert(impl::castToUintType(x), biasedExp, getMantissaBitCnt(), getExponentBitCnt())); - } +template<> +uint32_t extractBiasedExponent(float64_t x) +{ + return extractBiasedExponent(impl::castToUintType(x)); +} - // performs no overflow tests, returns x*exp2(n) - template - T fastMulExp2(T x, int n) - { - return replaceBiasedExponent(x, extractBiasedExponent(x) + uint32_t(n)); - } +template +int extractExponent(T x) +{ + return int(extractBiasedExponent(x)) - int(traits::exponentBias); +} + +template +T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp) +{ + staticAssertTmp(impl::isTypeAllowed(), "Invalid type! 
Only floating point or unsigned integer types are allowed."); + using AsFloat = typename float_of_size::type; + return impl::castBackToFloatType(glsl::bitfieldInsert(impl::castToUintType(x), biasedExp, traits::mantissaBitCnt, traits::exponentBitCnt)); +} + +// performs no overflow tests, returns x*exp2(n) +template +T fastMulExp2(T x, int n) +{ + return replaceBiasedExponent(x, extractBiasedExponent(x) + uint32_t(n)); +} + +template +typename unsigned_integer_of_size::type extractMantissa(T x) +{ + using AsUint = typename unsigned_integer_of_size::type; + return impl::castToUintType(x) & traits::type>::mantissaMask; +} + +template +typename unsigned_integer_of_size::type extractSign(T x) +{ + return (impl::castToUintType(x) & traits::signMask) >> ((sizeof(T) * 8) - 1); +} + +template +typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) +{ + return impl::castToUintType(x) & traits::signMask; +} - template - typename unsigned_integer_of_size::type extractMantissa(T x) - { - using AsUint = typename unsigned_integer_of_size::type; - return impl::castToUintType(x) & getMantissaMask::type>(); - } } } } diff --git a/include/nbl/builtin/hlsl/tgmath.hlsl b/include/nbl/builtin/hlsl/tgmath.hlsl new file mode 100644 index 0000000000..464b4ed9ac --- /dev/null +++ b/include/nbl/builtin/hlsl/tgmath.hlsl @@ -0,0 +1,25 @@ +// Copyright (C) 2022 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_BUILTIN_HLSL_TGMATH_INCLUDED_ +#define _NBL_BUILTIN_HLSL_TGMATH_INCLUDED_ + +#include +#include + +namespace nbl +{ +namespace hlsl +{ +template +bool isnan(Float val) +{ + using AsUint = typename unsigned_integer_of_size::type; + AsUint asUint = bit_cast(val); + return bool((asUint & ieee754::traits::exponentMask) && (asUint & ieee754::traits::mantissaMask)); +} + +} +} + +#endif \ No newline at end of file diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 3edccd2aa9..e1e71c542c 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -233,6 +233,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/normalization/shared_nor LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/macros.h") #emulated LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated_float64_t.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tgmath.hlsl") #utility LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ieee754.hlsl") #spirv intrinsics From afef3ec716461639ab7a71ce2da8e00a9847b8c1 Mon Sep 17 00:00:00 2001 From: Przemek Date: Tue, 9 Jul 2024 11:09:28 +0200 Subject: [PATCH 020/432] Updated example --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index f17208a703..5aaabfd75b 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit f17208a7039e30af359d0e99cf896f2d013670ff +Subproject commit 5aaabfd75b59b95bd88cf649f8dd6b69bfc54a7e From 10b8a305318d154b169e9beb4db2eea06f9ba611 Mon Sep 17 00:00:00 2001 From: Przemek Date: Wed, 24 Jul 2024 23:47:23 +0200 Subject: [PATCH 021/432] Saving work --- examples_tests | 2 +- include/nbl/builtin/hlsl/algorithm.hlsl | 18 +- include/nbl/builtin/hlsl/cpp_compat.hlsl | 4 + .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 514 ++++-------------- include/nbl/builtin/hlsl/ieee754.hlsl | 7 +- .../hlsl/impl/emulated_float64_t_impl.hlsl | 469 ++++++++++++++++ src/nbl/builtin/CMakeLists.txt | 2 + 7 files 
changed, 596 insertions(+), 420 deletions(-) create mode 100644 include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl diff --git a/examples_tests b/examples_tests index 5aaabfd75b..25dfb67454 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 5aaabfd75b59b95bd88cf649f8dd6b69bfc54a7e +Subproject commit 25dfb6745482473bb63c893f8ae73770d2698983 diff --git a/include/nbl/builtin/hlsl/algorithm.hlsl b/include/nbl/builtin/hlsl/algorithm.hlsl index 564ef59f20..276282cedc 100644 --- a/include/nbl/builtin/hlsl/algorithm.hlsl +++ b/include/nbl/builtin/hlsl/algorithm.hlsl @@ -18,7 +18,7 @@ namespace impl // TODO: use structs template - NBL_CONSTEXPR_STATIC_INLINE void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) + NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) { T tmp = lhs; lhs = rhs; @@ -26,7 +26,7 @@ namespace impl } template<> - NBL_CONSTEXPR_STATIC_INLINE void swap(NBL_REF_ARG(uint16_t) lhs, NBL_REF_ARG(uint16_t) rhs) + NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint16_t) lhs, NBL_REF_ARG(uint16_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -34,7 +34,7 @@ namespace impl } template<> - NBL_CONSTEXPR_STATIC_INLINE void swap(NBL_REF_ARG(uint32_t) lhs, NBL_REF_ARG(uint32_t) rhs) + NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint32_t) lhs, NBL_REF_ARG(uint32_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -42,7 +42,7 @@ namespace impl } template<> - NBL_CONSTEXPR_STATIC_INLINE void swap(NBL_REF_ARG(uint64_t) lhs, NBL_REF_ARG(uint64_t) rhs) + NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint64_t) lhs, NBL_REF_ARG(uint64_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -50,7 +50,7 @@ namespace impl } template<> - NBL_CONSTEXPR_STATIC_INLINE void swap(NBL_REF_ARG(int16_t) lhs, NBL_REF_ARG(int16_t) rhs) + NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int16_t) lhs, NBL_REF_ARG(int16_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -58,7 +58,7 @@ namespace impl } template<> - NBL_CONSTEXPR_STATIC_INLINE void swap(NBL_REF_ARG(int32_t) lhs, NBL_REF_ARG(int32_t) rhs) + NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int32_t) lhs, NBL_REF_ARG(int32_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -66,7 +66,7 @@ namespace impl } template<> - NBL_CONSTEXPR_STATIC_INLINE void swap(NBL_REF_ARG(int64_t) lhs, NBL_REF_ARG(int64_t) rhs) + NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int64_t) lhs, NBL_REF_ARG(int64_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -74,7 +74,7 @@ namespace impl } #else template - NBL_CONSTEXPR_STATIC_INLINE void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) + NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) { std::swap(lhs, rhs); } @@ -82,7 +82,7 @@ namespace impl } template -NBL_CONSTEXPR_STATIC_INLINE void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) +NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) { impl::swap(lhs, rhs); } diff --git a/include/nbl/builtin/hlsl/cpp_compat.hlsl b/include/nbl/builtin/hlsl/cpp_compat.hlsl index a22e8cc0c5..f3cf538e28 100644 --- a/include/nbl/builtin/hlsl/cpp_compat.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat.hlsl @@ -11,6 +11,8 @@ #define NBL_CONSTEXPR constexpr #define NBL_CONSTEXPR_STATIC constexpr static #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline +#define NBL_CONSTEXPR_FUNC constexpr +#define NBL_CONSTEXPR_INLINE_FUNC constexpr inline #define NBL_CONST_MEMBER_FUNC const #define NBL_ALIAS_TEMPLATE_FUNCTION(origFunctionName, functionAlias) \ @@ -41,6 +43,8 @@ using add_pointer = std::add_pointer; #define ARROW .arrow(). 
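// HLSL offers no constexpr, so the function-qualifier macros added below expand to nothing on this
// side of the #ifdef, while the variable-qualifier macros keep falling back to const static.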
#define NBL_CONSTEXPR const static #define NBL_CONSTEXPR_STATIC_INLINE const static +#define NBL_CONSTEXPR_FUNC +#define NBL_CONSTEXPR_INLINE_FUNC #define NBL_CONST_MEMBER_FUNC namespace nbl diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index 455c6f67a5..8e8a26d6b3 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -5,35 +5,7 @@ #include #include #include - -// TODO: when it will be possible, use this unions wherever they fit: -/* -* union Mantissa -* { -* struct -* { -* uint32_t highBits; -* uint64_t lowBits; -* }; -* -* uint32_t2 packed; -* }; -* -*/ - -/* -* union Mantissa -* { -* struct -* { -* uint64_t lhs; -* uint64_t rhs; -* }; -* -* uint32_t4 packed; -* }; -* -*/ +#include #define FLOAT_ROUND_NEAREST_EVEN 0 #define FLOAT_ROUND_TO_ZERO 1 @@ -45,386 +17,64 @@ namespace nbl { namespace hlsl { -namespace impl -{ - template - uint64_t promoteToUint64(T val) - { - using AsFloat = typename float_of_size::type; - uint64_t asUint = ieee754::impl::castToUintType(val); - - const uint64_t sign = (uint64_t(ieee754::traits::signMask) & asUint) << (sizeof(float64_t) - sizeof(T)); - const int64_t newExponent = ieee754::extractExponent(val) + ieee754::traits::exponentBias; - - const uint64_t exp = (uint64_t(ieee754::extractExponent(val)) + ieee754::traits::exponentBias) << (ieee754::traits::mantissaBitCnt); - const uint64_t mantissa = (uint64_t(ieee754::traits::mantissaMask) & asUint) << (ieee754::traits::exponentBias - ieee754::traits::mantissaBitCnt); - - return sign | exp | mantissa; - }; - - template<> uint64_t promoteToUint64(float64_t val) { return bit_cast(val); } - - nbl::hlsl::uint32_t2 umulExtended(uint32_t lhs, uint32_t rhs) - { - uint64_t product = uint64_t(lhs) * uint64_t(rhs); - nbl::hlsl::uint32_t2 output; - output.x = uint32_t((product & 0xFFFFFFFF00000000) >> 32); - output.y = uint32_t(product & 0x00000000FFFFFFFFull); - return output; - } - - uint64_t propagateFloat64NaN(uint64_t lhs, uint64_t rhs) - { -#if defined RELAXED_NAN_PROPAGATION - return lhs | rhs; -#else - const bool lhsIsNaN = isnan(bit_cast(lhs)); - const bool rhsIsNaN = isnan(bit_cast(rhs)); - lhs |= 0x0000000000080000ull; - rhs |= 0x0000000000080000ull; - - return lerp(rhs, lerp(lhs, rhs, rhsIsNaN), lhsIsNaN); -#endif - } - - uint64_t packFloat64(uint32_t zSign, int zExp, uint32_t zFrac0, uint32_t zFrac1) - { - nbl::hlsl::uint32_t2 z; - - z.x = zSign + (uint32_t(zExp) << 20) + zFrac0; - z.y = zFrac1; - - uint64_t output = 0u; - output |= (uint64_t(z.x) << 32) & 0xFFFFFFFF00000000ull; - output |= uint64_t(z.y); - return output; - } - - uint32_t2 packUint64(uint64_t val) - { - return uint32_t2((val & 0xFFFFFFFF00000000ull) >> 32, val & 0x00000000FFFFFFFFull); - } - - uint64_t unpackUint64(uint32_t2 val) - { - return ((uint64_t(val.x) & 0x00000000FFFFFFFFull) << 32) | uint64_t(val.y); - } - - nbl::hlsl::uint32_t2 add64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) - { - nbl::hlsl::uint32_t2 output; - output.y = a1 + b1; - output.x = a0 + b0 + uint32_t(output.y < a1); - - return output; - } - - - nbl::hlsl::uint32_t2 sub64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) - { - nbl::hlsl::uint32_t2 output; - output.y = a1 - b1; - output.x = a0 - b0 - uint32_t(a1 < b1); - - return output; - } - - // TODO: test - int countLeadingZeros32(uint32_t val) - { -#ifndef __HLSL_VERSION - return 31 - nbl::hlsl::findMSB(val); -#else - return 31 - firstbithigh(val); -#endif - } - - uint32_t2 
shift64RightJamming(uint32_t2 val, int count) - { - uint32_t2 output; - const int negCount = (-count) & 31; - - output.x = lerp(0u, val.x, count == 0); - output.x = lerp(output.x, (val.x >> count), count < 32); - - output.y = uint32_t((val.x | val.y) != 0u); /* count >= 64 */ - uint32_t z1_lt64 = (val.x>>(count & 31)) | uint32_t(((val.x<>count) | uint32_t((val.y<> (count & 31)), count < 64); - output.y = lerp(output.y, (val.x << negCount) | (val.y >> count), count < 32); - - val.z = lerp(val.z | val.y, val.z, count < 32); - output.x = lerp(output.x, val.x >> count, count < 32); - output.z |= uint32_t(val.z != 0u); - - output.x = lerp(output.x, 0u, (count == 32)); - output.y = lerp(output.y, val.x, (count == 32)); - output.z = lerp(output.z, val.y, (count == 32)); - output.x = lerp(output.x, val.x, (count == 0)); - output.y = lerp(output.y, val.y, (count == 0)); - output.z = lerp(output.z, val.z, (count == 0)); - - return output; - } - - uint64_t shortShift64Left(uint64_t val, int count) - { - const uint32_t2 packed = packUint64(val); - - nbl::hlsl::uint32_t2 output; - output.y = packed.y << count; - // TODO: fix - output.x = lerp((packed.x << count | (packed.y >> ((-count) & 31))), packed.x, count == 0); - - // y = 3092377600 - // x = 2119009566 - return unpackUint64(output); - }; - - uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) - { - return signShifted + expShifted + mantissa; - } - - uint64_t roundAndPackFloat64(uint64_t zSign, int zExp, uint32_t3 mantissaExtended) - { - bool roundNearestEven; - bool increment; - - roundNearestEven = true; - increment = int(mantissaExtended.z) < 0; - if (!roundNearestEven) - { - if (false) //(FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) - { - increment = false; - } - else - { - if (false) //(zSign != 0u) - { - //increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && - // (zFrac2 != 0u); - } - else - { - //increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && - // (zFrac2 != 0u); - } - } - } - if (0x7FD <= zExp) - { - if ((0x7FD < zExp) || ((zExp == 0x7FD) && (0x001FFFFFu == mantissaExtended.x && 0xFFFFFFFFu == mantissaExtended.y) && increment)) - { - if (false) // ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) || - // ((zSign != 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP)) || - // ((zSign == 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN))) - { - return packFloat64(zSign, 0x7FE, 0x000FFFFFu, 0xFFFFFFFFu); - } - - return packFloat64(zSign, 0x7FF, 0u, 0u); - } - } - - if (zExp < 0) - { - mantissaExtended = shift64ExtraRightJamming(mantissaExtended, -zExp); - zExp = 0; - - if (roundNearestEven) - { - increment = mantissaExtended.z < 0u; - } - else - { - if (zSign != 0u) - { - increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && (mantissaExtended.z != 0u); - } - else - { - increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && (mantissaExtended.z != 0u); - } - } - } - - if (increment) - { - const uint64_t added = impl::unpackUint64(uint32_t2(mantissaExtended.xy)) + 1ull; - mantissaExtended.xy = packUint64(added); - mantissaExtended.y &= ~((mantissaExtended.z + uint32_t(mantissaExtended.z == 0u)) & uint32_t(roundNearestEven)); - } - else - { - zExp = lerp(zExp, 0, (mantissaExtended.x | mantissaExtended.y) == 0u); - } - - return assembleFloat64(zSign, uint64_t(zExp) << ieee754::traits::mantissaBitCnt, unpackUint64(mantissaExtended.xy)); - } - - uint64_t normalizeRoundAndPackFloat64(uint64_t sign, int exp, uint32_t frac0, uint32_t frac1) - { - int shiftCount; - nbl::hlsl::uint32_t3 frac = 
nbl::hlsl::uint32_t3(frac0, frac1, 0u); - - if (frac.x == 0u) - { - exp -= 32; - frac.x = frac.y; - frac.y = 0u; - } - - shiftCount = countLeadingZeros32(frac.x) - 11; - if (0 <= shiftCount) - { - frac.xy = shortShift64Left(unpackUint64(frac.xy), shiftCount); - } - else - { - frac.xyz = shift64ExtraRightJamming(uint32_t3(frac.xy, 0), -shiftCount); - } - exp -= shiftCount; - return roundAndPackFloat64(sign, exp, frac); - } - - void normalizeFloat64Subnormal(uint64_t mantissa, - NBL_REF_ARG(int) outExp, - NBL_REF_ARG(uint64_t) outMantissa) - { - uint32_t2 mantissaPacked = packUint64(mantissa); - int shiftCount; - uint32_t2 temp; - shiftCount = countLeadingZeros32(lerp(mantissaPacked.x, mantissaPacked.y, mantissaPacked.x == 0u)) - 11; - outExp = lerp(1 - shiftCount, -shiftCount - 31, mantissaPacked.x == 0u); - - temp.x = lerp(mantissaPacked.y << shiftCount, mantissaPacked.y >> (-shiftCount), shiftCount < 0); - temp.y = lerp(0u, mantissaPacked.y << (shiftCount & 31), shiftCount < 0); - - shortShift64Left(impl::unpackUint64(mantissaPacked), shiftCount); - - outMantissa = lerp(outMantissa, unpackUint64(temp), mantissaPacked.x == 0); - } - - } - - struct emulated_float64_t + template + struct emulated_float64_t_impl { using storage_t = uint64_t; storage_t data; // constructors - /*static emulated_float64_t create(uint16_t val) + /*static emulated_float64_t_impl create(uint16_t val) { - return emulated_float64_t(bit_cast(float64_t(val))); + return emulated_float64_t_impl(bit_cast(float64_t(val))); }*/ - static emulated_float64_t create(int32_t val) + static emulated_float64_t_impl create(int32_t val) { - return emulated_float64_t(bit_cast(float64_t(val))); + return emulated_float64_t_impl(bit_cast(float64_t(val))); } - static emulated_float64_t create(int64_t val) + static emulated_float64_t_impl create(int64_t val) { - return emulated_float64_t(bit_cast(float64_t(val))); + return emulated_float64_t_impl(bit_cast(float64_t(val))); } - static emulated_float64_t create(uint32_t val) + static emulated_float64_t_impl create(uint32_t val) { - return emulated_float64_t(bit_cast(float64_t(val))); + return emulated_float64_t_impl(bit_cast(float64_t(val))); } - static emulated_float64_t create(uint64_t val) + static emulated_float64_t_impl create(uint64_t val) { - return emulated_float64_t(bit_cast(float64_t(val))); + return emulated_float64_t_impl(bit_cast(float64_t(val))); } - static emulated_float64_t create(float64_t val) + static emulated_float64_t_impl create(float64_t val) { - return emulated_float64_t(bit_cast(val)); + return emulated_float64_t_impl(bit_cast(val)); } // TODO: unresolved external symbol imath_half_to_float_table - /*static emulated_float64_t create(float16_t val) + /*static emulated_float64_t_impl create(float16_t val) { - return emulated_float64_t(bit_cast(float64_t(val))); + return emulated_float64_t_impl(bit_cast(float64_t(val))); }*/ - static emulated_float64_t create(float32_t val) + static emulated_float64_t_impl create(float32_t val) { - return emulated_float64_t(bit_cast(float64_t(val))); + return emulated_float64_t_impl(bit_cast(float64_t(val))); } - static emulated_float64_t createPreserveBitPattern(uint64_t val) + static emulated_float64_t_impl createPreserveBitPattern(uint64_t val) { - return emulated_float64_t(val); + return emulated_float64_t_impl(val); } // arithmetic operators - emulated_float64_t operator+(const emulated_float64_t rhs) + emulated_float64_t_impl operator+(const emulated_float64_t_impl rhs) { - emulated_float64_t retval = createPreserveBitPattern(0u); 
+ emulated_float64_t_impl retval = createPreserveBitPattern(0u); uint64_t mantissa; uint32_t3 mantissaExtended; @@ -502,7 +152,7 @@ namespace impl if (expDiff != 0) { - nbl::hlsl::uint32_t2 frac; + uint32_t2 frac; if (expDiff < 0) { @@ -568,17 +218,17 @@ namespace impl } } - emulated_float64_t operator-(emulated_float64_t rhs) + emulated_float64_t_impl operator-(emulated_float64_t_impl rhs) { - emulated_float64_t lhs = createPreserveBitPattern(data); - emulated_float64_t rhsFlipped = rhs.flipSign(); + emulated_float64_t_impl lhs = createPreserveBitPattern(data); + emulated_float64_t_impl rhsFlipped = rhs.flipSign(); return lhs + rhsFlipped; } - emulated_float64_t operator*(emulated_float64_t rhs) + emulated_float64_t_impl operator*(emulated_float64_t_impl rhs) { - emulated_float64_t retval = emulated_float64_t::createPreserveBitPattern(0u); + emulated_float64_t_impl retval = emulated_float64_t_impl::createPreserveBitPattern(0u); uint64_t lhsSign = data & ieee754::traits::signMask; uint64_t rhsSign = rhs.data & ieee754::traits::signMask; @@ -587,7 +237,7 @@ namespace impl int lhsBiasedExp = ieee754::extractBiasedExponent(data); int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); - int exp = int(lhsBiasedExp + rhsBiasedExp) - 0x400; + int exp = int(lhsBiasedExp + rhsBiasedExp) - ieee754::traits::exponentBias; uint64_t sign = (data ^ rhs.data) & ieee754::traits::signMask; if (lhsBiasedExp == 0x7FF) @@ -628,51 +278,97 @@ namespace impl impl::normalizeFloat64Subnormal(rhsMantissa, rhsBiasedExp, rhsMantissa); } - lhsMantissa |= 0x0010000000000000ull; - rhsMantissa = impl::shortShift64Left(rhsMantissa, 12); + if (false) + { + lhsMantissa |= 1ull << 52; + rhsMantissa = impl::shortShift64Left(rhsMantissa, 12); + + uint32_t4 mantissasPacked; + mantissasPacked.xy = impl::packUint64(lhsMantissa); + mantissasPacked.zw = impl::packUint64(rhsMantissa); - uint32_t4 mantissasPacked; - mantissasPacked.xy = impl::packUint64(lhsMantissa); - mantissasPacked.zw = impl::packUint64(rhsMantissa); + mantissasPacked = impl::mul64to128(mantissasPacked); - mantissasPacked = impl::mul64to128(mantissasPacked); + mantissasPacked.xy = impl::packUint64(impl::unpackUint64(mantissasPacked.xy) + lhsMantissa); + mantissasPacked.z |= uint32_t(mantissasPacked.w != 0u); + if (0x00200000u <= mantissasPacked.x) + { + mantissasPacked = uint32_t4(impl::shift64ExtraRightJamming(mantissasPacked.xyz, 1), 0u); + ++exp; + } - mantissasPacked.xy = impl::packUint64(impl::unpackUint64(mantissasPacked.xy) + lhsMantissa); - mantissasPacked.z |= uint32_t(mantissasPacked.w != 0u); - if (0x00200000u <= mantissasPacked.x) + return createPreserveBitPattern(impl::roundAndPackFloat64(sign, exp, mantissasPacked.xyz)); + } + else { - mantissasPacked = uint32_t4(impl::shift64ExtraRightJamming(mantissasPacked.xyz, 1), 0u); - ++exp; + lhsMantissa |= 1ull << 52; + rhsMantissa |= 1ull << 52; + + uint32_t2 lhsPacked = impl::packUint64(lhsMantissa); + uint32_t2 rhsPacked = impl::packUint64(rhsMantissa); + uint64_t lhsHigh = lhsPacked.x; + uint64_t lhsLow = lhsPacked.y; + uint64_t rhsHigh = rhsPacked.x; + uint64_t rhsLow = rhsPacked.y; + + //((hi_lhs * hi_rhs) << 11) + ((hi_lhs * lo_rhs + lo_lhs * hi_rhs) >> 37) + + uint64_t newPseudoMantissa = ((lhsHigh * rhsHigh) << 11) + ((lhsHigh * rhsLow + lhsLow * rhsHigh) >> 37); + newPseudoMantissa <<= 1; + //newPseudoMantissa >>= 52; + /*if (newPseudoMantissa >= (1ull << 52)) + { + newPseudoMantissa >>= 1; + ++exp; + }*/ + + return createPreserveBitPattern(impl::assembleFloat64(sign, uint64_t(exp) << 
ieee754::traits::mantissaBitCnt, newPseudoMantissa & ieee754::traits::mantissaMask)); } - return createPreserveBitPattern(impl::roundAndPackFloat64(sign, exp, mantissasPacked.xyz)); + } - // TODO - emulated_float64_t operator/(const emulated_float64_t rhs) + emulated_float64_t_impl operator/(const emulated_float64_t_impl rhs) { - return createPreserveBitPattern(0xdeadbeefbadcaffeull); + + // TODO: maybe add function to extract real mantissa + const uint64_t lhsRealMantissa = (ieee754::extractMantissa(data) | (1ull << ieee754::traits::mantissaBitCnt)); + const uint64_t rhsRealMantissa = ieee754::extractMantissa(rhs.data) | (1ull << ieee754::traits::mantissaBitCnt); + + + const uint64_t sign = (data ^ rhs.data) & ieee754::traits::signMask; + int exp = ieee754::extractExponent(data) - ieee754::extractExponent(rhs.data) + ieee754::traits::exponentBias; + uint64_t mantissa = impl::divMantissas(lhsRealMantissa, rhsRealMantissa); + + if (mantissa & (1ULL << (ieee754::traits::mantissaBitCnt + 1))) + { + mantissa >>= 1; + ++exp; + } + + return createPreserveBitPattern(impl::assembleFloat64(sign, exp, mantissa & ieee754::traits::mantissaMask)); } + // relational operators - bool operator==(emulated_float64_t rhs) + bool operator==(emulated_float64_t_impl rhs) { - if (isnan(data) || isnan(rhs.data)) + if (FastMath && (isnan(data) || isnan(rhs.data))) return false; - const emulated_float64_t xored = emulated_float64_t::createPreserveBitPattern(data ^ rhs.data); + const emulated_float64_t_impl xored = emulated_float64_t_impl::createPreserveBitPattern(data ^ rhs.data); // TODO: check what fast math returns for -0 == 0 if ((xored.data & 0x7FFFFFFFFFFFFFFFull) == 0ull) return true; return !(xored.data); } - bool operator!=(emulated_float64_t rhs) + bool operator!=(emulated_float64_t_impl rhs) { - if (isnan(data) || isnan(rhs.data)) + if (FastMath && (isnan(data) || isnan(rhs.data))) return true; - const emulated_float64_t xored = emulated_float64_t::createPreserveBitPattern(data ^ rhs.data); + const emulated_float64_t_impl xored = emulated_float64_t_impl::createPreserveBitPattern(data ^ rhs.data); // TODO: check what fast math returns for -0 == 0 if ((xored.data & 0x7FFFFFFFFFFFFFFFull) == 0ull) @@ -680,7 +376,7 @@ namespace impl return xored.data; } - bool operator<(emulated_float64_t rhs) + bool operator<(emulated_float64_t_impl rhs) { const uint64_t lhsSign = ieee754::extractSign(data); const uint64_t rhsSign = ieee754::extractSign(rhs.data); @@ -693,7 +389,7 @@ namespace impl return (lhsFlipped & diffBits) < (rhsFlipped & diffBits); } - bool operator>(emulated_float64_t rhs) + bool operator>(emulated_float64_t_impl rhs) { const uint64_t lhsSign = ieee754::extractSign(data); const uint64_t rhsSign = ieee754::extractSign(rhs.data); @@ -706,12 +402,12 @@ namespace impl return (lhsFlipped & diffBits) > (rhsFlipped & diffBits); } - bool operator<=(emulated_float64_t rhs) { return !(emulated_float64_t::createPreserveBitPattern(data) > emulated_float64_t::createPreserveBitPattern(rhs.data)); } - bool operator>=(emulated_float64_t rhs) { return !(emulated_float64_t::createPreserveBitPattern(data) < emulated_float64_t::createPreserveBitPattern(rhs.data)); } + bool operator<=(emulated_float64_t_impl rhs) { return !(emulated_float64_t_impl::createPreserveBitPattern(data) > emulated_float64_t_impl::createPreserveBitPattern(rhs.data)); } + bool operator>=(emulated_float64_t_impl rhs) { return !(emulated_float64_t_impl::createPreserveBitPattern(data) < emulated_float64_t_impl::createPreserveBitPattern(rhs.data)); } 
//logical operators - bool operator&&(emulated_float64_t rhs) { return bool(data) && bool(rhs.data); } - bool operator||(emulated_float64_t rhs) { return bool(data) || bool(rhs.data); } + bool operator&&(emulated_float64_t_impl rhs) { return bool(data) && bool(rhs.data); } + bool operator||(emulated_float64_t_impl rhs) { return bool(data) || bool(rhs.data); } bool operator!() { return !bool(data); } // OMITED OPERATORS @@ -720,7 +416,7 @@ namespace impl // - access operators (dereference and addressof) not supported in HLSL // TODO: should modify self? - emulated_float64_t flipSign() + emulated_float64_t_impl flipSign() { return createPreserveBitPattern(data ^ ieee754::traits::signMask); } @@ -731,6 +427,8 @@ namespace impl } }; + using emulated_float64_t = emulated_float64_t_impl; + namespace ieee754 { template<> diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index 0a4ff1aa7b..b4a65d785d 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -61,8 +61,8 @@ namespace impl template struct traits_base { - NBL_CONSTEXPR_STATIC_INLINE int16_t exponentBitCnt = 0xbeef; - NBL_CONSTEXPR_STATIC_INLINE int16_t mantissaBitCnt = 0xbeef; + NBL_CONSTEXPR_STATIC_INLINE int16_t exponentBitCnt = int16_t(0xbeef); + NBL_CONSTEXPR_STATIC_INLINE int16_t mantissaBitCnt = int16_t(0xbeef); }; template<> @@ -89,6 +89,8 @@ struct traits_base template struct traits : traits_base { + //static_assert(is_same_v || is_same_v || is_same_v); + using bit_rep_t = typename unsigned_integer_of_size::type; using base_t = traits_base; @@ -96,6 +98,7 @@ struct traits : traits_base NBL_CONSTEXPR_STATIC_INLINE bit_rep_t exponentMask = ((~bit_rep_t(0)) << base_t::mantissaBitCnt) ^ signMask; NBL_CONSTEXPR_STATIC_INLINE bit_rep_t mantissaMask = (bit_rep_t(0x1u) << base_t::mantissaBitCnt) - 1; NBL_CONSTEXPR_STATIC_INLINE bit_rep_t exponentBias = (int(0x1) << (base_t::exponentBitCnt - 1)) - 1; + NBL_CONSTEXPR_STATIC_INLINE bit_rep_t inf = exponentMask; }; template diff --git a/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl b/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl new file mode 100644 index 0000000000..33ea303968 --- /dev/null +++ b/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl @@ -0,0 +1,469 @@ +#ifndef _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_IMPL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_IMPL_INCLUDED_ + +#include +#include +#include +#include + +// TODO: when it will be possible, use this unions wherever they fit: +/* +* union Mantissa +* { +* struct +* { +* uint32_t highBits; +* uint64_t lowBits; +* }; +* +* uint32_t2 packed; +* }; +* +*/ + +/* +* union Mantissa +* { +* struct +* { +* uint64_t lhs; +* uint64_t rhs; +* }; +* +* uint32_t4 packed; +* }; +* +*/ + +namespace nbl +{ +namespace hlsl +{ +namespace impl +{ + struct uint128Mantissa + { + uint64_t highBits; + uint64_t lowBits; + + static uint128Mantissa create(uint64_t mantissa64) + { + uint128Mantissa output; + output.highBits = 0u; + output.lowBits = mantissa64; + + return output; + } + + void shiftLeftByOne() + { + highBits = (highBits << 1) | (lowBits >> 63); + lowBits <<= 1; + } + + void shiftRightByOne() + { + highBits >>= 1; + lowBits = (highBits >> 63) | (lowBits >> 1); + } + + // TODO: more efficient comparisions + bool operator>=(uint128Mantissa rhs) + { + return (highBits > rhs.highBits) || (highBits == rhs.highBits && lowBits >= rhs.lowBits); + } + + bool operator<(uint128Mantissa rhs) + { + return (highBits < rhs.highBits) 
|| (highBits == rhs.highBits && lowBits < rhs.lowBits); + } + + uint128Mantissa operator-(uint128Mantissa rhs) + { + uint128Mantissa result; + result.lowBits = lowBits - rhs.lowBits; + result.highBits = highBits - rhs.highBits - (lowBits < rhs.lowBits); + return result; + } + + static uint128Mantissa createAsShiftedByMantissaBitCnt(uint64_t mantissa64) + { + uint128Mantissa output; + output.highBits = mantissa64 >> (64 - nbl::hlsl::ieee754::traits::mantissaBitCnt); + output.lowBits = mantissa64 << nbl::hlsl::ieee754::traits::mantissaBitCnt; + + return output; + } + + uint64_t divByFloat64(uint64_t floatRep) + { + uint128Mantissa output = create(0); + + uint128Mantissa divisor = create(floatRep); + uint128Mantissa remainder; + remainder.highBits = highBits; + remainder.lowBits = lowBits; + uint128Mantissa one = create(1); + + + while ((divisor.highBits < (1ULL << 63)) && (divisor < remainder)) + { + divisor.shiftLeftByOne(); + one.shiftLeftByOne(); + } + + while (one.highBits != 0 || one.lowBits != 0) + { + if (remainder >= divisor) + { + remainder = remainder - divisor; + output.highBits |= one.highBits; + output.lowBits |= one.lowBits; + } + output.shiftRightByOne(); + one.shiftRightByOne(); + } + + return output.lowBits; + } + }; + + uint64_t divMantissas(uint64_t lhs, uint64_t rhs) + { + uint128Mantissa lhs128 = uint128Mantissa::createAsShiftedByMantissaBitCnt(lhs); + return lhs128.divByFloat64(rhs); + } + + template + uint64_t promoteToUint64(T val) + { + using AsFloat = typename float_of_size::type; + uint64_t asUint = ieee754::impl::castToUintType(val); + + const uint64_t sign = (uint64_t(ieee754::traits::signMask) & asUint) << (sizeof(float64_t) - sizeof(T)); + const int64_t newExponent = ieee754::extractExponent(val) + ieee754::traits::exponentBias; + + const uint64_t exp = (uint64_t(ieee754::extractExponent(val)) + ieee754::traits::exponentBias) << (ieee754::traits::mantissaBitCnt); + const uint64_t mantissa = (uint64_t(ieee754::traits::mantissaMask) & asUint) << (ieee754::traits::exponentBias - ieee754::traits::mantissaBitCnt); + + return sign | exp | mantissa; + }; + + template<> uint64_t promoteToUint64(float64_t val) { return bit_cast(val); } + + uint32_t2 umulExtended(uint32_t lhs, uint32_t rhs) + { + uint64_t product = uint64_t(lhs) * uint64_t(rhs); + nbl::hlsl::uint32_t2 output; + output.x = uint32_t((product & 0xFFFFFFFF00000000) >> 32); + output.y = uint32_t(product & 0x00000000FFFFFFFFull); + return output; + } + + uint64_t propagateFloat64NaN(uint64_t lhs, uint64_t rhs) + { +#if defined RELAXED_NAN_PROPAGATION + return lhs | rhs; +#else + const bool lhsIsNaN = isnan(bit_cast(lhs)); + const bool rhsIsNaN = isnan(bit_cast(rhs)); + lhs |= 0x0000000000080000ull; + rhs |= 0x0000000000080000ull; + + return lerp(rhs, lerp(lhs, rhs, rhsIsNaN), lhsIsNaN); +#endif + } + + uint64_t packFloat64(uint32_t zSign, int zExp, uint32_t zFrac0, uint32_t zFrac1) + { + nbl::hlsl::uint32_t2 z; + + z.x = zSign + (uint32_t(zExp) << 20) + zFrac0; + z.y = zFrac1; + + uint64_t output = 0u; + output |= (uint64_t(z.x) << 32) & 0xFFFFFFFF00000000ull; + output |= uint64_t(z.y); + return output; + } + + uint32_t2 packUint64(uint64_t val) + { + return uint32_t2((val & 0xFFFFFFFF00000000ull) >> 32, val & 0x00000000FFFFFFFFull); + } + + uint64_t unpackUint64(uint32_t2 val) + { + return ((uint64_t(val.x) & 0x00000000FFFFFFFFull) << 32) | uint64_t(val.y); + } + + nbl::hlsl::uint32_t2 add64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) + { + nbl::hlsl::uint32_t2 output; + output.y = a1 + b1; + output.x 
= a0 + b0 + uint32_t(output.y < a1); + + return output; + } + + + nbl::hlsl::uint32_t2 sub64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) + { + nbl::hlsl::uint32_t2 output; + output.y = a1 - b1; + output.x = a0 - b0 - uint32_t(a1 < b1); + + return output; + } + + // TODO: test + int countLeadingZeros32(uint32_t val) + { +#ifndef __HLSL_VERSION + return 31 - nbl::hlsl::findMSB(val); +#else + return 31 - firstbithigh(val); +#endif + } + + uint32_t2 shift64RightJamming(uint32_t2 val, int count) + { + uint32_t2 output; + const int negCount = (-count) & 31; + + output.x = lerp(0u, val.x, count == 0); + output.x = lerp(output.x, (val.x >> count), count < 32); + + output.y = uint32_t((val.x | val.y) != 0u); /* count >= 64 */ + uint32_t z1_lt64 = (val.x>>(count & 31)) | uint32_t(((val.x<>count) | uint32_t((val.y<> (count & 31)), count < 64); + output.y = lerp(output.y, (val.x << negCount) | (val.y >> count), count < 32); + + val.z = lerp(val.z | val.y, val.z, count < 32); + output.x = lerp(output.x, val.x >> count, count < 32); + output.z |= uint32_t(val.z != 0u); + + output.x = lerp(output.x, 0u, (count == 32)); + output.y = lerp(output.y, val.x, (count == 32)); + output.z = lerp(output.z, val.y, (count == 32)); + output.x = lerp(output.x, val.x, (count == 0)); + output.y = lerp(output.y, val.y, (count == 0)); + output.z = lerp(output.z, val.z, (count == 0)); + + return output; + } + + uint64_t shortShift64Left(uint64_t val, int count) + { + const uint32_t2 packed = packUint64(val); + + nbl::hlsl::uint32_t2 output; + output.y = packed.y << count; + // TODO: fix + output.x = lerp((packed.x << count | (packed.y >> ((-count) & 31))), packed.x, count == 0); + + // y = 3092377600 + // x = 2119009566 + return unpackUint64(output); + }; + + uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) + { + return signShifted + expShifted + mantissa; + } + + uint64_t roundAndPackFloat64(uint64_t zSign, int zExp, uint32_t3 mantissaExtended) + { + bool roundNearestEven; + bool increment; + + roundNearestEven = true; + increment = int(mantissaExtended.z) < 0; + if (!roundNearestEven) + { + if (false) //(FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) + { + increment = false; + } + else + { + if (false) //(zSign != 0u) + { + //increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && + // (zFrac2 != 0u); + } + else + { + //increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && + // (zFrac2 != 0u); + } + } + } + if (0x7FD <= zExp) + { + if ((0x7FD < zExp) || ((zExp == 0x7FD) && (0x001FFFFFu == mantissaExtended.x && 0xFFFFFFFFu == mantissaExtended.y) && increment)) + { + if (false) // ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) || + // ((zSign != 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP)) || + // ((zSign == 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN))) + { + return packFloat64(zSign, 0x7FE, 0x000FFFFFu, 0xFFFFFFFFu); + } + + return packFloat64(zSign, 0x7FF, 0u, 0u); + } + } + + if (zExp < 0) + { + mantissaExtended = shift64ExtraRightJamming(mantissaExtended, -zExp); + zExp = 0; + + if (roundNearestEven) + { + increment = mantissaExtended.z < 0u; + } + else + { + if (zSign != 0u) + { + increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && (mantissaExtended.z != 0u); + } + else + { + increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && (mantissaExtended.z != 0u); + } + } + } + + if (increment) + { + const uint64_t added = impl::unpackUint64(uint32_t2(mantissaExtended.xy)) + 1ull; + mantissaExtended.xy = packUint64(added); + mantissaExtended.y &= ~((mantissaExtended.z 
+ uint32_t(mantissaExtended.z == 0u)) & uint32_t(roundNearestEven)); + } + else + { + zExp = lerp(zExp, 0, (mantissaExtended.x | mantissaExtended.y) == 0u); + } + + return assembleFloat64(zSign, uint64_t(zExp) << nbl::hlsl::ieee754::traits::mantissaBitCnt, unpackUint64(mantissaExtended.xy)); + } + + uint64_t normalizeRoundAndPackFloat64(uint64_t sign, int exp, uint32_t frac0, uint32_t frac1) + { + int shiftCount; + nbl::hlsl::uint32_t3 frac = nbl::hlsl::uint32_t3(frac0, frac1, 0u); + + if (frac.x == 0u) + { + exp -= 32; + frac.x = frac.y; + frac.y = 0u; + } + + shiftCount = countLeadingZeros32(frac.x) - 11; + if (0 <= shiftCount) + { + frac.xy = shortShift64Left(unpackUint64(frac.xy), shiftCount); + } + else + { + frac.xyz = shift64ExtraRightJamming(uint32_t3(frac.xy, 0), -shiftCount); + } + exp -= shiftCount; + return roundAndPackFloat64(sign, exp, frac); + } + + void normalizeFloat64Subnormal(uint64_t mantissa, + NBL_REF_ARG(int) outExp, + NBL_REF_ARG(uint64_t) outMantissa) + { + uint32_t2 mantissaPacked = packUint64(mantissa); + int shiftCount; + uint32_t2 temp; + shiftCount = countLeadingZeros32(lerp(mantissaPacked.x, mantissaPacked.y, mantissaPacked.x == 0u)) - 11; + outExp = lerp(1 - shiftCount, -shiftCount - 31, mantissaPacked.x == 0u); + + temp.x = lerp(mantissaPacked.y << shiftCount, mantissaPacked.y >> (-shiftCount), shiftCount < 0); + temp.y = lerp(0u, mantissaPacked.y << (shiftCount & 31), shiftCount < 0); + + shortShift64Left(impl::unpackUint64(mantissaPacked), shiftCount); + + outMantissa = lerp(outMantissa, unpackUint64(temp), mantissaPacked.x == 0); + } + + bool areBothSameSignInfinity(uint64_t lhs, uint64_t rhs) + { + lhs ^= ieee754::traits::signMask; + rhs ^= ieee754::traits::signMask; + + bool output = lhs == rhs && ieee754::traits::inf; + bool output = output && ((lhs & (~ieee754::traits::signMask)) == ieee754::traits::inf); + + return output; + } + +} + +#endif \ No newline at end of file diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index e1e71c542c..9b8676160f 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -231,6 +231,8 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/normalization/shared_nor # HLSL LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/macros.h") +#impl +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/impl/emulated_float64_t_impl.hlsl") #emulated LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated_float64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tgmath.hlsl") From a0dfdb22fae3624fdd114fd01ab5f40d2dbcb612 Mon Sep 17 00:00:00 2001 From: Przemek Date: Tue, 30 Jul 2024 18:27:05 +0200 Subject: [PATCH 022/432] Fixes --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 396 +++++++++--------- include/nbl/builtin/hlsl/ieee754.hlsl | 4 +- .../hlsl/impl/emulated_float64_t_impl.hlsl | 235 ++++++----- include/nbl/builtin/hlsl/tgmath.hlsl | 2 +- 5 files changed, 338 insertions(+), 301 deletions(-) diff --git a/examples_tests b/examples_tests index 25dfb67454..c53f5c8186 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 25dfb6745482473bb63c893f8ae73770d2698983 +Subproject commit c53f5c8186780516833b2ffadba277c131bbc9fb diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index 8e8a26d6b3..fd156af407 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -1,80 +1,70 @@ #ifndef 
_NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_INCLUDED_ #define _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_INCLUDED_ -#include -#include -#include -#include #include - -#define FLOAT_ROUND_NEAREST_EVEN 0 -#define FLOAT_ROUND_TO_ZERO 1 -#define FLOAT_ROUND_DOWN 2 -#define FLOAT_ROUND_UP 3 -#define FLOAT_ROUNDING_MODE FLOAT_ROUND_NEAREST_EVEN namespace nbl { namespace hlsl { - template - struct emulated_float64_t_impl + template + struct emulated_float64_t { using storage_t = uint64_t; storage_t data; // constructors - /*static emulated_float64_t_impl create(uint16_t val) + /*static emulated_float64_t create(uint16_t val) { - return emulated_float64_t_impl(bit_cast(float64_t(val))); + return emulated_float64_t(bit_cast(float64_t(val))); }*/ - static emulated_float64_t_impl create(int32_t val) + static emulated_float64_t create(int32_t val) { - return emulated_float64_t_impl(bit_cast(float64_t(val))); + return emulated_float64_t(bit_cast(float64_t(val))); } - static emulated_float64_t_impl create(int64_t val) + static emulated_float64_t create(int64_t val) { - return emulated_float64_t_impl(bit_cast(float64_t(val))); + return emulated_float64_t(bit_cast(float64_t(val))); } - static emulated_float64_t_impl create(uint32_t val) + static emulated_float64_t create(uint32_t val) { - return emulated_float64_t_impl(bit_cast(float64_t(val))); + return emulated_float64_t(bit_cast(float64_t(val))); } - static emulated_float64_t_impl create(uint64_t val) + static emulated_float64_t create(uint64_t val) { - return emulated_float64_t_impl(bit_cast(float64_t(val))); + return emulated_float64_t(bit_cast(float64_t(val))); } - static emulated_float64_t_impl create(float64_t val) + static emulated_float64_t create(float64_t val) { - return emulated_float64_t_impl(bit_cast(val)); + return emulated_float64_t(bit_cast(val)); } // TODO: unresolved external symbol imath_half_to_float_table - /*static emulated_float64_t_impl create(float16_t val) + /*static emulated_float64_t create(float16_t val) { - return emulated_float64_t_impl(bit_cast(float64_t(val))); + return emulated_float64_t(bit_cast(float64_t(val))); }*/ - static emulated_float64_t_impl create(float32_t val) + static emulated_float64_t create(float32_t val) { - return emulated_float64_t_impl(bit_cast(float64_t(val))); + return emulated_float64_t(bit_cast(float64_t(val))); } - static emulated_float64_t_impl createPreserveBitPattern(uint64_t val) + static emulated_float64_t createPreserveBitPattern(uint64_t val) { - return emulated_float64_t_impl(val); + return emulated_float64_t(val); } // arithmetic operators - emulated_float64_t_impl operator+(const emulated_float64_t_impl rhs) + emulated_float64_t operator+(const emulated_float64_t rhs) { - emulated_float64_t_impl retval = createPreserveBitPattern(0u); + emulated_float64_t retval = createPreserveBitPattern(0u); uint64_t mantissa; uint32_t3 mantissaExtended; @@ -93,11 +83,11 @@ namespace hlsl { if (expDiff == 0) { - //if (lhsExp == 0x7FF) - //{ - // bool propagate = (lhsMantissa | rhsMantissa) != 0u; - // return createPreserveBitPattern(lerp(data, impl::propagateFloat64NaN(data, rhs.data), propagate)); - //} + if (lhsBiasedExp == ieee754::traits::specialValueExp) + { + bool propagate = (lhsMantissa | rhsMantissa) != 0u; + return createPreserveBitPattern(lerp(data, impl::propagateFloat64NaN(data, rhs.data), propagate)); + } mantissa = lhsMantissa + rhsMantissa; if (lhsBiasedExp == 0) @@ -143,7 +133,7 @@ namespace hlsl } // cannot happen but compiler cries about not every path returning value - return 
createPreserveBitPattern(0xdeadbeefbadcaffeull); + return createPreserveBitPattern(impl::roundAndPackFloat64(lhsSign, biasedExp, mantissaExtended)); } else { @@ -161,11 +151,11 @@ namespace hlsl lhsSign ^= ieee754::traits::signMask; } - //if (lhsExp == 0x7FF) - //{ - // bool propagate = (lhsHigh | lhsLow) != 0u; - // return nbl::hlsl::lerp(__packFloat64(lhsSign, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate); - //} + if (lhsBiasedExp == 0x7FF) + { + bool propagate = lhsMantissa != 0u; + return createPreserveBitPattern(lerp(impl::assembleFloat64(lhsSign, ieee754::traits::exponentMask, 0ull), impl::propagateFloat64NaN(data, rhs.data), propagate)); + } expDiff = lerp(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); rhsMantissa = lerp(rhsMantissa | 0x4000000000000000ull, rhsMantissa, rhsBiasedExp == 0); @@ -176,11 +166,11 @@ namespace hlsl --biasedExp; return createPreserveBitPattern(impl::normalizeRoundAndPackFloat64(lhsSign, biasedExp - 10, frac.x, frac.y)); } - //if (lhsExp == 0x7FF) - //{ - // bool propagate = ((lhsHigh | rhsHigh) | (lhsLow | rhsLow)) != 0u; - // return nbl::hlsl::lerp(0xFFFFFFFFFFFFFFFFUL, __propagateFloat64NaN(a, b), propagate); - //} + if (lhsBiasedExp == ieee754::traits::specialValueExp) + { + bool propagate = ((lhsMantissa) | (rhsMantissa)) != 0u; + return createPreserveBitPattern(lerp(ieee754::traits::quietNaN, impl::propagateFloat64NaN(data, rhs.data), propagate)); + } rhsBiasedExp = lerp(rhsBiasedExp, 1, lhsBiasedExp == 0); lhsBiasedExp = lerp(lhsBiasedExp, 1, lhsBiasedExp == 0); @@ -218,17 +208,17 @@ namespace hlsl } } - emulated_float64_t_impl operator-(emulated_float64_t_impl rhs) + emulated_float64_t operator-(emulated_float64_t rhs) { - emulated_float64_t_impl lhs = createPreserveBitPattern(data); - emulated_float64_t_impl rhsFlipped = rhs.flipSign(); + emulated_float64_t lhs = createPreserveBitPattern(data); + emulated_float64_t rhsFlipped = rhs.flipSign(); return lhs + rhsFlipped; } - emulated_float64_t_impl operator*(emulated_float64_t_impl rhs) + emulated_float64_t operator*(emulated_float64_t rhs) { - emulated_float64_t_impl retval = emulated_float64_t_impl::createPreserveBitPattern(0u); + emulated_float64_t retval = emulated_float64_t::createPreserveBitPattern(0u); uint64_t lhsSign = data & ieee754::traits::signMask; uint64_t rhsSign = rhs.data & ieee754::traits::signMask; @@ -239,145 +229,129 @@ namespace hlsl int exp = int(lhsBiasedExp + rhsBiasedExp) - ieee754::traits::exponentBias; uint64_t sign = (data ^ rhs.data) & ieee754::traits::signMask; - - if (lhsBiasedExp == 0x7FF) + if (!FastMath) { - if ((lhsMantissa != 0u) || - ((rhsBiasedExp == 0x7FF) && (rhsMantissa != 0u))) { - return createPreserveBitPattern(impl::propagateFloat64NaN(data, rhs.data)); - } - if ((uint64_t(rhsBiasedExp) | rhsMantissa) == 0u) - return createPreserveBitPattern(0xFFFFFFFFFFFFFFFFull); + if (lhsBiasedExp == ieee754::traits::specialValueExp) + { + if ((lhsMantissa != 0u) || ((rhsBiasedExp == 0x7FF) && (rhsMantissa != 0u))) + return createPreserveBitPattern(impl::propagateFloat64NaN(data, rhs.data)); + if ((uint64_t(rhsBiasedExp) | rhsMantissa) == 0u) + return createPreserveBitPattern(ieee754::traits::quietNaN); - return createPreserveBitPattern(impl::assembleFloat64(sign, ieee754::traits::exponentMask, 0ull)); - } - if (rhsBiasedExp == 0x7FF) - { - /* a cannot be NaN, but is b NaN? 
*/ - if (rhsMantissa != 0u) + return createPreserveBitPattern(impl::assembleFloat64(sign, ieee754::traits::exponentMask, 0ull)); + } + if (rhsBiasedExp == ieee754::traits::specialValueExp) + { + /* a cannot be NaN, but is b NaN? */ + if (rhsMantissa != 0u) #ifdef RELAXED_NAN_PROPAGATION - return rhs.data; + return rhs.data; #else - return createPreserveBitPattern(impl::propagateFloat64NaN(data, rhs.data)); + return createPreserveBitPattern(impl::propagateFloat64NaN(data, rhs.data)); #endif - if ((uint64_t(lhsBiasedExp) | lhsMantissa) == 0u) - return createPreserveBitPattern(0xFFFFFFFFFFFFFFFFull); + if ((uint64_t(lhsBiasedExp) | lhsMantissa) == 0u) + return createPreserveBitPattern(ieee754::traits::quietNaN); - return createPreserveBitPattern(sign | ieee754::traits::exponentMask); - } - if (lhsBiasedExp == 0) - { - if (lhsMantissa == 0u) - return createPreserveBitPattern(sign); - impl::normalizeFloat64Subnormal(lhsMantissa, lhsBiasedExp, lhsMantissa); - } - if (rhsBiasedExp == 0) - { - if (rhsMantissa == 0u) - return createPreserveBitPattern(sign); - impl::normalizeFloat64Subnormal(rhsMantissa, rhsBiasedExp, rhsMantissa); - } - - if (false) - { - lhsMantissa |= 1ull << 52; - rhsMantissa = impl::shortShift64Left(rhsMantissa, 12); - - uint32_t4 mantissasPacked; - mantissasPacked.xy = impl::packUint64(lhsMantissa); - mantissasPacked.zw = impl::packUint64(rhsMantissa); - - mantissasPacked = impl::mul64to128(mantissasPacked); - - mantissasPacked.xy = impl::packUint64(impl::unpackUint64(mantissasPacked.xy) + lhsMantissa); - mantissasPacked.z |= uint32_t(mantissasPacked.w != 0u); - if (0x00200000u <= mantissasPacked.x) + return createPreserveBitPattern(sign | ieee754::traits::exponentMask); + } + if (lhsBiasedExp == 0) { - mantissasPacked = uint32_t4(impl::shift64ExtraRightJamming(mantissasPacked.xyz, 1), 0u); - ++exp; + if (lhsMantissa == 0u) + return createPreserveBitPattern(sign); + impl::normalizeFloat64Subnormal(lhsMantissa, lhsBiasedExp, lhsMantissa); } - - return createPreserveBitPattern(impl::roundAndPackFloat64(sign, exp, mantissasPacked.xyz)); - } - else - { - lhsMantissa |= 1ull << 52; - rhsMantissa |= 1ull << 52; - - uint32_t2 lhsPacked = impl::packUint64(lhsMantissa); - uint32_t2 rhsPacked = impl::packUint64(rhsMantissa); - uint64_t lhsHigh = lhsPacked.x; - uint64_t lhsLow = lhsPacked.y; - uint64_t rhsHigh = rhsPacked.x; - uint64_t rhsLow = rhsPacked.y; - - //((hi_lhs * hi_rhs) << 11) + ((hi_lhs * lo_rhs + lo_lhs * hi_rhs) >> 37) - - uint64_t newPseudoMantissa = ((lhsHigh * rhsHigh) << 11) + ((lhsHigh * rhsLow + lhsLow * rhsHigh) >> 37); - newPseudoMantissa <<= 1; - //newPseudoMantissa >>= 52; - /*if (newPseudoMantissa >= (1ull << 52)) + if (rhsBiasedExp == 0) { - newPseudoMantissa >>= 1; - ++exp; - }*/ + if (rhsMantissa == 0u) + return createPreserveBitPattern(sign); + impl::normalizeFloat64Subnormal(rhsMantissa, rhsBiasedExp, rhsMantissa); + } + } + + const uint64_t hi_l = (lhsMantissa >> 21) | (1ull << 31); + const uint64_t lo_l = lhsMantissa & ((1ull << 21) - 1); + const uint64_t hi_r = (rhsMantissa >> 21) | (1ull << 31); + const uint64_t lo_r = rhsMantissa & ((1ull << 21) - 1); - return createPreserveBitPattern(impl::assembleFloat64(sign, uint64_t(exp) << ieee754::traits::mantissaBitCnt, newPseudoMantissa & ieee754::traits::mantissaMask)); + //const uint64_t RoundToNearest = (1ull << 31) - 1; + uint64_t newPseudoMantissa = ((hi_l * hi_r) >> 10) + ((hi_l * lo_r + lo_l * hi_r/* + RoundToNearest*/) >> 31); + + if (newPseudoMantissa & (0x1ull << 53)) + { + newPseudoMantissa >>= 1; + 
++exp; } + newPseudoMantissa &= (ieee754::traits::mantissaMask); - + return createPreserveBitPattern(impl::assembleFloat64(sign, uint64_t(exp) << ieee754::traits::mantissaBitCnt, newPseudoMantissa)); } - emulated_float64_t_impl operator/(const emulated_float64_t_impl rhs) + emulated_float64_t operator/(const emulated_float64_t rhs) { - // TODO: maybe add function to extract real mantissa const uint64_t lhsRealMantissa = (ieee754::extractMantissa(data) | (1ull << ieee754::traits::mantissaBitCnt)); const uint64_t rhsRealMantissa = ieee754::extractMantissa(rhs.data) | (1ull << ieee754::traits::mantissaBitCnt); - const uint64_t sign = (data ^ rhs.data) & ieee754::traits::signMask; int exp = ieee754::extractExponent(data) - ieee754::extractExponent(rhs.data) + ieee754::traits::exponentBias; - uint64_t mantissa = impl::divMantissas(lhsRealMantissa, rhsRealMantissa); - if (mantissa & (1ULL << (ieee754::traits::mantissaBitCnt + 1))) + uint64_t2 lhsMantissaShifted = impl::shiftMantissaLeftBy52(lhsRealMantissa); + uint64_t mantissa = impl::divmod128by64(lhsMantissaShifted.x, lhsMantissaShifted.y, rhsRealMantissa).x; + + if (mantissa & (0x1ull << 53)) { - mantissa >>= 1; ++exp; } + else + { + mantissa >>= 1; + } - return createPreserveBitPattern(impl::assembleFloat64(sign, exp, mantissa & ieee754::traits::mantissaMask)); - } + mantissa &= ieee754::traits::mantissaMask; + return createPreserveBitPattern(impl::assembleFloat64(sign, uint64_t(exp) << ieee754::traits::mantissaBitCnt, mantissa)); + } // relational operators - bool operator==(emulated_float64_t_impl rhs) + bool operator==(emulated_float64_t rhs) { - if (FastMath && (isnan(data) || isnan(rhs.data))) + if (!FastMath && (isnan(data) || isnan(rhs.data))) return false; + // TODO: i'm not sure about this one + if (!FastMath && impl::areBothZero(data, rhs.data)) + return true; - const emulated_float64_t_impl xored = emulated_float64_t_impl::createPreserveBitPattern(data ^ rhs.data); + const emulated_float64_t xored = emulated_float64_t::createPreserveBitPattern(data ^ rhs.data); // TODO: check what fast math returns for -0 == 0 if ((xored.data & 0x7FFFFFFFFFFFFFFFull) == 0ull) return true; return !(xored.data); } - bool operator!=(emulated_float64_t_impl rhs) + bool operator!=(emulated_float64_t rhs) { - if (FastMath && (isnan(data) || isnan(rhs.data))) + if (!FastMath && (isnan(data) || isnan(rhs.data))) return true; + // TODO: i'm not sure about this one + if (!FastMath && impl::areBothSameSignZero(data, rhs.data)) + return false; - const emulated_float64_t_impl xored = emulated_float64_t_impl::createPreserveBitPattern(data ^ rhs.data); - + const emulated_float64_t xored = emulated_float64_t::createPreserveBitPattern(data ^ rhs.data); // TODO: check what fast math returns for -0 == 0 if ((xored.data & 0x7FFFFFFFFFFFFFFFull) == 0ull) return false; return xored.data; } - bool operator<(emulated_float64_t_impl rhs) + bool operator<(emulated_float64_t rhs) { + if (!FastMath && (isnan(data) || isnan(rhs.data))) + return false; + if (!FastMath && impl::areBothSameSignInfinity(data, rhs.data)) + return false; + if (!FastMath && impl::areBothZero(data, rhs.data)) + return false; + const uint64_t lhsSign = ieee754::extractSign(data); const uint64_t rhsSign = ieee754::extractSign(rhs.data); @@ -389,8 +363,15 @@ namespace hlsl return (lhsFlipped & diffBits) < (rhsFlipped & diffBits); } - bool operator>(emulated_float64_t_impl rhs) + bool operator>(emulated_float64_t rhs) { + if (!FastMath && (isnan(data) || isnan(rhs.data))) + return true; + if (!FastMath 
&& impl::areBothSameSignInfinity(data, rhs.data)) + return false; + if (!FastMath && impl::areBothZero(data, rhs.data)) + return false; + const uint64_t lhsSign = ieee754::extractSign(data); const uint64_t rhsSign = ieee754::extractSign(rhs.data); @@ -402,12 +383,12 @@ namespace hlsl return (lhsFlipped & diffBits) > (rhsFlipped & diffBits); } - bool operator<=(emulated_float64_t_impl rhs) { return !(emulated_float64_t_impl::createPreserveBitPattern(data) > emulated_float64_t_impl::createPreserveBitPattern(rhs.data)); } - bool operator>=(emulated_float64_t_impl rhs) { return !(emulated_float64_t_impl::createPreserveBitPattern(data) < emulated_float64_t_impl::createPreserveBitPattern(rhs.data)); } + bool operator<=(emulated_float64_t rhs) { return !(emulated_float64_t::createPreserveBitPattern(data) > emulated_float64_t::createPreserveBitPattern(rhs.data)); } + bool operator>=(emulated_float64_t rhs) { return !(emulated_float64_t::createPreserveBitPattern(data) < emulated_float64_t::createPreserveBitPattern(rhs.data)); } //logical operators - bool operator&&(emulated_float64_t_impl rhs) { return bool(data) && bool(rhs.data); } - bool operator||(emulated_float64_t_impl rhs) { return bool(data) || bool(rhs.data); } + bool operator&&(emulated_float64_t rhs) { return bool(data) && bool(rhs.data); } + bool operator||(emulated_float64_t rhs) { return bool(data) || bool(rhs.data); } bool operator!() { return !bool(data); } // OMITED OPERATORS @@ -416,7 +397,7 @@ namespace hlsl // - access operators (dereference and addressof) not supported in HLSL // TODO: should modify self? - emulated_float64_t_impl flipSign() + emulated_float64_t flipSign() { return createPreserveBitPattern(data ^ ieee754::traits::signMask); } @@ -427,47 +408,83 @@ namespace hlsl } }; - using emulated_float64_t = emulated_float64_t_impl; +#define COMMA , +#define IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(Type) \ +template<>\ +struct traits_base\ +{\ + NBL_CONSTEXPR_STATIC_INLINE int16_t exponentBitCnt = 11;\ + NBL_CONSTEXPR_STATIC_INLINE int16_t mantissaBitCnt = 52;\ +};\ +template<>\ +uint32_t extractBiasedExponent(Type x)\ +{\ + return extractBiasedExponent(x.data);\ +}\ +\ +template<>\ +int extractExponent(Type x)\ +{\ + return extractExponent(x.data);\ +}\ +\ +template<>\ +Type replaceBiasedExponent(Type x, typename unsigned_integer_of_size::type biasedExp)\ +{\ + return Type(replaceBiasedExponent(x.data, biasedExp));\ +}\ +\ +template <>\ +Type fastMulExp2(Type x, int n)\ +{\ + return Type(replaceBiasedExponent(x.data, extractBiasedExponent(x) + uint32_t(n)));\ +}\ +\ +template <>\ +unsigned_integer_of_size::type extractMantissa(Type x)\ +{\ + return extractMantissa(x.data);\ +}\ + + namespace ieee754 { - template<> - struct traits_base - { - NBL_CONSTEXPR_STATIC_INLINE int16_t exponentBitCnt = 11; - NBL_CONSTEXPR_STATIC_INLINE int16_t mantissaBitCnt = 52; - }; - - template<> - uint32_t extractBiasedExponent(emulated_float64_t x) - { - return extractBiasedExponent(x.data); - } - - template<> - int extractExponent(emulated_float64_t x) - { - return extractExponent(x.data); - } - - template<> - emulated_float64_t replaceBiasedExponent(emulated_float64_t x, typename unsigned_integer_of_size::type biasedExp) - { - return emulated_float64_t(replaceBiasedExponent(x.data, biasedExp)); - } - - //// performs no overflow tests, returns x*exp2(n) - template <> - emulated_float64_t fastMulExp2(emulated_float64_t x, int n) - { - return emulated_float64_t(replaceBiasedExponent(x.data, extractBiasedExponent(x) + uint32_t(n))); - } 
- - template <> - unsigned_integer_of_size::type extractMantissa(emulated_float64_t x) - { - return extractMantissa(x.data); - } + IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); + IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); + IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); + IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); + + //template<> + //uint32_t extractBiasedExponent(emulated_float64_t x) + //{ + // return extractBiasedExponent(x.data); + //} + + //template<> + //int extractExponent(emulated_float64_t x) + //{ + // return extractExponent(x.data); + //} + + //template<> + // emulated_float64_t replaceBiasedExponent(emulated_float64_t x, typename unsigned_integer_of_size::type biasedExp) + //{ + // return emulated_float64_t(replaceBiasedExponent(x.data, biasedExp)); + //} + + ////// performs no overflow tests, returns x*exp2(n) + //template <> + // emulated_float64_t fastMulExp2(emulated_float64_t x, int n) + //{ + // return emulated_float64_t(replaceBiasedExponent(x.data, extractBiasedExponent(x) + uint32_t(n))); + //} + + //template <> + //unsigned_integer_of_size::type extractMantissa(emulated_float64_t x) + //{ + // return extractMantissa(x.data); + //} } } @@ -479,4 +496,7 @@ namespace ieee754 #undef FLOAT_ROUND_UP #undef FLOAT_ROUNDING_MODE +#undef COMMA +#undef IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE + #endif diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index b4a65d785d..606c415fdb 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -97,8 +97,10 @@ struct traits : traits_base NBL_CONSTEXPR_STATIC_INLINE bit_rep_t signMask = bit_rep_t(0x1u) << (sizeof(Float) * 8 - 1); NBL_CONSTEXPR_STATIC_INLINE bit_rep_t exponentMask = ((~bit_rep_t(0)) << base_t::mantissaBitCnt) ^ signMask; NBL_CONSTEXPR_STATIC_INLINE bit_rep_t mantissaMask = (bit_rep_t(0x1u) << base_t::mantissaBitCnt) - 1; - NBL_CONSTEXPR_STATIC_INLINE bit_rep_t exponentBias = (int(0x1) << (base_t::exponentBitCnt - 1)) - 1; + NBL_CONSTEXPR_STATIC_INLINE int exponentBias = (int(0x1) << (base_t::exponentBitCnt - 1)) - 1; NBL_CONSTEXPR_STATIC_INLINE bit_rep_t inf = exponentMask; + NBL_CONSTEXPR_STATIC_INLINE bit_rep_t specialValueExp = (1ull << base_t::exponentBitCnt) - 1; + NBL_CONSTEXPR_STATIC_INLINE bit_rep_t quietNaN = exponentMask | (1ull << (base_t::mantissaBitCnt - 1)); }; template diff --git a/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl b/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl index 33ea303968..63c99d4321 100644 --- a/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl @@ -6,6 +6,12 @@ #include #include +#define FLOAT_ROUND_NEAREST_EVEN 0 +#define FLOAT_ROUND_TO_ZERO 1 +#define FLOAT_ROUND_DOWN 2 +#define FLOAT_ROUND_UP 3 +#define FLOAT_ROUNDING_MODE FLOAT_ROUND_NEAREST_EVEN + // TODO: when it will be possible, use this unions wherever they fit: /* * union Mantissa @@ -41,97 +47,13 @@ namespace hlsl { namespace impl { - struct uint128Mantissa + uint64_t2 shiftMantissaLeftBy52(uint64_t mantissa64) { - uint64_t highBits; - uint64_t lowBits; - - static uint128Mantissa create(uint64_t mantissa64) - { - uint128Mantissa output; - output.highBits = 0u; - output.lowBits = mantissa64; - - return output; - } - - void shiftLeftByOne() - { - highBits = (highBits << 1) | (lowBits >> 63); - lowBits <<= 1; - } - - void shiftRightByOne() 
- { - highBits >>= 1; - lowBits = (highBits >> 63) | (lowBits >> 1); - } - - // TODO: more efficient comparisions - bool operator>=(uint128Mantissa rhs) - { - return (highBits > rhs.highBits) || (highBits == rhs.highBits && lowBits >= rhs.lowBits); - } - - bool operator<(uint128Mantissa rhs) - { - return (highBits < rhs.highBits) || (highBits == rhs.highBits && lowBits < rhs.lowBits); - } - - uint128Mantissa operator-(uint128Mantissa rhs) - { - uint128Mantissa result; - result.lowBits = lowBits - rhs.lowBits; - result.highBits = highBits - rhs.highBits - (lowBits < rhs.lowBits); - return result; - } - - static uint128Mantissa createAsShiftedByMantissaBitCnt(uint64_t mantissa64) - { - uint128Mantissa output; - output.highBits = mantissa64 >> (64 - nbl::hlsl::ieee754::traits::mantissaBitCnt); - output.lowBits = mantissa64 << nbl::hlsl::ieee754::traits::mantissaBitCnt; + uint64_t2 output; + output.x = mantissa64 >> (64 - ieee754::traits::mantissaBitCnt); + output.y = mantissa64 << ieee754::traits::mantissaBitCnt; - return output; - } - - uint64_t divByFloat64(uint64_t floatRep) - { - uint128Mantissa output = create(0); - - uint128Mantissa divisor = create(floatRep); - uint128Mantissa remainder; - remainder.highBits = highBits; - remainder.lowBits = lowBits; - uint128Mantissa one = create(1); - - - while ((divisor.highBits < (1ULL << 63)) && (divisor < remainder)) - { - divisor.shiftLeftByOne(); - one.shiftLeftByOne(); - } - - while (one.highBits != 0 || one.lowBits != 0) - { - if (remainder >= divisor) - { - remainder = remainder - divisor; - output.highBits |= one.highBits; - output.lowBits |= one.lowBits; - } - output.shiftRightByOne(); - one.shiftRightByOne(); - } - - return output.lowBits; - } - }; - - uint64_t divMantissas(uint64_t lhs, uint64_t rhs) - { - uint128Mantissa lhs128 = uint128Mantissa::createAsShiftedByMantissaBitCnt(lhs); - return lhs128.divByFloat64(rhs); + return output; } template @@ -154,7 +76,7 @@ namespace impl uint32_t2 umulExtended(uint32_t lhs, uint32_t rhs) { uint64_t product = uint64_t(lhs) * uint64_t(rhs); - nbl::hlsl::uint32_t2 output; + uint32_t2 output; output.x = uint32_t((product & 0xFFFFFFFF00000000) >> 32); output.y = uint32_t(product & 0x00000000FFFFFFFFull); return output; @@ -165,18 +87,18 @@ namespace impl #if defined RELAXED_NAN_PROPAGATION return lhs | rhs; #else - const bool lhsIsNaN = isnan(bit_cast(lhs)); - const bool rhsIsNaN = isnan(bit_cast(rhs)); - lhs |= 0x0000000000080000ull; - rhs |= 0x0000000000080000ull; - return lerp(rhs, lerp(lhs, rhs, rhsIsNaN), lhsIsNaN); + lhs |= 0x0008000000000000ull; + rhs |= 0x0008000000000000ull; + return lerp(rhs, lerp(lhs, rhs, isnan(rhs)), isnan(lhs)); + return 0; #endif } + uint64_t packFloat64(uint32_t zSign, int zExp, uint32_t zFrac0, uint32_t zFrac1) { - nbl::hlsl::uint32_t2 z; + uint32_t2 z; z.x = zSign + (uint32_t(zExp) << 20) + zFrac0; z.y = zFrac1; @@ -197,9 +119,9 @@ namespace impl return ((uint64_t(val.x) & 0x00000000FFFFFFFFull) << 32) | uint64_t(val.y); } - nbl::hlsl::uint32_t2 add64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) + uint32_t2 add64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) { - nbl::hlsl::uint32_t2 output; + uint32_t2 output; output.y = a1 + b1; output.x = a0 + b0 + uint32_t(output.y < a1); @@ -207,9 +129,9 @@ namespace impl } - nbl::hlsl::uint32_t2 sub64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) + uint32_t2 sub64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) { - nbl::hlsl::uint32_t2 output; + uint32_t2 output; output.y = a1 - b1; output.x = a0 - b0 - 
uint32_t(a1 < b1); @@ -220,7 +142,7 @@ namespace impl int countLeadingZeros32(uint32_t val) { #ifndef __HLSL_VERSION - return 31 - nbl::hlsl::findMSB(val); + return 31 - findMSB(val); #else return 31 - firstbithigh(val); #endif @@ -285,7 +207,7 @@ namespace impl return output; } - nbl::hlsl::uint32_t3 shift64ExtraRightJamming(uint32_t3 val, int count) + uint32_t3 shift64ExtraRightJamming(uint32_t3 val, int count) { uint32_t3 output; output.x = 0u; @@ -317,13 +239,11 @@ namespace impl { const uint32_t2 packed = packUint64(val); - nbl::hlsl::uint32_t2 output; + uint32_t2 output; output.y = packed.y << count; // TODO: fix output.x = lerp((packed.x << count | (packed.y >> ((-count) & 31))), packed.x, count == 0); - // y = 3092377600 - // x = 2119009566 return unpackUint64(output); }; @@ -407,13 +327,13 @@ namespace impl zExp = lerp(zExp, 0, (mantissaExtended.x | mantissaExtended.y) == 0u); } - return assembleFloat64(zSign, uint64_t(zExp) << nbl::hlsl::ieee754::traits::mantissaBitCnt, unpackUint64(mantissaExtended.xy)); + return assembleFloat64(zSign, uint64_t(zExp) << ieee754::traits::mantissaBitCnt, unpackUint64(mantissaExtended.xy)); } uint64_t normalizeRoundAndPackFloat64(uint64_t sign, int exp, uint32_t frac0, uint32_t frac1) { int shiftCount; - nbl::hlsl::uint32_t3 frac = nbl::hlsl::uint32_t3(frac0, frac1, 0u); + uint32_t3 frac = uint32_t3(frac0, frac1, 0u); if (frac.x == 0u) { @@ -425,7 +345,8 @@ namespace impl shiftCount = countLeadingZeros32(frac.x) - 11; if (0 <= shiftCount) { - frac.xy = shortShift64Left(unpackUint64(frac.xy), shiftCount); + // TODO: this is packing and unpacking madness, fix it + frac.xy = packUint64(shortShift64Left(unpackUint64(frac.xy), shiftCount)); } else { @@ -459,11 +380,105 @@ namespace impl rhs ^= ieee754::traits::signMask; bool output = lhs == rhs && ieee754::traits::inf; - bool output = output && ((lhs & (~ieee754::traits::signMask)) == ieee754::traits::inf); + output = output && ((lhs & (~ieee754::traits::signMask)) == ieee754::traits::inf); return output; } -} + bool areBothZero(uint64_t lhs, uint64_t rhs) + { + return ((lhs << 1) == 0ull) && ((rhs << 1) == 0ull); + } + + bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) + { + return ((lhs << 1) == 0ull) && (lhs == rhs); + } + + // TODO: find more efficient algorithm + uint64_t nlz64(uint64_t x) + { + static const uint64_t MASK = 1ull << 63; + + uint64_t counter = 0; + + while ((x & MASK) == 0) + { + x <<= 1; + ++counter; + } + return counter; + } + + uint64_t2 divmod128by64(const uint64_t u1, const uint64_t u0, uint64_t v) + { + const uint64_t b = 1ull << 32; + uint64_t un1, un0, vn1, vn0, q1, q0, un32, un21, un10, rhat, left, right; + uint64_t s; + + s = nlz64(v); + v <<= s; + vn1 = v >> 32; + vn0 = v & 0xffffffff; + if (s > 0) + { + un32 = (u1 << s) | (u0 >> (64 - s)); + un10 = u0 << s; + } + else + { + un32 = u1; + un10 = u0; + } + + un1 = un10 >> 32; + un0 = un10 & 0xffffffff; + + q1 = un32 / vn1; + rhat = un32 % vn1; + + left = q1 * vn0; + right = (rhat << 32) + un1; + while ((q1 >= b) || (left > right)) + { + --q1; + rhat += vn1; + if (rhat < b) + { + left -= vn0; + right = (rhat << 32) | un1; + } + break; + } + + un21 = (un32 << 32) + (un1 - (q1 * v)); + + q0 = un21 / vn1; + rhat = un21 % vn1; + + left = q0 * vn0; + right = (rhat << 32) | un0; + while ((q0 >= b) || (left > right)) + { + --q0; + rhat += vn1; + if (rhat < b) + { + left -= vn0; + right = (rhat << 32) | un0; + continue; + } + break; + } + + uint64_t2 output; + output.x = (q1 << 32) | q0; // quotient + output.y = ((un21 << 32) + 
(un0 - (q0 * v))) >> s; // remainder + + return output; + } +} +} +} #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/tgmath.hlsl b/include/nbl/builtin/hlsl/tgmath.hlsl index 464b4ed9ac..6738ac2d86 100644 --- a/include/nbl/builtin/hlsl/tgmath.hlsl +++ b/include/nbl/builtin/hlsl/tgmath.hlsl @@ -16,7 +16,7 @@ bool isnan(Float val) { using AsUint = typename unsigned_integer_of_size::type; AsUint asUint = bit_cast(val); - return bool((asUint & ieee754::traits::exponentMask) && (asUint & ieee754::traits::mantissaMask)); + return bool((ieee754::extractBiasedExponent(val) == ieee754::traits::specialValueExp) && (asUint & ieee754::traits::mantissaMask)); } } From a382ee51fd94db6d721aa09df4635aa79dba5e09 Mon Sep 17 00:00:00 2001 From: Przemek Date: Wed, 31 Jul 2024 15:52:05 +0200 Subject: [PATCH 023/432] Refactorization --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 14 ++++---- .../hlsl/impl/emulated_float64_t_impl.hlsl | 35 +++++++++---------- include/nbl/builtin/hlsl/limits.hlsl | 5 +-- include/nbl/builtin/hlsl/tgmath.hlsl | 1 + 5 files changed, 28 insertions(+), 29 deletions(-) diff --git a/examples_tests b/examples_tests index c53f5c8186..822446f98b 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit c53f5c8186780516833b2ffadba277c131bbc9fb +Subproject commit 822446f98b718c4f719c9f8a9ab70fb0cea74e1a diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index fd156af407..ffe6b1f471 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -107,20 +107,20 @@ namespace hlsl swap(lhsBiasedExp, rhsBiasedExp); } - if (lhsBiasedExp == 0x7FF) + if (lhsBiasedExp == ieee754::traits::specialValueExp) { const bool propagate = (lhsMantissa) != 0u; return createPreserveBitPattern(lerp(ieee754::traits::exponentMask | lhsSign, impl::propagateFloat64NaN(data, rhs.data), propagate)); } expDiff = lerp(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); - rhsMantissa = lerp(rhsMantissa | 0x0010000000000000ull, rhsMantissa, rhsBiasedExp == 0); + rhsMantissa = lerp(rhsMantissa | (1ull << 52), rhsMantissa, rhsBiasedExp == 0); const uint32_t3 shifted = impl::shift64ExtraRightJamming(uint32_t3(impl::packUint64(rhsMantissa), 0u), expDiff); rhsMantissa = impl::unpackUint64(shifted.xy); mantissaExtended.z = shifted.z; biasedExp = lhsBiasedExp; - lhsMantissa |= 0x0010000000000000ull; + lhsMantissa |= (1ull << 52); mantissaExtended.xy = impl::packUint64(lhsMantissa + rhsMantissa); --biasedExp; if (!(mantissaExtended.x < 0x00200000u)) @@ -151,7 +151,7 @@ namespace hlsl lhsSign ^= ieee754::traits::signMask; } - if (lhsBiasedExp == 0x7FF) + if (lhsBiasedExp == ieee754::traits::specialValueExp) { bool propagate = lhsMantissa != 0u; return createPreserveBitPattern(lerp(impl::assembleFloat64(lhsSign, ieee754::traits::exponentMask, 0ull), impl::propagateFloat64NaN(data, rhs.data), propagate)); @@ -233,7 +233,7 @@ namespace hlsl { if (lhsBiasedExp == ieee754::traits::specialValueExp) { - if ((lhsMantissa != 0u) || ((rhsBiasedExp == 0x7FF) && (rhsMantissa != 0u))) + if ((lhsMantissa != 0u) || ((rhsBiasedExp == ieee754::traits::specialValueExp) && (rhsMantissa != 0u))) return createPreserveBitPattern(impl::propagateFloat64NaN(data, rhs.data)); if ((uint64_t(rhsBiasedExp) | rhsMantissa) == 0u) return createPreserveBitPattern(ieee754::traits::quietNaN); @@ -296,7 +296,7 @@ namespace hlsl int exp = ieee754::extractExponent(data) - 
ieee754::extractExponent(rhs.data) + ieee754::traits::exponentBias; uint64_t2 lhsMantissaShifted = impl::shiftMantissaLeftBy52(lhsRealMantissa); - uint64_t mantissa = impl::divmod128by64(lhsMantissaShifted.x, lhsMantissaShifted.y, rhsRealMantissa).x; + uint64_t mantissa = impl::divmod128by64(lhsMantissaShifted.x, lhsMantissaShifted.y, rhsRealMantissa); if (mantissa & (0x1ull << 53)) { @@ -446,8 +446,6 @@ unsigned_integer_of_size::type extractMantissa(Type x)\ return extractMantissa(x.data);\ }\ - - namespace ieee754 { IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); diff --git a/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl b/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl index 63c99d4321..478ae44090 100644 --- a/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl @@ -287,10 +287,10 @@ namespace impl // ((zSign != 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP)) || // ((zSign == 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN))) { - return packFloat64(zSign, 0x7FE, 0x000FFFFFu, 0xFFFFFFFFu); + return assembleFloat64(zSign, 0x7FE << ieee754::traits::mantissaBitCnt, 0x000FFFFFFFFFFFFFull); } - return packFloat64(zSign, 0x7FF, 0u, 0u); + return assembleFloat64(zSign, ieee754::traits::exponentMask, 0ull); } } @@ -410,30 +410,33 @@ namespace impl return counter; } - uint64_t2 divmod128by64(const uint64_t u1, const uint64_t u0, uint64_t v) + // returns pair of quotient and remainder + uint64_t divmod128by64(const uint64_t dividentHigh, const uint64_t dividentLow, uint64_t divisor) { const uint64_t b = 1ull << 32; uint64_t un1, un0, vn1, vn0, q1, q0, un32, un21, un10, rhat, left, right; uint64_t s; - s = nlz64(v); - v <<= s; - vn1 = v >> 32; - vn0 = v & 0xffffffff; + //TODO: countl_zero + s = countl_zero(divisor); + //s = nlz64(divisor); + divisor <<= s; + vn1 = divisor >> 32; + vn0 = divisor & 0xFFFFFFFF; if (s > 0) { - un32 = (u1 << s) | (u0 >> (64 - s)); - un10 = u0 << s; + un32 = (dividentHigh << s) | (dividentLow >> (64 - s)); + un10 = dividentLow << s; } else { - un32 = u1; - un10 = u0; + un32 = dividentHigh; + un10 = dividentLow; } un1 = un10 >> 32; - un0 = un10 & 0xffffffff; + un0 = un10 & 0xFFFFFFFF; q1 = un32 / vn1; rhat = un32 % vn1; @@ -452,7 +455,7 @@ namespace impl break; } - un21 = (un32 << 32) + (un1 - (q1 * v)); + un21 = (un32 << 32) + (un1 - (q1 * divisor)); q0 = un21 / vn1; rhat = un21 % vn1; @@ -472,11 +475,7 @@ namespace impl break; } - uint64_t2 output; - output.x = (q1 << 32) | q0; // quotient - output.y = ((un21 << 32) + (un0 - (q0 * v))) >> s; // remainder - - return output; + return (q1 << 32) | q0; } } } diff --git a/include/nbl/builtin/hlsl/limits.hlsl b/include/nbl/builtin/hlsl/limits.hlsl index 585cf3eff6..4b93af099c 100644 --- a/include/nbl/builtin/hlsl/limits.hlsl +++ b/include/nbl/builtin/hlsl/limits.hlsl @@ -4,6 +4,7 @@ #ifndef _NBL_BUILTIN_HLSL_LIMITS_INCLUDED_ #define _NBL_BUILTIN_HLSL_LIMITS_INCLUDED_ +#include #include #include @@ -127,7 +128,7 @@ struct num_base : type_identity NBL_CONSTEXPR_STATIC_INLINE int32_t float_max_decimal_exponent = 4*S16 + 30*S32 + 232*S64; NBL_CONSTEXPR_STATIC_INLINE int32_t float_exponent_bits = 8 * size - float_digits - 1; - NBL_CONSTEXPR_STATIC_INLINE int32_t float_max_exponent = 1l << float_exponent_bits; + NBL_CONSTEXPR_STATIC_INLINE int32_t float_max_exponent = 1 << float_exponent_bits; NBL_CONSTEXPR_STATIC_INLINE int32_t float_min_exponent = 3 - float_max_exponent; NBL_CONSTEXPR_STATIC_INLINE bool is_bool = 
is_same::value; @@ -236,7 +237,7 @@ template<> struct num_traits : num_base { NBL_CONSTEXPR_STATIC_INLINE float64_t max = 1.7976931348623158e+308; - NBL_CONSTEXPR_STATIC_INLINE float64_t min = 2.2250738585072014e-308; + NBL_CONSTEXPR_STATIC_INLINE NBL_FP64_LITERAL(float64_t) min = 2.2250738585072014e-308; NBL_CONSTEXPR_STATIC_INLINE float64_t denorm_min = 4.9406564584124654e-324; NBL_CONSTEXPR_STATIC_INLINE uint64_t quiet_NaN = 0x7FF8000000000000ull; NBL_CONSTEXPR_STATIC_INLINE uint64_t signaling_NaN = 0x7FF0000000000001ull; diff --git a/include/nbl/builtin/hlsl/tgmath.hlsl b/include/nbl/builtin/hlsl/tgmath.hlsl index 6738ac2d86..63e7802df7 100644 --- a/include/nbl/builtin/hlsl/tgmath.hlsl +++ b/include/nbl/builtin/hlsl/tgmath.hlsl @@ -15,6 +15,7 @@ template bool isnan(Float val) { using AsUint = typename unsigned_integer_of_size::type; + using AsFloat = typename float_of_size::type; AsUint asUint = bit_cast(val); return bool((ieee754::extractBiasedExponent(val) == ieee754::traits::specialValueExp) && (asUint & ieee754::traits::mantissaMask)); } From 1e419da30d3210bdb232ab902577adde465e57df Mon Sep 17 00:00:00 2001 From: Przemek Date: Thu, 1 Aug 2024 20:52:20 +0200 Subject: [PATCH 024/432] Fixes --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 39 ++++++----- .../hlsl/impl/emulated_float64_t_impl.hlsl | 68 +++++++++++-------- include/nbl/builtin/hlsl/tgmath.hlsl | 7 ++ 4 files changed, 65 insertions(+), 51 deletions(-) diff --git a/examples_tests b/examples_tests index deaa9f2c24..f99b039d79 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit deaa9f2c248b25d808f5c7c58553abe2ec9d21ee +Subproject commit f99b039d7970be6ea0ca590e257dc999cecd8ae9 diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index ffe6b1f471..4945617ab8 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -2,7 +2,6 @@ #define _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_INCLUDED_ #include - namespace nbl { namespace hlsl @@ -86,7 +85,7 @@ namespace hlsl if (lhsBiasedExp == ieee754::traits::specialValueExp) { bool propagate = (lhsMantissa | rhsMantissa) != 0u; - return createPreserveBitPattern(lerp(data, impl::propagateFloat64NaN(data, rhs.data), propagate)); + return createPreserveBitPattern(nbl::hlsl::lerp(data, impl::propagateFloat64NaN(data, rhs.data), propagate)); } mantissa = lhsMantissa + rhsMantissa; @@ -110,11 +109,11 @@ namespace hlsl if (lhsBiasedExp == ieee754::traits::specialValueExp) { const bool propagate = (lhsMantissa) != 0u; - return createPreserveBitPattern(lerp(ieee754::traits::exponentMask | lhsSign, impl::propagateFloat64NaN(data, rhs.data), propagate)); + return createPreserveBitPattern(nbl::hlsl::lerp(ieee754::traits::exponentMask | lhsSign, impl::propagateFloat64NaN(data, rhs.data), propagate)); } - expDiff = lerp(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); - rhsMantissa = lerp(rhsMantissa | (1ull << 52), rhsMantissa, rhsBiasedExp == 0); + expDiff = nbl::hlsl::lerp(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); + rhsMantissa = nbl::hlsl::lerp(rhsMantissa | (1ull << 52), rhsMantissa, rhsBiasedExp == 0); const uint32_t3 shifted = impl::shift64ExtraRightJamming(uint32_t3(impl::packUint64(rhsMantissa), 0u), expDiff); rhsMantissa = impl::unpackUint64(shifted.xy); mantissaExtended.z = shifted.z; @@ -154,11 +153,11 @@ namespace hlsl if (lhsBiasedExp == ieee754::traits::specialValueExp) { bool propagate = lhsMantissa != 0u; - 
return createPreserveBitPattern(lerp(impl::assembleFloat64(lhsSign, ieee754::traits::exponentMask, 0ull), impl::propagateFloat64NaN(data, rhs.data), propagate)); + return createPreserveBitPattern(nbl::hlsl::lerp(impl::assembleFloat64(lhsSign, ieee754::traits::exponentMask, 0ull), impl::propagateFloat64NaN(data, rhs.data), propagate)); } - expDiff = lerp(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); - rhsMantissa = lerp(rhsMantissa | 0x4000000000000000ull, rhsMantissa, rhsBiasedExp == 0); + expDiff = nbl::hlsl::lerp(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); + rhsMantissa = nbl::hlsl::lerp(rhsMantissa | 0x4000000000000000ull, rhsMantissa, rhsBiasedExp == 0); rhsMantissa = impl::unpackUint64(impl::shift64RightJamming(impl::packUint64(rhsMantissa), expDiff)); lhsMantissa |= 0x4000000000000000ull; frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); @@ -169,10 +168,10 @@ namespace hlsl if (lhsBiasedExp == ieee754::traits::specialValueExp) { bool propagate = ((lhsMantissa) | (rhsMantissa)) != 0u; - return createPreserveBitPattern(lerp(ieee754::traits::quietNaN, impl::propagateFloat64NaN(data, rhs.data), propagate)); + return createPreserveBitPattern(nbl::hlsl::lerp(ieee754::traits::quietNaN, impl::propagateFloat64NaN(data, rhs.data), propagate)); } - rhsBiasedExp = lerp(rhsBiasedExp, 1, lhsBiasedExp == 0); - lhsBiasedExp = lerp(lhsBiasedExp, 1, lhsBiasedExp == 0); + rhsBiasedExp = nbl::hlsl::lerp(rhsBiasedExp, 1, lhsBiasedExp == 0); + lhsBiasedExp = nbl::hlsl::lerp(lhsBiasedExp, 1, lhsBiasedExp == 0); const uint32_t2 lhsMantissaPacked = impl::packUint64(lhsMantissa); @@ -200,11 +199,11 @@ namespace hlsl signOfDifference = ieee754::traits::signMask; } - biasedExp = lerp(rhsBiasedExp, lhsBiasedExp, signOfDifference == 0u); + biasedExp = nbl::hlsl::lerp(rhsBiasedExp, lhsBiasedExp, signOfDifference == 0u); lhsSign ^= signOfDifference; uint64_t retval_0 = impl::packFloat64(uint32_t(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) << 31, 0, 0u, 0u); uint64_t retval_1 = impl::normalizeRoundAndPackFloat64(lhsSign, biasedExp - 11, frac.x, frac.y); - return createPreserveBitPattern(lerp(retval_0, retval_1, frac.x != 0u || frac.y != 0u)); + return createPreserveBitPattern(nbl::hlsl::lerp(retval_0, retval_1, frac.x != 0u || frac.y != 0u)); } } @@ -295,10 +294,10 @@ namespace hlsl const uint64_t sign = (data ^ rhs.data) & ieee754::traits::signMask; int exp = ieee754::extractExponent(data) - ieee754::extractExponent(rhs.data) + ieee754::traits::exponentBias; - uint64_t2 lhsMantissaShifted = impl::shiftMantissaLeftBy52(lhsRealMantissa); + uint64_t2 lhsMantissaShifted = impl::shiftMantissaLeftBy53(lhsRealMantissa); uint64_t mantissa = impl::divmod128by64(lhsMantissaShifted.x, lhsMantissaShifted.y, rhsRealMantissa); - if (mantissa & (0x1ull << 53)) + if (mantissa & (0x1ull << 54)) { ++exp; } @@ -315,7 +314,7 @@ namespace hlsl // relational operators bool operator==(emulated_float64_t rhs) { - if (!FastMath && (isnan(data) || isnan(rhs.data))) + if (!FastMath && (nbl::hlsl::isnan(data) || nbl::hlsl::isnan(rhs.data))) return false; // TODO: i'm not sure about this one if (!FastMath && impl::areBothZero(data, rhs.data)) @@ -330,7 +329,7 @@ namespace hlsl } bool operator!=(emulated_float64_t rhs) { - if (!FastMath && (isnan(data) || isnan(rhs.data))) + if (!FastMath && (nbl::hlsl::isnan(data) || nbl::hlsl::isnan(rhs.data))) return true; // TODO: i'm not sure about this one if (!FastMath && impl::areBothSameSignZero(data, rhs.data)) @@ -345,7 +344,7 @@ namespace hlsl } bool operator<(emulated_float64_t rhs) 
{ - if (!FastMath && (isnan(data) || isnan(rhs.data))) + if (!FastMath && (nbl::hlsl::isnan(data) || nbl::hlsl::isnan(rhs.data))) return false; if (!FastMath && impl::areBothSameSignInfinity(data, rhs.data)) return false; @@ -365,7 +364,7 @@ namespace hlsl } bool operator>(emulated_float64_t rhs) { - if (!FastMath && (isnan(data) || isnan(rhs.data))) + if (!FastMath && (nbl::hlsl::isnan(data) || nbl::hlsl::isnan(rhs.data))) return true; if (!FastMath && impl::areBothSameSignInfinity(data, rhs.data)) return false; @@ -404,7 +403,7 @@ namespace hlsl bool isNaN() { - return isnan(bit_cast(data)); + return nbl::hlsl::isnan(bit_cast(data)); } }; diff --git a/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl b/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl index 478ae44090..2c284fbd43 100644 --- a/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl @@ -45,13 +45,21 @@ namespace nbl { namespace hlsl { + // TODO: better implementation, also this needs to be moved somewhere else + template + UINT lerp(UINT a, UINT b, bool c) + { + return c ? b : a; + } + + namespace impl { - uint64_t2 shiftMantissaLeftBy52(uint64_t mantissa64) + uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) { uint64_t2 output; - output.x = mantissa64 >> (64 - ieee754::traits::mantissaBitCnt); - output.y = mantissa64 << ieee754::traits::mantissaBitCnt; + output.x = mantissa64 >> (63 - ieee754::traits::mantissaBitCnt); + output.y = mantissa64 << (ieee754::traits::mantissaBitCnt + 1); return output; } @@ -90,7 +98,7 @@ namespace impl lhs |= 0x0008000000000000ull; rhs |= 0x0008000000000000ull; - return lerp(rhs, lerp(lhs, rhs, isnan(rhs)), isnan(lhs)); + return nbl::hlsl::lerp(rhs, nbl::hlsl::lerp(lhs, rhs, nbl::hlsl::isnan(rhs)), nbl::hlsl::isnan(lhs)); return 0; #endif } @@ -153,16 +161,16 @@ namespace impl uint32_t2 output; const int negCount = (-count) & 31; - output.x = lerp(0u, val.x, count == 0); - output.x = lerp(output.x, (val.x >> count), count < 32); + output.x = nbl::hlsl::lerp(0u, val.x, count == 0); + output.x = nbl::hlsl::lerp(output.x, (val.x >> count), count < 32); output.y = uint32_t((val.x | val.y) != 0u); /* count >= 64 */ uint32_t z1_lt64 = (val.x>>(count & 31)) | uint32_t(((val.x<>count) | uint32_t((val.y<> (count & 31)), count < 64); - output.y = lerp(output.y, (val.x << negCount) | (val.y >> count), count < 32); + output.y = nbl::hlsl::lerp(0u, (val.x >> (count & 31)), count < 64); + output.y = nbl::hlsl::lerp(output.y, (val.x << negCount) | (val.y >> count), count < 32); - val.z = lerp(val.z | val.y, val.z, count < 32); - output.x = lerp(output.x, val.x >> count, count < 32); + val.z = nbl::hlsl::lerp(val.z | val.y, val.z, count < 32); + output.x = nbl::hlsl::lerp(output.x, val.x >> count, count < 32); output.z |= uint32_t(val.z != 0u); - output.x = lerp(output.x, 0u, (count == 32)); - output.y = lerp(output.y, val.x, (count == 32)); - output.z = lerp(output.z, val.y, (count == 32)); - output.x = lerp(output.x, val.x, (count == 0)); - output.y = lerp(output.y, val.y, (count == 0)); - output.z = lerp(output.z, val.z, (count == 0)); + output.x = nbl::hlsl::lerp(output.x, 0u, (count == 32)); + output.y = nbl::hlsl::lerp(output.y, val.x, (count == 32)); + output.z = nbl::hlsl::lerp(output.z, val.y, (count == 32)); + output.x = nbl::hlsl::lerp(output.x, val.x, (count == 0)); + output.y = nbl::hlsl::lerp(output.y, val.y, (count == 0)); + output.z = nbl::hlsl::lerp(output.z, val.z, (count == 0)); return output; } @@ 
-242,7 +250,7 @@ namespace impl uint32_t2 output; output.y = packed.y << count; // TODO: fix - output.x = lerp((packed.x << count | (packed.y >> ((-count) & 31))), packed.x, count == 0); + output.x = nbl::hlsl::lerp((packed.x << count | (packed.y >> ((-count) & 31))), packed.x, count == 0); return unpackUint64(output); }; @@ -324,7 +332,7 @@ namespace impl } else { - zExp = lerp(zExp, 0, (mantissaExtended.x | mantissaExtended.y) == 0u); + zExp = nbl::hlsl::lerp(zExp, 0, (mantissaExtended.x | mantissaExtended.y) == 0u); } return assembleFloat64(zSign, uint64_t(zExp) << ieee754::traits::mantissaBitCnt, unpackUint64(mantissaExtended.xy)); @@ -363,15 +371,15 @@ namespace impl uint32_t2 mantissaPacked = packUint64(mantissa); int shiftCount; uint32_t2 temp; - shiftCount = countLeadingZeros32(lerp(mantissaPacked.x, mantissaPacked.y, mantissaPacked.x == 0u)) - 11; - outExp = lerp(1 - shiftCount, -shiftCount - 31, mantissaPacked.x == 0u); + shiftCount = countLeadingZeros32(nbl::hlsl::lerp(mantissaPacked.x, mantissaPacked.y, mantissaPacked.x == 0u)) - 11; + outExp = nbl::hlsl::lerp(1 - shiftCount, -shiftCount - 31, mantissaPacked.x == 0u); - temp.x = lerp(mantissaPacked.y << shiftCount, mantissaPacked.y >> (-shiftCount), shiftCount < 0); - temp.y = lerp(0u, mantissaPacked.y << (shiftCount & 31), shiftCount < 0); + temp.x = nbl::hlsl::lerp(mantissaPacked.y << shiftCount, mantissaPacked.y >> (-shiftCount), shiftCount < 0); + temp.y = nbl::hlsl::lerp(0u, mantissaPacked.y << (shiftCount & 31), shiftCount < 0); shortShift64Left(impl::unpackUint64(mantissaPacked), shiftCount); - outMantissa = lerp(outMantissa, unpackUint64(temp), mantissaPacked.x == 0); + outMantissa = nbl::hlsl::lerp(outMantissa, unpackUint64(temp), mantissaPacked.x == 0); } bool areBothSameSignInfinity(uint64_t lhs, uint64_t rhs) diff --git a/include/nbl/builtin/hlsl/tgmath.hlsl b/include/nbl/builtin/hlsl/tgmath.hlsl index 63e7802df7..38a49171e5 100644 --- a/include/nbl/builtin/hlsl/tgmath.hlsl +++ b/include/nbl/builtin/hlsl/tgmath.hlsl @@ -20,6 +20,13 @@ bool isnan(Float val) return bool((ieee754::extractBiasedExponent(val) == ieee754::traits::specialValueExp) && (asUint & ieee754::traits::mantissaMask)); } +template <> +bool isnan(uint64_t val) +{ + float64_t asFloat = bit_cast(val); + return bool((ieee754::extractBiasedExponent(asFloat) == ieee754::traits::specialValueExp) && (val & ieee754::traits::mantissaMask)); +} + } } From 2b7ba9be32650b1c8352c69776dc165fc1c59415 Mon Sep 17 00:00:00 2001 From: Przemek Date: Thu, 1 Aug 2024 20:54:43 +0200 Subject: [PATCH 025/432] Correction --- include/nbl/builtin/hlsl/emulated_float64_t.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index 4945617ab8..a781145866 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -297,7 +297,7 @@ namespace hlsl uint64_t2 lhsMantissaShifted = impl::shiftMantissaLeftBy53(lhsRealMantissa); uint64_t mantissa = impl::divmod128by64(lhsMantissaShifted.x, lhsMantissaShifted.y, rhsRealMantissa); - if (mantissa & (0x1ull << 54)) + if (mantissa & (0x1ull << 53)) { ++exp; } From f6c6e59e563a49a237587d52c74c11699396579a Mon Sep 17 00:00:00 2001 From: Przemek Date: Tue, 6 Aug 2024 15:12:09 +0200 Subject: [PATCH 026/432] Saving work --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 142 ++++++++++++++---- include/nbl/builtin/hlsl/ieee754.hlsl | 3 +- 
.../hlsl/impl/emulated_float64_t_impl.hlsl | 62 ++++---- include/nbl/builtin/hlsl/tgmath.hlsl | 13 ++ 5 files changed, 154 insertions(+), 68 deletions(-) diff --git a/examples_tests b/examples_tests index f99b039d79..54cf5e9271 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit f99b039d7970be6ea0ca590e257dc999cecd8ae9 +Subproject commit 54cf5e9271636f6898a174e61aff1eec36bda51a diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index a781145866..cd7e3c43fe 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -60,9 +60,91 @@ namespace hlsl return emulated_float64_t(val); } + uint64_t shiftLeftAllowNegBitCnt(uint64_t val, int n) + { + if (n < 0) + return val >> -n; + else + return val << n; + } + // arithmetic operators emulated_float64_t operator+(const emulated_float64_t rhs) { +// { +// uint64_t lhsSign = data & ieee754::traits::signMask; +// uint64_t rhsSign = rhs.data & ieee754::traits::signMask; +// uint64_t lhsMantissa = ieee754::extractMantissa(data); +// uint64_t rhsMantissa = ieee754::extractMantissa(rhs.data); +// int lhsBiasedExp = ieee754::extractBiasedExponent(data); +// int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); +// +// if (tgmath::isnan(data) || tgmath::isnan(rhs.data)) +// return createPreserveBitPattern(ieee754::traits::quietNaN); +// /*if (std::isinf(lhs) || std::isinf(rhs)) +// { +// if (std::isinf(lhs) && !std::isinf(rhs)) +// return lhs; +// if (std::isinf(rhs) && !std::isinf(lhs)) +// return rhs; +// if (rhs == lhs) +// return rhs; +// +// return nan(); +// }*/ +// +// int rp = min(ieee754::extractExponent(data), ieee754::extractExponent(rhs.data)) - ieee754::traits::mantissaBitCnt; +// +// uint64_t lhsRealMantissa = lhsMantissa | (1ull << ieee754::traits::mantissaBitCnt); +// uint64_t rhsRealMantissa = rhsMantissa | (1ull << ieee754::traits::mantissaBitCnt); +// uint64_t lhsSignTmp = lhsSign >> (52 + 11); +// uint64_t rhsSignTmp = rhsSign >> (52 + 11); +// +// uint64_t sign = 0u; +// if (lhsSign != rhsSign) +// { +// uint64_t _min = max(data, rhs.data); +// uint64_t _max = min(data, rhs.data); +// uint64_t minAbs = _min ^ ieee754::traits::signMask; +// if (minAbs > _max) +// sign = ieee754::traits::signMask; +// +// } +// +// int64_t lhsMantissaTmp = (shiftLeftAllowNegBitCnt(lhsRealMantissa, lhsBiasedExp - rp - ieee754::traits::mantissaBitCnt - ieee754::traits::exponentBias) ^ (-lhsSignTmp)) + lhsSignTmp; +// int64_t rhsMantissaTmp = (shiftLeftAllowNegBitCnt(rhsRealMantissa, rhsBiasedExp - rp - ieee754::traits::mantissaBitCnt - ieee754::traits::exponentBias) ^ (-rhsSignTmp)) + rhsSignTmp; +// +// uint64_t addTmp = bit_cast(lhsMantissaTmp + rhsMantissaTmp); +// +// // renormalize +// if (!FastMath && false) // TODO: hande nan +// { +// +// } +// else +// { +//#ifndef __HLSL_VERSION +// int l2 = log2(double(addTmp)); +//#else +// int intl2 = 0; +//#endif +// +// if (!FastMath && (rp + l2 + 1 < nbl::hlsl::numeric_limits::min_exponent)) +// { +// return createPreserveBitPattern(impl::assembleFloat64(0, ieee754::traits::exponentMask, 0)); +// } +// else +// { +// rp = addTmp ? 
l2 + rp + ieee754::traits::exponentBias : 0; +// return createPreserveBitPattern(impl::assembleFloat64( +// sign, +// (uint64_t(rp) << ieee754::traits::mantissaBitCnt) & ieee754::traits::exponentMask, +// shiftLeftAllowNegBitCnt(addTmp, (ieee754::traits::mantissaBitCnt - l2)) & ieee754::traits::mantissaMask) +// ); +// } +// } +// } + emulated_float64_t retval = createPreserveBitPattern(0u); uint64_t mantissa; @@ -85,7 +167,7 @@ namespace hlsl if (lhsBiasedExp == ieee754::traits::specialValueExp) { bool propagate = (lhsMantissa | rhsMantissa) != 0u; - return createPreserveBitPattern(nbl::hlsl::lerp(data, impl::propagateFloat64NaN(data, rhs.data), propagate)); + return createPreserveBitPattern(tgmath::lerp(data, impl::propagateFloat64NaN(data, rhs.data), propagate)); } mantissa = lhsMantissa + rhsMantissa; @@ -109,11 +191,11 @@ namespace hlsl if (lhsBiasedExp == ieee754::traits::specialValueExp) { const bool propagate = (lhsMantissa) != 0u; - return createPreserveBitPattern(nbl::hlsl::lerp(ieee754::traits::exponentMask | lhsSign, impl::propagateFloat64NaN(data, rhs.data), propagate)); + return createPreserveBitPattern(tgmath::lerp(ieee754::traits::exponentMask | lhsSign, impl::propagateFloat64NaN(data, rhs.data), propagate)); } - expDiff = nbl::hlsl::lerp(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); - rhsMantissa = nbl::hlsl::lerp(rhsMantissa | (1ull << 52), rhsMantissa, rhsBiasedExp == 0); + expDiff = tgmath::lerp(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); + rhsMantissa = tgmath::lerp(rhsMantissa | (1ull << 52), rhsMantissa, rhsBiasedExp == 0); const uint32_t3 shifted = impl::shift64ExtraRightJamming(uint32_t3(impl::packUint64(rhsMantissa), 0u), expDiff); rhsMantissa = impl::unpackUint64(shifted.xy); mantissaExtended.z = shifted.z; @@ -153,11 +235,11 @@ namespace hlsl if (lhsBiasedExp == ieee754::traits::specialValueExp) { bool propagate = lhsMantissa != 0u; - return createPreserveBitPattern(nbl::hlsl::lerp(impl::assembleFloat64(lhsSign, ieee754::traits::exponentMask, 0ull), impl::propagateFloat64NaN(data, rhs.data), propagate)); + return createPreserveBitPattern(tgmath::lerp(impl::assembleFloat64(lhsSign, ieee754::traits::exponentMask, 0ull), impl::propagateFloat64NaN(data, rhs.data), propagate)); } - expDiff = nbl::hlsl::lerp(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); - rhsMantissa = nbl::hlsl::lerp(rhsMantissa | 0x4000000000000000ull, rhsMantissa, rhsBiasedExp == 0); + expDiff = tgmath::lerp(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); + rhsMantissa = tgmath::lerp(rhsMantissa | 0x4000000000000000ull, rhsMantissa, rhsBiasedExp == 0); rhsMantissa = impl::unpackUint64(impl::shift64RightJamming(impl::packUint64(rhsMantissa), expDiff)); lhsMantissa |= 0x4000000000000000ull; frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); @@ -168,10 +250,10 @@ namespace hlsl if (lhsBiasedExp == ieee754::traits::specialValueExp) { bool propagate = ((lhsMantissa) | (rhsMantissa)) != 0u; - return createPreserveBitPattern(nbl::hlsl::lerp(ieee754::traits::quietNaN, impl::propagateFloat64NaN(data, rhs.data), propagate)); + return createPreserveBitPattern(tgmath::lerp(ieee754::traits::quietNaN, impl::propagateFloat64NaN(data, rhs.data), propagate)); } - rhsBiasedExp = nbl::hlsl::lerp(rhsBiasedExp, 1, lhsBiasedExp == 0); - lhsBiasedExp = nbl::hlsl::lerp(lhsBiasedExp, 1, lhsBiasedExp == 0); + rhsBiasedExp = tgmath::lerp(rhsBiasedExp, 1, lhsBiasedExp == 0); + lhsBiasedExp = tgmath::lerp(lhsBiasedExp, 1, lhsBiasedExp == 0); const uint32_t2 lhsMantissaPacked = 
impl::packUint64(lhsMantissa); @@ -199,11 +281,11 @@ namespace hlsl signOfDifference = ieee754::traits::signMask; } - biasedExp = nbl::hlsl::lerp(rhsBiasedExp, lhsBiasedExp, signOfDifference == 0u); + biasedExp = tgmath::lerp(rhsBiasedExp, lhsBiasedExp, signOfDifference == 0u); lhsSign ^= signOfDifference; uint64_t retval_0 = impl::packFloat64(uint32_t(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) << 31, 0, 0u, 0u); uint64_t retval_1 = impl::normalizeRoundAndPackFloat64(lhsSign, biasedExp - 11, frac.x, frac.y); - return createPreserveBitPattern(nbl::hlsl::lerp(retval_0, retval_1, frac.x != 0u || frac.y != 0u)); + return createPreserveBitPattern(tgmath::lerp(retval_0, retval_1, frac.x != 0u || frac.y != 0u)); } } @@ -285,9 +367,18 @@ namespace hlsl return createPreserveBitPattern(impl::assembleFloat64(sign, uint64_t(exp) << ieee754::traits::mantissaBitCnt, newPseudoMantissa)); } + /*emulated_float64_t reciprocal(uint64_t x) + { + using ThisType = emulated_float64_t; + ThisType output = ThisType::createPreserveBitPattern((0xbfcdd6a18f6a6f52ULL - x) >> 1); + output = output * output; + return output; + }*/ + emulated_float64_t operator/(const emulated_float64_t rhs) { - // TODO: maybe add function to extract real mantissa + //return emulated_float64_t::createPreserveBitPattern(data) * reciprocal(rhs.data); + const uint64_t lhsRealMantissa = (ieee754::extractMantissa(data) | (1ull << ieee754::traits::mantissaBitCnt)); const uint64_t rhsRealMantissa = ieee754::extractMantissa(rhs.data) | (1ull << ieee754::traits::mantissaBitCnt); @@ -312,15 +403,15 @@ namespace hlsl } // relational operators - bool operator==(emulated_float64_t rhs) + bool operator==(emulated_float64_t rhs) { - if (!FastMath && (nbl::hlsl::isnan(data) || nbl::hlsl::isnan(rhs.data))) + if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) return false; // TODO: i'm not sure about this one if (!FastMath && impl::areBothZero(data, rhs.data)) return true; - const emulated_float64_t xored = emulated_float64_t::createPreserveBitPattern(data ^ rhs.data); + const emulated_float64_t xored = createPreserveBitPattern(data ^ rhs.data); // TODO: check what fast math returns for -0 == 0 if ((xored.data & 0x7FFFFFFFFFFFFFFFull) == 0ull) return true; @@ -329,22 +420,11 @@ namespace hlsl } bool operator!=(emulated_float64_t rhs) { - if (!FastMath && (nbl::hlsl::isnan(data) || nbl::hlsl::isnan(rhs.data))) - return true; - // TODO: i'm not sure about this one - if (!FastMath && impl::areBothSameSignZero(data, rhs.data)) - return false; - - const emulated_float64_t xored = emulated_float64_t::createPreserveBitPattern(data ^ rhs.data); - // TODO: check what fast math returns for -0 == 0 - if ((xored.data & 0x7FFFFFFFFFFFFFFFull) == 0ull) - return false; - - return xored.data; + return !(createPreserveBitPattern(data) == rhs); } bool operator<(emulated_float64_t rhs) { - if (!FastMath && (nbl::hlsl::isnan(data) || nbl::hlsl::isnan(rhs.data))) + if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) return false; if (!FastMath && impl::areBothSameSignInfinity(data, rhs.data)) return false; @@ -364,7 +444,7 @@ namespace hlsl } bool operator>(emulated_float64_t rhs) { - if (!FastMath && (nbl::hlsl::isnan(data) || nbl::hlsl::isnan(rhs.data))) + if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) return true; if (!FastMath && impl::areBothSameSignInfinity(data, rhs.data)) return false; @@ -403,7 +483,7 @@ namespace hlsl bool isNaN() { - return nbl::hlsl::isnan(bit_cast(data)); + return tgmath::isnan(bit_cast(data)); } 
}; diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index 606c415fdb..712d7440b8 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -126,7 +126,8 @@ uint32_t extractBiasedExponent(float64_t x) template int extractExponent(T x) { - return int(extractBiasedExponent(x)) - int(traits::exponentBias); + using AsFloat = typename float_of_size::type; + return int(extractBiasedExponent(x)) - int(traits::exponentBias); } template diff --git a/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl b/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl index 2c284fbd43..f2c2760994 100644 --- a/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl @@ -45,14 +45,6 @@ namespace nbl { namespace hlsl { - // TODO: better implementation, also this needs to be moved somewhere else - template - UINT lerp(UINT a, UINT b, bool c) - { - return c ? b : a; - } - - namespace impl { uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) @@ -98,7 +90,7 @@ namespace impl lhs |= 0x0008000000000000ull; rhs |= 0x0008000000000000ull; - return nbl::hlsl::lerp(rhs, nbl::hlsl::lerp(lhs, rhs, nbl::hlsl::isnan(rhs)), nbl::hlsl::isnan(lhs)); + return tgmath::lerp(rhs, tgmath::lerp(lhs, rhs, tgmath::isnan(rhs)), tgmath::isnan(lhs)); return 0; #endif } @@ -161,16 +153,16 @@ namespace impl uint32_t2 output; const int negCount = (-count) & 31; - output.x = nbl::hlsl::lerp(0u, val.x, count == 0); - output.x = nbl::hlsl::lerp(output.x, (val.x >> count), count < 32); + output.x = tgmath::lerp(0u, val.x, count == 0); + output.x = tgmath::lerp(output.x, (val.x >> count), count < 32); output.y = uint32_t((val.x | val.y) != 0u); /* count >= 64 */ uint32_t z1_lt64 = (val.x>>(count & 31)) | uint32_t(((val.x<>count) | uint32_t((val.y<> (count & 31)), count < 64); - output.y = nbl::hlsl::lerp(output.y, (val.x << negCount) | (val.y >> count), count < 32); + output.y = tgmath::lerp(0u, (val.x >> (count & 31)), count < 64); + output.y = tgmath::lerp(output.y, (val.x << negCount) | (val.y >> count), count < 32); - val.z = nbl::hlsl::lerp(val.z | val.y, val.z, count < 32); - output.x = nbl::hlsl::lerp(output.x, val.x >> count, count < 32); + val.z = tgmath::lerp(val.z | val.y, val.z, count < 32); + output.x = tgmath::lerp(output.x, val.x >> count, count < 32); output.z |= uint32_t(val.z != 0u); - output.x = nbl::hlsl::lerp(output.x, 0u, (count == 32)); - output.y = nbl::hlsl::lerp(output.y, val.x, (count == 32)); - output.z = nbl::hlsl::lerp(output.z, val.y, (count == 32)); - output.x = nbl::hlsl::lerp(output.x, val.x, (count == 0)); - output.y = nbl::hlsl::lerp(output.y, val.y, (count == 0)); - output.z = nbl::hlsl::lerp(output.z, val.z, (count == 0)); + output.x = tgmath::lerp(output.x, 0u, (count == 32)); + output.y = tgmath::lerp(output.y, val.x, (count == 32)); + output.z = tgmath::lerp(output.z, val.y, (count == 32)); + output.x = tgmath::lerp(output.x, val.x, (count == 0)); + output.y = tgmath::lerp(output.y, val.y, (count == 0)); + output.z = tgmath::lerp(output.z, val.z, (count == 0)); return output; } @@ -250,7 +242,7 @@ namespace impl uint32_t2 output; output.y = packed.y << count; // TODO: fix - output.x = nbl::hlsl::lerp((packed.x << count | (packed.y >> ((-count) & 31))), packed.x, count == 0); + output.x = tgmath::lerp((packed.x << count | (packed.y >> ((-count) & 31))), packed.x, count == 0); return unpackUint64(output); }; @@ -332,7 +324,7 @@ namespace impl } else { - 
zExp = nbl::hlsl::lerp(zExp, 0, (mantissaExtended.x | mantissaExtended.y) == 0u); + zExp = tgmath::lerp(zExp, 0, (mantissaExtended.x | mantissaExtended.y) == 0u); } return assembleFloat64(zSign, uint64_t(zExp) << ieee754::traits::mantissaBitCnt, unpackUint64(mantissaExtended.xy)); @@ -371,15 +363,15 @@ namespace impl uint32_t2 mantissaPacked = packUint64(mantissa); int shiftCount; uint32_t2 temp; - shiftCount = countLeadingZeros32(nbl::hlsl::lerp(mantissaPacked.x, mantissaPacked.y, mantissaPacked.x == 0u)) - 11; - outExp = nbl::hlsl::lerp(1 - shiftCount, -shiftCount - 31, mantissaPacked.x == 0u); + shiftCount = countLeadingZeros32(tgmath::lerp(mantissaPacked.x, mantissaPacked.y, mantissaPacked.x == 0u)) - 11; + outExp = tgmath::lerp(1 - shiftCount, -shiftCount - 31, mantissaPacked.x == 0u); - temp.x = nbl::hlsl::lerp(mantissaPacked.y << shiftCount, mantissaPacked.y >> (-shiftCount), shiftCount < 0); - temp.y = nbl::hlsl::lerp(0u, mantissaPacked.y << (shiftCount & 31), shiftCount < 0); + temp.x = tgmath::lerp(mantissaPacked.y << shiftCount, mantissaPacked.y >> (-shiftCount), shiftCount < 0); + temp.y = tgmath::lerp(0u, mantissaPacked.y << (shiftCount & 31), shiftCount < 0); shortShift64Left(impl::unpackUint64(mantissaPacked), shiftCount); - outMantissa = nbl::hlsl::lerp(outMantissa, unpackUint64(temp), mantissaPacked.x == 0); + outMantissa = tgmath::lerp(outMantissa, unpackUint64(temp), mantissaPacked.x == 0); } bool areBothSameSignInfinity(uint64_t lhs, uint64_t rhs) diff --git a/include/nbl/builtin/hlsl/tgmath.hlsl b/include/nbl/builtin/hlsl/tgmath.hlsl index 38a49171e5..490c3f9f70 100644 --- a/include/nbl/builtin/hlsl/tgmath.hlsl +++ b/include/nbl/builtin/hlsl/tgmath.hlsl @@ -11,6 +11,10 @@ namespace nbl { namespace hlsl { + +namespace tgmath +{ + template bool isnan(Float val) { @@ -27,6 +31,15 @@ bool isnan(uint64_t val) return bool((ieee754::extractBiasedExponent(asFloat) == ieee754::traits::specialValueExp) && (val & ieee754::traits::mantissaMask)); } +// TODO: better implementation, also i'm not sure this is the right place for this function +template +UINT lerp(UINT a, UINT b, bool c) +{ + return c ? 
b : a; +} + +} + } } From b816d68c7e72dd02341d4228723e66861b466700 Mon Sep 17 00:00:00 2001 From: Przemek Date: Tue, 6 Aug 2024 15:28:54 +0200 Subject: [PATCH 027/432] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 54cf5e9271..10605e9087 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 54cf5e9271636f6898a174e61aff1eec36bda51a +Subproject commit 10605e9087cf56b802b41bad92ddf273156d1020 From 9cc10c3a0cde6be2d8b303e4afc10d5a55535da5 Mon Sep 17 00:00:00 2001 From: Przemek Date: Wed, 7 Aug 2024 17:30:36 +0200 Subject: [PATCH 028/432] Saving work --- examples_tests | 2 +- src/nbl/video/CVulkanPhysicalDevice.cpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/examples_tests b/examples_tests index 10605e9087..aefcde9939 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 10605e9087cf56b802b41bad92ddf273156d1020 +Subproject commit aefcde99399b6ff77675fefe5bc16b5d3bb99c6d diff --git a/src/nbl/video/CVulkanPhysicalDevice.cpp b/src/nbl/video/CVulkanPhysicalDevice.cpp index 541a600b03..f13429efc1 100644 --- a/src/nbl/video/CVulkanPhysicalDevice.cpp +++ b/src/nbl/video/CVulkanPhysicalDevice.cpp @@ -1615,7 +1615,8 @@ core::smart_refctd_ptr CVulkanPhysicalDevice::createLogicalDevic vk_deviceFeatures2.features.shaderStorageImageArrayDynamicIndexing = limits.shaderStorageImageArrayDynamicIndexing; vk_deviceFeatures2.features.shaderClipDistance = true; // good device support vk_deviceFeatures2.features.shaderCullDistance = enabledFeatures.shaderCullDistance; - vk_deviceFeatures2.features.shaderFloat64 = limits.shaderFloat64; + //vk_deviceFeatures2.features.shaderFloat64 = limits.shaderFloat64; // TODO: enable back + vk_deviceFeatures2.features.shaderFloat64 = VK_FALSE; vk_deviceFeatures2.features.shaderInt64 = true; // always enable vk_deviceFeatures2.features.shaderInt16 = true; // always enable vk_deviceFeatures2.features.shaderResourceResidency = enabledFeatures.shaderResourceResidency; From 48b133c364cbd2e449c7cfc9e31dffbb024dedf0 Mon Sep 17 00:00:00 2001 From: Przemek Date: Tue, 13 Aug 2024 12:35:39 +0200 Subject: [PATCH 029/432] Saving work --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 964 ++++++++++++------ include/nbl/builtin/hlsl/ieee754.hlsl | 49 +- .../hlsl/impl/emulated_float64_t_impl.hlsl | 704 ++++++------- include/nbl/builtin/hlsl/shapes/beziers.hlsl | 39 +- include/nbl/builtin/hlsl/tgmath.hlsl | 16 +- 6 files changed, 1061 insertions(+), 713 deletions(-) diff --git a/examples_tests b/examples_tests index aefcde9939..9b1edf6a48 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit aefcde99399b6ff77675fefe5bc16b5d3bb99c6d +Subproject commit 9b1edf6a48513f28a7a9312260c3c790a8392232 diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index cd7e3c43fe..dd517c2f8c 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -2,6 +2,14 @@ #define _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_INCLUDED_ #include + +// weird dxc compiler errors +#ifndef __HLSL_VERSION +#define CONST const +#else +#define CONST +#endif + namespace nbl { namespace hlsl @@ -19,27 +27,32 @@ namespace hlsl return emulated_float64_t(bit_cast(float64_t(val))); }*/ - static emulated_float64_t create(int32_t val) + NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(emulated_float64_t val) + { 
+ return createPreserveBitPattern(val.data); + } + + NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(int32_t val) { return emulated_float64_t(bit_cast(float64_t(val))); } - static emulated_float64_t create(int64_t val) + NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(int64_t val) { return emulated_float64_t(bit_cast(float64_t(val))); } - static emulated_float64_t create(uint32_t val) + NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(uint32_t val) { return emulated_float64_t(bit_cast(float64_t(val))); } - static emulated_float64_t create(uint64_t val) + NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(uint64_t val) { return emulated_float64_t(bit_cast(float64_t(val))); } - static emulated_float64_t create(float64_t val) + NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(float64_t val) { return emulated_float64_t(bit_cast(val)); } @@ -50,16 +63,24 @@ namespace hlsl return emulated_float64_t(bit_cast(float64_t(val))); }*/ - static emulated_float64_t create(float32_t val) + NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(float32_t val) { return emulated_float64_t(bit_cast(float64_t(val))); } - static emulated_float64_t createPreserveBitPattern(uint64_t val) + NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t createPreserveBitPattern(uint64_t val) { return emulated_float64_t(val); } + inline float getAsFloat32() + { + // TODO: don't use double + return float(bit_cast(data)); + + } + +#if 0 uint64_t shiftLeftAllowNegBitCnt(uint64_t val, int n) { if (n < 0) @@ -67,229 +88,240 @@ namespace hlsl else return val << n; } +#endif // arithmetic operators - emulated_float64_t operator+(const emulated_float64_t rhs) + emulated_float64_t operator+(const emulated_float64_t rhs) CONST { -// { -// uint64_t lhsSign = data & ieee754::traits::signMask; -// uint64_t rhsSign = rhs.data & ieee754::traits::signMask; -// uint64_t lhsMantissa = ieee754::extractMantissa(data); -// uint64_t rhsMantissa = ieee754::extractMantissa(rhs.data); -// int lhsBiasedExp = ieee754::extractBiasedExponent(data); -// int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); -// -// if (tgmath::isnan(data) || tgmath::isnan(rhs.data)) -// return createPreserveBitPattern(ieee754::traits::quietNaN); -// /*if (std::isinf(lhs) || std::isinf(rhs)) -// { -// if (std::isinf(lhs) && !std::isinf(rhs)) -// return lhs; -// if (std::isinf(rhs) && !std::isinf(lhs)) -// return rhs; -// if (rhs == lhs) -// return rhs; -// -// return nan(); -// }*/ -// -// int rp = min(ieee754::extractExponent(data), ieee754::extractExponent(rhs.data)) - ieee754::traits::mantissaBitCnt; -// -// uint64_t lhsRealMantissa = lhsMantissa | (1ull << ieee754::traits::mantissaBitCnt); -// uint64_t rhsRealMantissa = rhsMantissa | (1ull << ieee754::traits::mantissaBitCnt); -// uint64_t lhsSignTmp = lhsSign >> (52 + 11); -// uint64_t rhsSignTmp = rhsSign >> (52 + 11); -// -// uint64_t sign = 0u; -// if (lhsSign != rhsSign) -// { -// uint64_t _min = max(data, rhs.data); -// uint64_t _max = min(data, rhs.data); -// uint64_t minAbs = _min ^ ieee754::traits::signMask; -// if (minAbs > _max) -// sign = ieee754::traits::signMask; -// -// } -// -// int64_t lhsMantissaTmp = (shiftLeftAllowNegBitCnt(lhsRealMantissa, lhsBiasedExp - rp - ieee754::traits::mantissaBitCnt - ieee754::traits::exponentBias) ^ (-lhsSignTmp)) + lhsSignTmp; -// int64_t rhsMantissaTmp = (shiftLeftAllowNegBitCnt(rhsRealMantissa, rhsBiasedExp - rp - ieee754::traits::mantissaBitCnt - ieee754::traits::exponentBias) ^ (-rhsSignTmp)) + rhsSignTmp; -// -// uint64_t addTmp = 
bit_cast(lhsMantissaTmp + rhsMantissaTmp); -// -// // renormalize -// if (!FastMath && false) // TODO: hande nan -// { -// -// } -// else -// { -//#ifndef __HLSL_VERSION -// int l2 = log2(double(addTmp)); -//#else -// int intl2 = 0; -//#endif -// -// if (!FastMath && (rp + l2 + 1 < nbl::hlsl::numeric_limits::min_exponent)) -// { -// return createPreserveBitPattern(impl::assembleFloat64(0, ieee754::traits::exponentMask, 0)); -// } -// else -// { -// rp = addTmp ? l2 + rp + ieee754::traits::exponentBias : 0; -// return createPreserveBitPattern(impl::assembleFloat64( -// sign, -// (uint64_t(rp) << ieee754::traits::mantissaBitCnt) & ieee754::traits::exponentMask, -// shiftLeftAllowNegBitCnt(addTmp, (ieee754::traits::mantissaBitCnt - l2)) & ieee754::traits::mantissaMask) -// ); -// } -// } -// } - - emulated_float64_t retval = createPreserveBitPattern(0u); - - uint64_t mantissa; - uint32_t3 mantissaExtended; - int biasedExp; - - uint64_t lhsSign = data & ieee754::traits::signMask; - uint64_t rhsSign = rhs.data & ieee754::traits::signMask; - uint64_t lhsMantissa = ieee754::extractMantissa(data); - uint64_t rhsMantissa = ieee754::extractMantissa(rhs.data); - int lhsBiasedExp = ieee754::extractBiasedExponent(data); - int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); - - int expDiff = lhsBiasedExp - rhsBiasedExp; - - if (lhsSign == rhsSign) +#if 0 { - if (expDiff == 0) + uint64_t lhsSign = data & ieee754::traits::signMask; + uint64_t rhsSign = rhs.data & ieee754::traits::signMask; + uint64_t lhsMantissa = ieee754::extractMantissa(data); + uint64_t rhsMantissa = ieee754::extractMantissa(rhs.data); + int lhsBiasedExp = ieee754::extractBiasedExponent(data); + int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); + + if (tgmath::isnan(data) || tgmath::isnan(rhs.data)) + return createPreserveBitPattern(ieee754::traits::quietNaN); + /*if (std::isinf(lhs) || std::isinf(rhs)) { - if (lhsBiasedExp == ieee754::traits::specialValueExp) - { - bool propagate = (lhsMantissa | rhsMantissa) != 0u; - return createPreserveBitPattern(tgmath::lerp(data, impl::propagateFloat64NaN(data, rhs.data), propagate)); - } - - mantissa = lhsMantissa + rhsMantissa; - if (lhsBiasedExp == 0) - return createPreserveBitPattern(impl::assembleFloat64(lhsSign, 0, mantissa)); - mantissaExtended.xy = impl::packUint64(mantissa); - mantissaExtended.x |= 0x00200000u; - mantissaExtended.z = 0u; - biasedExp = lhsBiasedExp; + if (std::isinf(lhs) && !std::isinf(rhs)) + return lhs; + if (std::isinf(rhs) && !std::isinf(lhs)) + return rhs; + if (rhs == lhs) + return rhs; + + return nan(); + }*/ + + int rp = min(ieee754::extractExponent(data), ieee754::extractExponent(rhs.data)) - ieee754::traits::mantissaBitCnt; + + uint64_t lhsRealMantissa = lhsMantissa | (1ull << ieee754::traits::mantissaBitCnt); + uint64_t rhsRealMantissa = rhsMantissa | (1ull << ieee754::traits::mantissaBitCnt); + uint64_t lhsSignTmp = lhsSign >> (52 + 11); + uint64_t rhsSignTmp = rhsSign >> (52 + 11); + + uint64_t sign = 0u; + if (lhsSign != rhsSign) + { + uint64_t _min = max(data, rhs.data); + uint64_t _max = min(data, rhs.data); + uint64_t minAbs = _min ^ ieee754::traits::signMask; + if (minAbs > _max) + sign = ieee754::traits::signMask; - mantissaExtended = impl::shift64ExtraRightJamming(mantissaExtended, 1); } - else + + int64_t lhsMantissaTmp = (shiftLeftAllowNegBitCnt(lhsRealMantissa, lhsBiasedExp - rp - ieee754::traits::mantissaBitCnt - ieee754::traits::exponentBias) ^ (-lhsSignTmp)) + lhsSignTmp; + int64_t rhsMantissaTmp = 
(shiftLeftAllowNegBitCnt(rhsRealMantissa, rhsBiasedExp - rp - ieee754::traits::mantissaBitCnt - ieee754::traits::exponentBias) ^ (-rhsSignTmp)) + rhsSignTmp; + + uint64_t addTmp = bit_cast(lhsMantissaTmp + rhsMantissaTmp); + + // renormalize + if (!FastMath && false) // TODO: hande nan { - if (expDiff < 0) - { - swap(lhsMantissa, rhsMantissa); - swap(lhsBiasedExp, rhsBiasedExp); - } - - if (lhsBiasedExp == ieee754::traits::specialValueExp) - { - const bool propagate = (lhsMantissa) != 0u; - return createPreserveBitPattern(tgmath::lerp(ieee754::traits::exponentMask | lhsSign, impl::propagateFloat64NaN(data, rhs.data), propagate)); - } - - expDiff = tgmath::lerp(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); - rhsMantissa = tgmath::lerp(rhsMantissa | (1ull << 52), rhsMantissa, rhsBiasedExp == 0); - const uint32_t3 shifted = impl::shift64ExtraRightJamming(uint32_t3(impl::packUint64(rhsMantissa), 0u), expDiff); - rhsMantissa = impl::unpackUint64(shifted.xy); - mantissaExtended.z = shifted.z; - biasedExp = lhsBiasedExp; - - lhsMantissa |= (1ull << 52); - mantissaExtended.xy = impl::packUint64(lhsMantissa + rhsMantissa); - --biasedExp; - if (!(mantissaExtended.x < 0x00200000u)) - { - mantissaExtended = impl::shift64ExtraRightJamming(mantissaExtended, 1); - ++biasedExp; - } - - return createPreserveBitPattern(impl::roundAndPackFloat64(lhsSign, biasedExp, mantissaExtended.xyz)); + } - - // cannot happen but compiler cries about not every path returning value - return createPreserveBitPattern(impl::roundAndPackFloat64(lhsSign, biasedExp, mantissaExtended)); - } - else - { - lhsMantissa = impl::shortShift64Left(lhsMantissa, 10); - rhsMantissa = impl::shortShift64Left(rhsMantissa, 10); - - if (expDiff != 0) + else { - uint32_t2 frac; - - if (expDiff < 0) +#ifndef __HLSL_VERSION + int l2 = log2(double(addTmp)); +#else + int intl2 = 0; +#endif + + if (!FastMath && (rp + l2 + 1 < nbl::hlsl::numeric_limits::min_exponent)) { - swap(lhsMantissa, rhsMantissa); - swap(lhsBiasedExp, rhsBiasedExp); - lhsSign ^= ieee754::traits::signMask; + return createPreserveBitPattern(impl::assembleFloat64(0, ieee754::traits::exponentMask, 0)); } - - if (lhsBiasedExp == ieee754::traits::specialValueExp) + else { - bool propagate = lhsMantissa != 0u; - return createPreserveBitPattern(tgmath::lerp(impl::assembleFloat64(lhsSign, ieee754::traits::exponentMask, 0ull), impl::propagateFloat64NaN(data, rhs.data), propagate)); + rp = addTmp ? 
l2 + rp + ieee754::traits::exponentBias : 0; + return createPreserveBitPattern(impl::assembleFloat64( + sign, + (uint64_t(rp) << ieee754::traits::mantissaBitCnt) & ieee754::traits::exponentMask, + shiftLeftAllowNegBitCnt(addTmp, (ieee754::traits::mantissaBitCnt - l2)) & ieee754::traits::mantissaMask) + ); } - - expDiff = tgmath::lerp(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); - rhsMantissa = tgmath::lerp(rhsMantissa | 0x4000000000000000ull, rhsMantissa, rhsBiasedExp == 0); - rhsMantissa = impl::unpackUint64(impl::shift64RightJamming(impl::packUint64(rhsMantissa), expDiff)); - lhsMantissa |= 0x4000000000000000ull; - frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); - biasedExp = lhsBiasedExp; - --biasedExp; - return createPreserveBitPattern(impl::normalizeRoundAndPackFloat64(lhsSign, biasedExp - 10, frac.x, frac.y)); } - if (lhsBiasedExp == ieee754::traits::specialValueExp) - { - bool propagate = ((lhsMantissa) | (rhsMantissa)) != 0u; - return createPreserveBitPattern(tgmath::lerp(ieee754::traits::quietNaN, impl::propagateFloat64NaN(data, rhs.data), propagate)); - } - rhsBiasedExp = tgmath::lerp(rhsBiasedExp, 1, lhsBiasedExp == 0); - lhsBiasedExp = tgmath::lerp(lhsBiasedExp, 1, lhsBiasedExp == 0); - + } +#endif - const uint32_t2 lhsMantissaPacked = impl::packUint64(lhsMantissa); - const uint32_t2 rhsMantissaPacked = impl::packUint64(rhsMantissa); + if (FlushDenormToZero) + { + emulated_float64_t retval = createPreserveBitPattern(0u); - uint32_t2 frac; - uint64_t signOfDifference = 0; - if (rhsMantissaPacked.x < lhsMantissaPacked.x) - { - frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); - } - else if (lhsMantissaPacked.x < rhsMantissaPacked.x) - { - frac.xy = impl::packUint64(rhsMantissa - lhsMantissa); - signOfDifference = ieee754::traits::signMask; - } - else if (rhsMantissaPacked.y <= lhsMantissaPacked.y) + uint64_t mantissa; + uint32_t3 mantissaExtended; + int biasedExp; + + uint64_t lhsSign = data & ieee754::traits::signMask; + uint64_t rhsSign = rhs.data & ieee754::traits::signMask; + uint64_t lhsMantissa = ieee754::extractMantissa(data); + uint64_t rhsMantissa = ieee754::extractMantissa(rhs.data); + int lhsBiasedExp = ieee754::extractBiasedExponent(data); + int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); + + int expDiff = lhsBiasedExp - rhsBiasedExp; + + if (lhsSign == rhsSign) { - /* It is possible that frac.x and frac.y may be zero after this. 
*/ - frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); + if (expDiff == 0) + { + if (lhsBiasedExp == ieee754::traits::specialValueExp) + { + bool propagate = (lhsMantissa | rhsMantissa) != 0u; + return createPreserveBitPattern(tgmath::lerp(data, impl::propagateFloat64NaN(data, rhs.data), propagate)); + } + + mantissa = lhsMantissa + rhsMantissa; + if (lhsBiasedExp == 0) + return createPreserveBitPattern(impl::assembleFloat64(lhsSign, 0, mantissa)); + mantissaExtended.xy = impl::packUint64(mantissa); + mantissaExtended.x |= 0x00200000u; + mantissaExtended.z = 0u; + biasedExp = lhsBiasedExp; + + mantissaExtended = impl::shift64ExtraRightJamming(mantissaExtended, 1); + } + else + { + if (expDiff < 0) + { + swap(lhsMantissa, rhsMantissa); + swap(lhsBiasedExp, rhsBiasedExp); + } + + if (lhsBiasedExp == ieee754::traits::specialValueExp) + { + const bool propagate = (lhsMantissa) != 0u; + return createPreserveBitPattern(tgmath::lerp(ieee754::traits::exponentMask | lhsSign, impl::propagateFloat64NaN(data, rhs.data), propagate)); + } + + expDiff = tgmath::lerp(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); + rhsMantissa = tgmath::lerp(rhsMantissa | (1ull << 52), rhsMantissa, rhsBiasedExp == 0); + const uint32_t3 shifted = impl::shift64ExtraRightJamming(uint32_t3(impl::packUint64(rhsMantissa), 0u), expDiff); + rhsMantissa = impl::unpackUint64(shifted.xy); + mantissaExtended.z = shifted.z; + biasedExp = lhsBiasedExp; + + lhsMantissa |= (1ull << 52); + mantissaExtended.xy = impl::packUint64(lhsMantissa + rhsMantissa); + --biasedExp; + if (!(mantissaExtended.x < 0x00200000u)) + { + mantissaExtended = impl::shift64ExtraRightJamming(mantissaExtended, 1); + ++biasedExp; + } + + return createPreserveBitPattern(impl::roundAndPackFloat64(lhsSign, biasedExp, mantissaExtended.xyz)); + } + + // cannot happen but compiler cries about not every path returning value + return createPreserveBitPattern(impl::roundAndPackFloat64(lhsSign, biasedExp, mantissaExtended)); } else { - frac.xy = impl::packUint64(rhsMantissa - lhsMantissa); - signOfDifference = ieee754::traits::signMask; + lhsMantissa = impl::shortShift64Left(lhsMantissa, 10); + rhsMantissa = impl::shortShift64Left(rhsMantissa, 10); + + if (expDiff != 0) + { + uint32_t2 frac; + + if (expDiff < 0) + { + swap(lhsMantissa, rhsMantissa); + swap(lhsBiasedExp, rhsBiasedExp); + lhsSign ^= ieee754::traits::signMask; + } + + if (lhsBiasedExp == ieee754::traits::specialValueExp) + { + bool propagate = lhsMantissa != 0u; + return createPreserveBitPattern(tgmath::lerp(impl::assembleFloat64(lhsSign, ieee754::traits::exponentMask, 0ull), impl::propagateFloat64NaN(data, rhs.data), propagate)); + } + + expDiff = tgmath::lerp(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); + rhsMantissa = tgmath::lerp(rhsMantissa | 0x4000000000000000ull, rhsMantissa, rhsBiasedExp == 0); + rhsMantissa = impl::unpackUint64(impl::shift64RightJamming(impl::packUint64(rhsMantissa), expDiff)); + lhsMantissa |= 0x4000000000000000ull; + frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); + biasedExp = lhsBiasedExp; + --biasedExp; + return createPreserveBitPattern(impl::normalizeRoundAndPackFloat64(lhsSign, biasedExp - 10, frac.x, frac.y)); + } + if (lhsBiasedExp == ieee754::traits::specialValueExp) + { + bool propagate = ((lhsMantissa) | (rhsMantissa)) != 0u; + return createPreserveBitPattern(tgmath::lerp(ieee754::traits::quietNaN, impl::propagateFloat64NaN(data, rhs.data), propagate)); + } + rhsBiasedExp = tgmath::lerp(rhsBiasedExp, 1, lhsBiasedExp == 0); + lhsBiasedExp = 
tgmath::lerp(lhsBiasedExp, 1, lhsBiasedExp == 0); + + + const uint32_t2 lhsMantissaPacked = impl::packUint64(lhsMantissa); + const uint32_t2 rhsMantissaPacked = impl::packUint64(rhsMantissa); + + uint32_t2 frac; + uint64_t signOfDifference = 0; + if (rhsMantissaPacked.x < lhsMantissaPacked.x) + { + frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); + } + else if (lhsMantissaPacked.x < rhsMantissaPacked.x) + { + frac.xy = impl::packUint64(rhsMantissa - lhsMantissa); + signOfDifference = ieee754::traits::signMask; + } + else if (rhsMantissaPacked.y <= lhsMantissaPacked.y) + { + /* It is possible that frac.x and frac.y may be zero after this. */ + frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); + } + else + { + frac.xy = impl::packUint64(rhsMantissa - lhsMantissa); + signOfDifference = ieee754::traits::signMask; + } + + biasedExp = tgmath::lerp(rhsBiasedExp, lhsBiasedExp, signOfDifference == 0u); + lhsSign ^= signOfDifference; + uint64_t retval_0 = impl::packFloat64(uint32_t(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) << 31, 0, 0u, 0u); + uint64_t retval_1 = impl::normalizeRoundAndPackFloat64(lhsSign, biasedExp - 11, frac.x, frac.y); + return createPreserveBitPattern(tgmath::lerp(retval_0, retval_1, frac.x != 0u || frac.y != 0u)); } - - biasedExp = tgmath::lerp(rhsBiasedExp, lhsBiasedExp, signOfDifference == 0u); - lhsSign ^= signOfDifference; - uint64_t retval_0 = impl::packFloat64(uint32_t(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) << 31, 0, 0u, 0u); - uint64_t retval_1 = impl::normalizeRoundAndPackFloat64(lhsSign, biasedExp - 11, frac.x, frac.y); - return createPreserveBitPattern(tgmath::lerp(retval_0, retval_1, frac.x != 0u || frac.y != 0u)); + } + else + { + //static_assert(false, "not implemented yet"); + return createPreserveBitPattern(0xdeadbeefbadcaffeull); } } - emulated_float64_t operator-(emulated_float64_t rhs) + emulated_float64_t operator-(emulated_float64_t rhs) CONST { emulated_float64_t lhs = createPreserveBitPattern(data); emulated_float64_t rhsFlipped = rhs.flipSign(); @@ -297,74 +329,82 @@ namespace hlsl return lhs + rhsFlipped; } - emulated_float64_t operator*(emulated_float64_t rhs) + emulated_float64_t operator*(emulated_float64_t rhs) CONST { - emulated_float64_t retval = emulated_float64_t::createPreserveBitPattern(0u); - - uint64_t lhsSign = data & ieee754::traits::signMask; - uint64_t rhsSign = rhs.data & ieee754::traits::signMask; - uint64_t lhsMantissa = ieee754::extractMantissa(data); - uint64_t rhsMantissa = ieee754::extractMantissa(rhs.data); - int lhsBiasedExp = ieee754::extractBiasedExponent(data); - int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); - - int exp = int(lhsBiasedExp + rhsBiasedExp) - ieee754::traits::exponentBias; - uint64_t sign = (data ^ rhs.data) & ieee754::traits::signMask; - if (!FastMath) + if(FlushDenormToZero) { - if (lhsBiasedExp == ieee754::traits::specialValueExp) + emulated_float64_t retval = emulated_float64_t::createPreserveBitPattern(0u); + + uint64_t lhsSign = data & ieee754::traits::signMask; + uint64_t rhsSign = rhs.data & ieee754::traits::signMask; + uint64_t lhsMantissa = ieee754::extractMantissa(data); + uint64_t rhsMantissa = ieee754::extractMantissa(rhs.data); + int lhsBiasedExp = ieee754::extractBiasedExponent(data); + int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); + + int exp = int(lhsBiasedExp + rhsBiasedExp) - ieee754::traits::exponentBias; + uint64_t sign = (data ^ rhs.data) & ieee754::traits::signMask; + if (!FastMath) { - if ((lhsMantissa != 0u) || ((rhsBiasedExp == 
ieee754::traits::specialValueExp) && (rhsMantissa != 0u))) - return createPreserveBitPattern(impl::propagateFloat64NaN(data, rhs.data)); - if ((uint64_t(rhsBiasedExp) | rhsMantissa) == 0u) - return createPreserveBitPattern(ieee754::traits::quietNaN); + if (lhsBiasedExp == ieee754::traits::specialValueExp) + { + if ((lhsMantissa != 0u) || ((rhsBiasedExp == ieee754::traits::specialValueExp) && (rhsMantissa != 0u))) + return createPreserveBitPattern(impl::propagateFloat64NaN(data, rhs.data)); + if ((uint64_t(rhsBiasedExp) | rhsMantissa) == 0u) + return createPreserveBitPattern(ieee754::traits::quietNaN); - return createPreserveBitPattern(impl::assembleFloat64(sign, ieee754::traits::exponentMask, 0ull)); - } - if (rhsBiasedExp == ieee754::traits::specialValueExp) - { - /* a cannot be NaN, but is b NaN? */ - if (rhsMantissa != 0u) + return createPreserveBitPattern(impl::assembleFloat64(sign, ieee754::traits::exponentMask, 0ull)); + } + if (rhsBiasedExp == ieee754::traits::specialValueExp) + { + /* a cannot be NaN, but is b NaN? */ + if (rhsMantissa != 0u) #ifdef RELAXED_NAN_PROPAGATION - return rhs.data; + return rhs.data; #else - return createPreserveBitPattern(impl::propagateFloat64NaN(data, rhs.data)); + return createPreserveBitPattern(impl::propagateFloat64NaN(data, rhs.data)); #endif - if ((uint64_t(lhsBiasedExp) | lhsMantissa) == 0u) - return createPreserveBitPattern(ieee754::traits::quietNaN); + if ((uint64_t(lhsBiasedExp) | lhsMantissa) == 0u) + return createPreserveBitPattern(ieee754::traits::quietNaN); - return createPreserveBitPattern(sign | ieee754::traits::exponentMask); - } - if (lhsBiasedExp == 0) - { - if (lhsMantissa == 0u) - return createPreserveBitPattern(sign); - impl::normalizeFloat64Subnormal(lhsMantissa, lhsBiasedExp, lhsMantissa); + return createPreserveBitPattern(sign | ieee754::traits::exponentMask); + } + if (lhsBiasedExp == 0) + { + if (lhsMantissa == 0u) + return createPreserveBitPattern(sign); + impl::normalizeFloat64Subnormal(lhsMantissa, lhsBiasedExp, lhsMantissa); + } + if (rhsBiasedExp == 0) + { + if (rhsMantissa == 0u) + return createPreserveBitPattern(sign); + impl::normalizeFloat64Subnormal(rhsMantissa, rhsBiasedExp, rhsMantissa); + } } - if (rhsBiasedExp == 0) + + const uint64_t hi_l = (lhsMantissa >> 21) | (1ull << 31); + const uint64_t lo_l = lhsMantissa & ((1ull << 21) - 1); + const uint64_t hi_r = (rhsMantissa >> 21) | (1ull << 31); + const uint64_t lo_r = rhsMantissa & ((1ull << 21) - 1); + + //const uint64_t RoundToNearest = (1ull << 31) - 1; + uint64_t newPseudoMantissa = ((hi_l * hi_r) >> 10) + ((hi_l * lo_r + lo_l * hi_r/* + RoundToNearest*/) >> 31); + + if (newPseudoMantissa & (0x1ull << 53)) { - if (rhsMantissa == 0u) - return createPreserveBitPattern(sign); - impl::normalizeFloat64Subnormal(rhsMantissa, rhsBiasedExp, rhsMantissa); + newPseudoMantissa >>= 1; + ++exp; } - } - - const uint64_t hi_l = (lhsMantissa >> 21) | (1ull << 31); - const uint64_t lo_l = lhsMantissa & ((1ull << 21) - 1); - const uint64_t hi_r = (rhsMantissa >> 21) | (1ull << 31); - const uint64_t lo_r = rhsMantissa & ((1ull << 21) - 1); + newPseudoMantissa &= (ieee754::traits::mantissaMask); - //const uint64_t RoundToNearest = (1ull << 31) - 1; - uint64_t newPseudoMantissa = ((hi_l * hi_r) >> 10) + ((hi_l * lo_r + lo_l * hi_r/* + RoundToNearest*/) >> 31); - - if (newPseudoMantissa & (0x1ull << 53)) + return createPreserveBitPattern(impl::assembleFloat64(sign, uint64_t(exp) << ieee754::traits::mantissaBitCnt, newPseudoMantissa)); + } + else { - newPseudoMantissa >>= 1; - ++exp; 
+ //static_assert(false, "not implemented yet"); + return createPreserveBitPattern(0xdeadbeefbadcaffeull); } - newPseudoMantissa &= (ieee754::traits::mantissaMask); - - return createPreserveBitPattern(impl::assembleFloat64(sign, uint64_t(exp) << ieee754::traits::mantissaBitCnt, newPseudoMantissa)); } /*emulated_float64_t reciprocal(uint64_t x) @@ -375,35 +415,58 @@ namespace hlsl return output; }*/ - emulated_float64_t operator/(const emulated_float64_t rhs) + emulated_float64_t operator/(const emulated_float64_t rhs) CONST { - //return emulated_float64_t::createPreserveBitPattern(data) * reciprocal(rhs.data); + if (FlushDenormToZero) + { + //return emulated_float64_t::createPreserveBitPattern(data) * reciprocal(rhs.data); - const uint64_t lhsRealMantissa = (ieee754::extractMantissa(data) | (1ull << ieee754::traits::mantissaBitCnt)); - const uint64_t rhsRealMantissa = ieee754::extractMantissa(rhs.data) | (1ull << ieee754::traits::mantissaBitCnt); - - const uint64_t sign = (data ^ rhs.data) & ieee754::traits::signMask; - int exp = ieee754::extractExponent(data) - ieee754::extractExponent(rhs.data) + ieee754::traits::exponentBias; + if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) + return createPreserveBitPattern(ieee754::traits::quietNaN); + if (!FastMath && ((rhs.data << 1) == 0)) + return createPreserveBitPattern(ieee754::traits::quietNaN); - uint64_t2 lhsMantissaShifted = impl::shiftMantissaLeftBy53(lhsRealMantissa); - uint64_t mantissa = impl::divmod128by64(lhsMantissaShifted.x, lhsMantissaShifted.y, rhsRealMantissa); + const uint64_t sign = (data ^ rhs.data) & ieee754::traits::signMask; - if (mantissa & (0x1ull << 53)) - { - ++exp; + if (!FastMath && impl::areBothInfinity(data, rhs.data)) + return createPreserveBitPattern(ieee754::traits::quietNaN | sign); + + if (!FastMath && tgmath::isInf(data)) + return createPreserveBitPattern((data & ~ieee754::traits::signMask) | sign); + + if (!FastMath && tgmath::isInf(rhs.data)) + return createPreserveBitPattern(0ull | sign); + + + + const uint64_t lhsRealMantissa = (ieee754::extractMantissa(data) | (1ull << ieee754::traits::mantissaBitCnt)); + const uint64_t rhsRealMantissa = ieee754::extractMantissa(rhs.data) | (1ull << ieee754::traits::mantissaBitCnt); + + int exp = ieee754::extractExponent(data) - ieee754::extractExponent(rhs.data) + ieee754::traits::exponentBias; + + uint64_t2 lhsMantissaShifted = impl::shiftMantissaLeftBy53(lhsRealMantissa); + uint64_t mantissa = impl::divmod128by64(lhsMantissaShifted.x, lhsMantissaShifted.y, rhsRealMantissa); + + while (mantissa < (1ull << 52)) + { + mantissa <<= 1; + exp--; + } + + mantissa &= ieee754::traits::mantissaMask; + + return createPreserveBitPattern(impl::assembleFloat64(sign, uint64_t(exp) << ieee754::traits::mantissaBitCnt, mantissa)); } else { - mantissa >>= 1; + //static_assert(false, "not implemented yet"); + return createPreserveBitPattern(0xdeadbeefbadcaffeull); } - - mantissa &= ieee754::traits::mantissaMask; - - return createPreserveBitPattern(impl::assembleFloat64(sign, uint64_t(exp) << ieee754::traits::mantissaBitCnt, mantissa)); } // relational operators - bool operator==(emulated_float64_t rhs) + // TODO: should `FlushDenormToZero` affect relational operators? 
+ bool operator==(emulated_float64_t rhs) CONST { if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) return false; @@ -418,11 +481,11 @@ namespace hlsl return !(xored.data); } - bool operator!=(emulated_float64_t rhs) + bool operator!=(emulated_float64_t rhs) CONST { return !(createPreserveBitPattern(data) == rhs); } - bool operator<(emulated_float64_t rhs) + bool operator<(emulated_float64_t rhs) CONST { if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) return false; @@ -442,7 +505,7 @@ namespace hlsl return (lhsFlipped & diffBits) < (rhsFlipped & diffBits); } - bool operator>(emulated_float64_t rhs) + bool operator>(emulated_float64_t rhs) CONST { if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) return true; @@ -462,18 +525,13 @@ namespace hlsl return (lhsFlipped & diffBits) > (rhsFlipped & diffBits); } - bool operator<=(emulated_float64_t rhs) { return !(emulated_float64_t::createPreserveBitPattern(data) > emulated_float64_t::createPreserveBitPattern(rhs.data)); } + bool operator<=(emulated_float64_t rhs) CONST { return !(emulated_float64_t::createPreserveBitPattern(data) > emulated_float64_t::createPreserveBitPattern(rhs.data)); } bool operator>=(emulated_float64_t rhs) { return !(emulated_float64_t::createPreserveBitPattern(data) < emulated_float64_t::createPreserveBitPattern(rhs.data)); } //logical operators - bool operator&&(emulated_float64_t rhs) { return bool(data) && bool(rhs.data); } - bool operator||(emulated_float64_t rhs) { return bool(data) || bool(rhs.data); } - bool operator!() { return !bool(data); } - - // OMITED OPERATORS - // - not implementing bitwise and modulo operators since floating point types doesn't support them - // - compound operator overload not supported in HLSL - // - access operators (dereference and addressof) not supported in HLSL + bool operator&&(emulated_float64_t rhs) CONST { return bool(data) && bool(rhs.data); } + bool operator||(emulated_float64_t rhs) CONST { return bool(data) || bool(rhs.data); } + bool operator!() CONST { return !bool(data); } // TODO: should modify self? 
emulated_float64_t flipSign() @@ -496,31 +554,31 @@ struct traits_base\ NBL_CONSTEXPR_STATIC_INLINE int16_t mantissaBitCnt = 52;\ };\ template<>\ -uint32_t extractBiasedExponent(Type x)\ +static inline uint32_t extractBiasedExponent(Type x)\ {\ return extractBiasedExponent(x.data);\ }\ \ template<>\ -int extractExponent(Type x)\ +static inline int extractExponent(Type x)\ {\ return extractExponent(x.data);\ }\ \ template<>\ -Type replaceBiasedExponent(Type x, typename unsigned_integer_of_size::type biasedExp)\ +NBL_CONSTEXPR_STATIC_INLINE Type replaceBiasedExponent(Type x, typename unsigned_integer_of_size::type biasedExp)\ {\ return Type(replaceBiasedExponent(x.data, biasedExp));\ }\ \ template <>\ -Type fastMulExp2(Type x, int n)\ +NBL_CONSTEXPR_STATIC_INLINE Type fastMulExp2(Type x, int n)\ {\ return Type(replaceBiasedExponent(x.data, extractBiasedExponent(x) + uint32_t(n)));\ }\ \ template <>\ -unsigned_integer_of_size::type extractMantissa(Type x)\ +NBL_CONSTEXPR_STATIC_INLINE unsigned_integer_of_size::type extractMantissa(Type x)\ {\ return extractMantissa(x.data);\ }\ @@ -531,42 +589,298 @@ namespace ieee754 IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); - - //template<> - //uint32_t extractBiasedExponent(emulated_float64_t x) - //{ - // return extractBiasedExponent(x.data); - //} - - //template<> - //int extractExponent(emulated_float64_t x) - //{ - // return extractExponent(x.data); - //} - - //template<> - // emulated_float64_t replaceBiasedExponent(emulated_float64_t x, typename unsigned_integer_of_size::type biasedExp) - //{ - // return emulated_float64_t(replaceBiasedExponent(x.data, biasedExp)); - //} - - ////// performs no overflow tests, returns x*exp2(n) - //template <> - // emulated_float64_t fastMulExp2(emulated_float64_t x, int n) - //{ - // return emulated_float64_t(replaceBiasedExponent(x.data, extractBiasedExponent(x) + uint32_t(n))); - //} - - //template <> - //unsigned_integer_of_size::type extractMantissa(emulated_float64_t x) - //{ - // return extractMantissa(x.data); - //} } +// TODO: finish it + +// TODO: this is mess, refactorize it +#ifndef __HLSL_VERSION +using ef64_t2 = vector; +using ef64_t3 = vector; +using ef64_t4 = vector; +using ef64_t3x3 = matrix; +using ef64_t2x2 = matrix; +#else +struct ef64_t2 +{ + emulated_float64_t x; + emulated_float64_t y; + + emulated_float64_t calcComponentSum() CONST + { + return x + y; + } + + NBL_CONSTEXPR_STATIC_INLINE ef64_t2 create(emulated_float64_t x, emulated_float64_t y) + { + ef64_t2 output; + output.x = x; + output.y = y; + + return output; + } + + NBL_CONSTEXPR_STATIC_INLINE ef64_t2 create(float val) + { + ef64_t2 output; + output.x = emulated_float64_t::create(val); + output.y = emulated_float64_t::create(val); + + return output; + } + + NBL_CONSTEXPR_STATIC_INLINE ef64_t2 create(float32_t2 val) + { + ef64_t2 output; + output.x = emulated_float64_t::create(val.x); + output.y = emulated_float64_t::create(val.y); + + return output; + } + + ef64_t2 operator+(float rhs) + { + ef64_t2 output; + emulated_float64_t rhsAsEF64 = emulated_float64_t::create(rhs); + output.x = x + rhsAsEF64; + output.y = y + rhsAsEF64; + + return output; + } + + ef64_t2 operator+(emulated_float64_t rhs) + { + ef64_t2 output; + output.x = x + rhs; + output.y = y + rhs; + + return output; + } + + ef64_t2 operator+(ef64_t2 rhs) + { + ef64_t2 output; + output.x = x + rhs.x; + 
output.y = y + rhs.y; + + return output; + } + + ef64_t2 operator-(float rhs) + { + return create(x, y) + (-rhs); + } + + ef64_t2 operator-(emulated_float64_t rhs) + { + return create(x, y) + (rhs.flipSign()); + } + + ef64_t2 operator-(ef64_t2 rhs) + { + rhs.x = rhs.x.flipSign(); + rhs.y = rhs.y.flipSign(); + return create(x, y) + rhs; + } + + ef64_t2 operator*(float rhs) + { + ef64_t2 output; + emulated_float64_t rhsAsEF64 = emulated_float64_t::create(rhs); + output.x = x * rhsAsEF64; + output.y = y * rhsAsEF64; + + return output; + } + + ef64_t2 operator*(emulated_float64_t rhs) + { + ef64_t2 output; + output.x = x * rhs; + output.y = y * rhs; + + return output; + } + + ef64_t2 operator*(ef64_t2 rhs) + { + ef64_t2 output; + output.x = x * rhs.x; + output.y = y * rhs.y; + + return output; + } + + float2 getAsFloat2() + { + return float2(x.getAsFloat32(), y.getAsFloat32()); + } +}; + +struct ef64_t3 +{ + emulated_float64_t x; + emulated_float64_t y; + emulated_float64_t z; + + static ef64_t3 create(NBL_REF_ARG(ef64_t3) other) + { + ef64_t3 output; + + output.x = other.x; + output.y = other.y; + output.z = other.z; + + return output; + } + + static ef64_t3 create(NBL_REF_ARG(ef64_t2) other, emulated_float64_t z) + { + ef64_t3 output; + + output.x = other.x; + output.y = other.y; + output.z = z; + + return output; + } + + static ef64_t3 create(NBL_REF_ARG(ef64_t2) other, int z) + { + ef64_t3 output; + + output.x = other.x; + output.y = other.y; + output.z = emulated_float64_t::create(z); + + return output; + } + + emulated_float64_t calcComponentSum() CONST + { + return x + y + z; + } + + ef64_t3 operator*(NBL_CONST_REF_ARG(ef64_t3) rhs) CONST + { + ef64_t3 output; + output.x = x * rhs.x; + output.y = x * rhs.y; + output.z = x * rhs.z; + + return output; + } +}; + +struct ef64_t4 +{ + emulated_float64_t x; + emulated_float64_t y; + emulated_float64_t z; + emulated_float64_t w; +}; + +struct ef64_t3x3 +{ + ef64_t3 columns[3]; + + ef64_t3x3 getTransposed() CONST + { + ef64_t3x3 output; + + output.columns[1].x = columns[0].y; + output.columns[2].x = columns[0].z; + + output.columns[0].y = columns[1].x; + output.columns[2].y = columns[1].z; + + output.columns[0].z = columns[3].x; + output.columns[1].z = columns[3].y; + + return output; + } + + ef64_t3x3 operator*(NBL_CONST_REF_ARG(ef64_t3x3) rhs) CONST + { + ef64_t3x3 output; + ef64_t3x3 lhsTransposed = getTransposed(); + + output.columns[0].x = (lhsTransposed.columns[0] * rhs.columns[0]).calcComponentSum(); + output.columns[0].y = (lhsTransposed.columns[0] * rhs.columns[1]).calcComponentSum(); + output.columns[0].z = (lhsTransposed.columns[0] * rhs.columns[2]).calcComponentSum(); + + output.columns[1].x = (lhsTransposed.columns[1] * rhs.columns[0]).calcComponentSum(); + output.columns[1].y = (lhsTransposed.columns[1] * rhs.columns[1]).calcComponentSum(); + output.columns[1].z = (lhsTransposed.columns[1] * rhs.columns[2]).calcComponentSum(); + + output.columns[2].x = (lhsTransposed.columns[2] * rhs.columns[0]).calcComponentSum(); + output.columns[2].y = (lhsTransposed.columns[2] * rhs.columns[1]).calcComponentSum(); + output.columns[2].z = (lhsTransposed.columns[2] * rhs.columns[2]).calcComponentSum(); + + return output; + } + + ef64_t3 operator*(NBL_CONST_REF_ARG(ef64_t3) rhs) + { + ef64_t3 output; + ef64_t3x3 lhsTransposed = getTransposed(); + + output.x = (columns[0] * rhs).calcComponentSum(); + output.y = (columns[1] * rhs).calcComponentSum(); + output.z = (columns[2] * rhs).calcComponentSum(); + + return output; + } +}; + +struct ef64_t2x2 
+{ + ef64_t2 columns[2]; + + ef64_t2x2 getTransposed() CONST + { + ef64_t2x2 output; + + output.columns[1].x = columns[0].y; + output.columns[0].y = columns[1].x; + + return output; + } + + ef64_t2x2 operator*(NBL_CONST_REF_ARG(ef64_t2x2) rhs) CONST + { + ef64_t2x2 output; + ef64_t2x2 lhsTransposed = getTransposed(); + + output.columns[0].x = (lhsTransposed.columns[0] * rhs.columns[0]).calcComponentSum(); + output.columns[0].y = (lhsTransposed.columns[0] * rhs.columns[1]).calcComponentSum(); + + output.columns[1].x = (lhsTransposed.columns[1] * rhs.columns[0]).calcComponentSum(); + output.columns[1].y = (lhsTransposed.columns[1] * rhs.columns[1]).calcComponentSum(); + + return output; + } + + ef64_t2 operator*(NBL_CONST_REF_ARG(ef64_t2) rhs) + { + ef64_t2 output; + ef64_t2x2 lhsTransposed = getTransposed(); + + output.x = (columns[0] * rhs).calcComponentSum(); + output.y = (columns[1] * rhs).calcComponentSum(); + + return output; + } +}; + +#endif + } + } +#undef CONST + #undef FLOAT_ROUND_NEAREST_EVEN #undef FLOAT_ROUND_TO_ZERO #undef FLOAT_ROUND_DOWN diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index 712d7440b8..e92b45713f 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -5,18 +5,6 @@ #include #include -// TODO: delete -#ifdef __HLSL_VERSION -#define staticAssertTmp(...) ; -#else -void dbgBreakIf(bool condition) -{ - if (!condition) - __debugbreak(); -} -#define staticAssertTmp(x, ...) dbgBreakIf(x); -#endif - namespace nbl { namespace hlsl @@ -37,25 +25,25 @@ namespace impl } template - typename unsigned_integer_of_size::type castToUintType(T x) + NBL_CONSTEXPR_STATIC_INLINE typename unsigned_integer_of_size::type castToUintType(T x) { using AsUint = typename unsigned_integer_of_size::type; return bit_cast(x); } // to avoid bit cast from uintN_t to uintN_t - template <> unsigned_integer_of_size<2>::type castToUintType(uint16_t x) { return x; } - template <> unsigned_integer_of_size<4>::type castToUintType(uint32_t x) { return x; } - template <> unsigned_integer_of_size<8>::type castToUintType(uint64_t x) { return x; } + template <> NBL_CONSTEXPR_STATIC_INLINE unsigned_integer_of_size<2>::type castToUintType(uint16_t x) { return x; } + template <> NBL_CONSTEXPR_STATIC_INLINE unsigned_integer_of_size<4>::type castToUintType(uint32_t x) { return x; } + template <> NBL_CONSTEXPR_STATIC_INLINE unsigned_integer_of_size<8>::type castToUintType(uint64_t x) { return x; } template - T castBackToFloatType(T x) + NBL_CONSTEXPR_STATIC_INLINE T castBackToFloatType(T x) { using AsFloat = typename float_of_size::type; return bit_cast(x); } - template<> uint16_t castBackToFloatType(uint16_t x) { return x; } - template<> uint32_t castBackToFloatType(uint32_t x) { return x; } - template<> uint64_t castBackToFloatType(uint64_t x) { return x; } + template<> NBL_CONSTEXPR_STATIC_INLINE uint16_t castBackToFloatType(uint16_t x) { return x; } + template<> NBL_CONSTEXPR_STATIC_INLINE uint32_t castBackToFloatType(uint32_t x) { return x; } + template<> NBL_CONSTEXPR_STATIC_INLINE uint64_t castBackToFloatType(uint64_t x) { return x; } } template @@ -104,62 +92,63 @@ struct traits : traits_base }; template -uint32_t extractBiasedExponent(T x) +static inline uint32_t extractBiasedExponent(T x) { using AsUint = typename unsigned_integer_of_size::type; return glsl::bitfieldExtract(impl::castToUintType(x), traits::type>::mantissaBitCnt, traits::type>::exponentBitCnt); } template<> -uint32_t extractBiasedExponent(uint64_t x) 
+static inline uint32_t extractBiasedExponent(uint64_t x) { const uint32_t highBits = uint32_t(x >> 32); return glsl::bitfieldExtract(highBits, traits::mantissaBitCnt - 32, traits::exponentBitCnt); } template<> -uint32_t extractBiasedExponent(float64_t x) +static inline uint32_t extractBiasedExponent(float64_t x) { return extractBiasedExponent(impl::castToUintType(x)); } template -int extractExponent(T x) +static inline int extractExponent(T x) { using AsFloat = typename float_of_size::type; return int(extractBiasedExponent(x)) - int(traits::exponentBias); } template -T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp) +NBL_CONSTEXPR_STATIC_INLINE T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp) { - staticAssertTmp(impl::isTypeAllowed(), "Invalid type! Only floating point or unsigned integer types are allowed."); + // TODO: + //staticAssertTmp(impl::isTypeAllowed(), "Invalid type! Only floating point or unsigned integer types are allowed."); using AsFloat = typename float_of_size::type; return impl::castBackToFloatType(glsl::bitfieldInsert(impl::castToUintType(x), biasedExp, traits::mantissaBitCnt, traits::exponentBitCnt)); } // performs no overflow tests, returns x*exp2(n) template -T fastMulExp2(T x, int n) +NBL_CONSTEXPR_STATIC_INLINE T fastMulExp2(T x, int n) { return replaceBiasedExponent(x, extractBiasedExponent(x) + uint32_t(n)); } template -typename unsigned_integer_of_size::type extractMantissa(T x) +NBL_CONSTEXPR_STATIC_INLINE typename unsigned_integer_of_size::type extractMantissa(T x) { using AsUint = typename unsigned_integer_of_size::type; return impl::castToUintType(x) & traits::type>::mantissaMask; } template -typename unsigned_integer_of_size::type extractSign(T x) +NBL_CONSTEXPR_STATIC_INLINE typename unsigned_integer_of_size::type extractSign(T x) { return (impl::castToUintType(x) & traits::signMask) >> ((sizeof(T) * 8) - 1); } template -typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) +NBL_CONSTEXPR_STATIC_INLINE typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) { return impl::castToUintType(x) & traits::signMask; } diff --git a/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl b/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl index f2c2760994..da9586207f 100644 --- a/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl @@ -47,436 +47,436 @@ namespace hlsl { namespace impl { - uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) - { - uint64_t2 output; - output.x = mantissa64 >> (63 - ieee754::traits::mantissaBitCnt); - output.y = mantissa64 << (ieee754::traits::mantissaBitCnt + 1); +NBL_CONSTEXPR_STATIC_INLINE uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) +{ + uint64_t2 output; + output.x = mantissa64 >> (64 - ieee754::traits::mantissaBitCnt); + output.y = mantissa64 << (ieee754::traits::mantissaBitCnt); - return output; - } + return output; +} - template - uint64_t promoteToUint64(T val) - { - using AsFloat = typename float_of_size::type; - uint64_t asUint = ieee754::impl::castToUintType(val); +template +NBL_CONSTEXPR_STATIC_INLINE uint64_t promoteToUint64(T val) +{ + using AsFloat = typename float_of_size::type; + uint64_t asUint = ieee754::impl::castToUintType(val); - const uint64_t sign = (uint64_t(ieee754::traits::signMask) & asUint) << (sizeof(float64_t) - sizeof(T)); - const int64_t newExponent = ieee754::extractExponent(val) + ieee754::traits::exponentBias; 
+ const uint64_t sign = (uint64_t(ieee754::traits::signMask) & asUint) << (sizeof(float64_t) - sizeof(T)); + const int64_t newExponent = ieee754::extractExponent(val) + ieee754::traits::exponentBias; - const uint64_t exp = (uint64_t(ieee754::extractExponent(val)) + ieee754::traits::exponentBias) << (ieee754::traits::mantissaBitCnt); - const uint64_t mantissa = (uint64_t(ieee754::traits::mantissaMask) & asUint) << (ieee754::traits::exponentBias - ieee754::traits::mantissaBitCnt); + const uint64_t exp = (uint64_t(ieee754::extractExponent(val)) + ieee754::traits::exponentBias) << (ieee754::traits::mantissaBitCnt); + const uint64_t mantissa = (uint64_t(ieee754::traits::mantissaMask) & asUint) << (ieee754::traits::exponentBias - ieee754::traits::mantissaBitCnt); - return sign | exp | mantissa; - }; + return sign | exp | mantissa; +}; - template<> uint64_t promoteToUint64(float64_t val) { return bit_cast(val); } +template<> NBL_CONSTEXPR_STATIC_INLINE uint64_t promoteToUint64(float64_t val) { return bit_cast(val); } - uint32_t2 umulExtended(uint32_t lhs, uint32_t rhs) - { - uint64_t product = uint64_t(lhs) * uint64_t(rhs); - uint32_t2 output; - output.x = uint32_t((product & 0xFFFFFFFF00000000) >> 32); - output.y = uint32_t(product & 0x00000000FFFFFFFFull); - return output; - } +NBL_CONSTEXPR_STATIC_INLINE uint32_t2 umulExtended(uint32_t lhs, uint32_t rhs) +{ + uint64_t product = uint64_t(lhs) * uint64_t(rhs); + uint32_t2 output; + output.x = uint32_t((product & 0xFFFFFFFF00000000) >> 32); + output.y = uint32_t(product & 0x00000000FFFFFFFFull); + return output; +} - uint64_t propagateFloat64NaN(uint64_t lhs, uint64_t rhs) - { +NBL_CONSTEXPR_STATIC_INLINE uint64_t propagateFloat64NaN(uint64_t lhs, uint64_t rhs) +{ #if defined RELAXED_NAN_PROPAGATION - return lhs | rhs; + return lhs | rhs; #else - lhs |= 0x0008000000000000ull; - rhs |= 0x0008000000000000ull; - return tgmath::lerp(rhs, tgmath::lerp(lhs, rhs, tgmath::isnan(rhs)), tgmath::isnan(lhs)); - return 0; + lhs |= 0x0008000000000000ull; + rhs |= 0x0008000000000000ull; + return tgmath::lerp(rhs, tgmath::lerp(lhs, rhs, tgmath::isnan(rhs)), tgmath::isnan(lhs)); + return 0; #endif - } - - - uint64_t packFloat64(uint32_t zSign, int zExp, uint32_t zFrac0, uint32_t zFrac1) - { - uint32_t2 z; +} - z.x = zSign + (uint32_t(zExp) << 20) + zFrac0; - z.y = zFrac1; +NBL_CONSTEXPR_STATIC_INLINE uint64_t packFloat64(uint32_t zSign, int zExp, uint32_t zFrac0, uint32_t zFrac1) +{ + uint32_t2 z; - uint64_t output = 0u; - output |= (uint64_t(z.x) << 32) & 0xFFFFFFFF00000000ull; - output |= uint64_t(z.y); - return output; - } + z.x = zSign + (uint32_t(zExp) << 20) + zFrac0; + z.y = zFrac1; - uint32_t2 packUint64(uint64_t val) - { - return uint32_t2((val & 0xFFFFFFFF00000000ull) >> 32, val & 0x00000000FFFFFFFFull); - } + uint64_t output = 0u; + output |= (uint64_t(z.x) << 32) & 0xFFFFFFFF00000000ull; + output |= uint64_t(z.y); + return output; +} - uint64_t unpackUint64(uint32_t2 val) - { - return ((uint64_t(val.x) & 0x00000000FFFFFFFFull) << 32) | uint64_t(val.y); - } +NBL_CONSTEXPR_STATIC_INLINE uint32_t2 packUint64(uint64_t val) +{ + return uint32_t2((val & 0xFFFFFFFF00000000ull) >> 32, val & 0x00000000FFFFFFFFull); +} - uint32_t2 add64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) - { - uint32_t2 output; - output.y = a1 + b1; - output.x = a0 + b0 + uint32_t(output.y < a1); +NBL_CONSTEXPR_STATIC_INLINE uint64_t unpackUint64(uint32_t2 val) +{ + return ((uint64_t(val.x) & 0x00000000FFFFFFFFull) << 32) | uint64_t(val.y); +} - return output; - } 
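Note: the add64/sub64 helpers in this hunk propagate the carry/borrow between 32-bit halves by checking for unsigned wraparound on the low half. The same trick in plain C++, with add64_ref as an illustrative name only:

    #include <cstdint>

    // If the low half wrapped around, (aLo + bLo) < aLo holds, and that comparison
    // is exactly the carry into the high half.
    void add64_ref(uint32_t aHi, uint32_t aLo, uint32_t bHi, uint32_t bLo,
                   uint32_t &outHi, uint32_t &outLo)
    {
        outLo = aLo + bLo;
        outHi = aHi + bHi + (outLo < aLo ? 1u : 0u);
    }
    // Sanity check: ((uint64_t(outHi) << 32) | outLo) equals the native 64-bit sum
    // of the two inputs modulo 2^64.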
+NBL_CONSTEXPR_STATIC_INLINE uint32_t2 add64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) +{ + uint32_t2 output; + output.y = a1 + b1; + output.x = a0 + b0 + uint32_t(output.y < a1); + return output; +} - uint32_t2 sub64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) - { - uint32_t2 output; - output.y = a1 - b1; - output.x = a0 - b0 - uint32_t(a1 < b1); - - return output; - } +NBL_CONSTEXPR_STATIC_INLINE uint32_t2 sub64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) +{ + uint32_t2 output; + output.y = a1 - b1; + output.x = a0 - b0 - uint32_t(a1 < b1); + + return output; +} // TODO: test - int countLeadingZeros32(uint32_t val) - { +static inline int countLeadingZeros32(uint32_t val) +{ #ifndef __HLSL_VERSION - return 31 - findMSB(val); + return 31 - findMSB(val); #else - return 31 - firstbithigh(val); + return 31 - firstbithigh(val); #endif - } - - uint32_t2 shift64RightJamming(uint32_t2 val, int count) - { - uint32_t2 output; - const int negCount = (-count) & 31; - - output.x = tgmath::lerp(0u, val.x, count == 0); - output.x = tgmath::lerp(output.x, (val.x >> count), count < 32); - - output.y = uint32_t((val.x | val.y) != 0u); /* count >= 64 */ - uint32_t z1_lt64 = (val.x>>(count & 31)) | uint32_t(((val.x<>count) | uint32_t((val.y<> (count & 31)), count < 64); - output.y = tgmath::lerp(output.y, (val.x << negCount) | (val.y >> count), count < 32); +} - val.z = tgmath::lerp(val.z | val.y, val.z, count < 32); - output.x = tgmath::lerp(output.x, val.x >> count, count < 32); - output.z |= uint32_t(val.z != 0u); +NBL_CONSTEXPR_STATIC_INLINE uint32_t2 shift64RightJamming(uint32_t2 val, int count) +{ + uint32_t2 output; + const int negCount = (-count) & 31; + + output.x = tgmath::lerp(0u, val.x, count == 0); + output.x = tgmath::lerp(output.x, (val.x >> count), count < 32); + + output.y = uint32_t((val.x | val.y) != 0u); /* count >= 64 */ + uint32_t z1_lt64 = (val.x>>(count & 31)) | uint32_t(((val.x<>count) | uint32_t((val.y<> ((-count) & 31))), packed.x, count == 0); - return unpackUint64(output); - }; +NBL_CONSTEXPR_STATIC_INLINE uint32_t4 mul64to128(uint32_t4 mantissasPacked) +{ + uint32_t4 output; + uint32_t more1 = 0u; + uint32_t more2 = 0u; + + // a0 = x + // a1 = y + // b0 = z + // b1 = w + + uint32_t2 z2z3 = umulExtended(mantissasPacked.y, mantissasPacked.w); + output.z = z2z3.x; + output.w = z2z3.y; + uint32_t2 z1more2 = umulExtended(mantissasPacked.y, mantissasPacked.z); + output.y = z1more2.x; + more2 = z1more2.y; + uint32_t2 z1z2 = add64(output.y, more2, 0u, output.z); + output.y = z1z2.x; + output.z = z1z2.y; + uint32_t2 z0more1 = umulExtended(mantissasPacked.x, mantissasPacked.z); + output.x = z0more1.x; + more1 = z0more1.y; + uint32_t2 z0z1 = add64(output.x, more1, 0u, output.y); + output.x = z0z1.x; + output.y = z0z1.y; + uint32_t2 more1more2 = umulExtended(mantissasPacked.x, mantissasPacked.w); + more1 = more1more2.x; + more2 = more1more2.y; + uint32_t2 more1z2 = add64(more1, more2, 0u, output.z); + more1 = more1z2.x; + output.z = more1z2.y; + uint32_t2 z0z12 = add64(output.x, output.y, 0u, more1); + output.x = z0z12.x; + output.y = z0z12.y; + + return output; +} + +NBL_CONSTEXPR_STATIC_INLINE uint32_t3 shift64ExtraRightJamming(uint32_t3 val, int count) +{ + uint32_t3 output; + output.x = 0u; + + int negCount = (-count) & 31; + + output.z = tgmath::lerp(uint32_t(val.x != 0u), val.x, count == 64); + output.z = tgmath::lerp(output.z, val.x << negCount, count < 64); + output.z = tgmath::lerp(output.z, val.y << negCount, count < 32); + + output.y = tgmath::lerp(0u, 
(val.x >> (count & 31)), count < 64); + output.y = tgmath::lerp(output.y, (val.x << negCount) | (val.y >> count), count < 32); + + val.z = tgmath::lerp(val.z | val.y, val.z, count < 32); + output.x = tgmath::lerp(output.x, val.x >> count, count < 32); + output.z |= uint32_t(val.z != 0u); + + output.x = tgmath::lerp(output.x, 0u, (count == 32)); + output.y = tgmath::lerp(output.y, val.x, (count == 32)); + output.z = tgmath::lerp(output.z, val.y, (count == 32)); + output.x = tgmath::lerp(output.x, val.x, (count == 0)); + output.y = tgmath::lerp(output.y, val.y, (count == 0)); + output.z = tgmath::lerp(output.z, val.z, (count == 0)); + + return output; +} - uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) - { - return signShifted + expShifted + mantissa; - } +NBL_CONSTEXPR_STATIC_INLINE uint64_t shortShift64Left(uint64_t val, int count) +{ + const uint32_t2 packed = packUint64(val); - uint64_t roundAndPackFloat64(uint64_t zSign, int zExp, uint32_t3 mantissaExtended) + uint32_t2 output; + output.y = packed.y << count; + // TODO: fix + output.x = tgmath::lerp((packed.x << count | (packed.y >> ((-count) & 31))), packed.x, count == 0); + + return unpackUint64(output); +}; + +NBL_CONSTEXPR_STATIC_INLINE uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) +{ + return signShifted + expShifted + mantissa; +} + +NBL_CONSTEXPR_STATIC_INLINE uint64_t roundAndPackFloat64(uint64_t zSign, int zExp, uint32_t3 mantissaExtended) +{ + bool roundNearestEven; + bool increment; + + roundNearestEven = true; + increment = int(mantissaExtended.z) < 0; + if (!roundNearestEven) { - bool roundNearestEven; - bool increment; - - roundNearestEven = true; - increment = int(mantissaExtended.z) < 0; - if (!roundNearestEven) + if (false) //(FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) { - if (false) //(FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) + increment = false; + } + else + { + if (false) //(zSign != 0u) { - increment = false; - } + //increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && + // (zFrac2 != 0u); + } else { - if (false) //(zSign != 0u) - { - //increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && - // (zFrac2 != 0u); - } - else - { - //increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && - // (zFrac2 != 0u); - } + //increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && + // (zFrac2 != 0u); } } - if (0x7FD <= zExp) + } + if (0x7FD <= zExp) + { + if ((0x7FD < zExp) || ((zExp == 0x7FD) && (0x001FFFFFu == mantissaExtended.x && 0xFFFFFFFFu == mantissaExtended.y) && increment)) { - if ((0x7FD < zExp) || ((zExp == 0x7FD) && (0x001FFFFFu == mantissaExtended.x && 0xFFFFFFFFu == mantissaExtended.y) && increment)) + if (false) // ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) || + // ((zSign != 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP)) || + // ((zSign == 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN))) { - if (false) // ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) || - // ((zSign != 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP)) || - // ((zSign == 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN))) - { - return assembleFloat64(zSign, 0x7FE << ieee754::traits::mantissaBitCnt, 0x000FFFFFFFFFFFFFull); - } - - return assembleFloat64(zSign, ieee754::traits::exponentMask, 0ull); - } + return assembleFloat64(zSign, 0x7FE << ieee754::traits::mantissaBitCnt, 0x000FFFFFFFFFFFFFull); + } + + return assembleFloat64(zSign, ieee754::traits::exponentMask, 0ull); + } + } + + if (zExp < 0) + { + mantissaExtended = 
shift64ExtraRightJamming(mantissaExtended, -zExp); + zExp = 0; + + if (roundNearestEven) + { + increment = mantissaExtended.z < 0u; } - - if (zExp < 0) + else { - mantissaExtended = shift64ExtraRightJamming(mantissaExtended, -zExp); - zExp = 0; - - if (roundNearestEven) + if (zSign != 0u) { - increment = mantissaExtended.z < 0u; + increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && (mantissaExtended.z != 0u); } else { - if (zSign != 0u) - { - increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && (mantissaExtended.z != 0u); - } - else - { - increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && (mantissaExtended.z != 0u); - } + increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && (mantissaExtended.z != 0u); } } - - if (increment) - { - const uint64_t added = impl::unpackUint64(uint32_t2(mantissaExtended.xy)) + 1ull; - mantissaExtended.xy = packUint64(added); - mantissaExtended.y &= ~((mantissaExtended.z + uint32_t(mantissaExtended.z == 0u)) & uint32_t(roundNearestEven)); - } - else - { - zExp = tgmath::lerp(zExp, 0, (mantissaExtended.x | mantissaExtended.y) == 0u); - } - - return assembleFloat64(zSign, uint64_t(zExp) << ieee754::traits::mantissaBitCnt, unpackUint64(mantissaExtended.xy)); } - - uint64_t normalizeRoundAndPackFloat64(uint64_t sign, int exp, uint32_t frac0, uint32_t frac1) + + if (increment) { - int shiftCount; - uint32_t3 frac = uint32_t3(frac0, frac1, 0u); - - if (frac.x == 0u) - { - exp -= 32; - frac.x = frac.y; - frac.y = 0u; - } - - shiftCount = countLeadingZeros32(frac.x) - 11; - if (0 <= shiftCount) - { - // TODO: this is packing and unpacking madness, fix it - frac.xy = packUint64(shortShift64Left(unpackUint64(frac.xy), shiftCount)); - } - else - { - frac.xyz = shift64ExtraRightJamming(uint32_t3(frac.xy, 0), -shiftCount); - } - exp -= shiftCount; - return roundAndPackFloat64(sign, exp, frac); + const uint64_t added = impl::unpackUint64(uint32_t2(mantissaExtended.xy)) + 1ull; + mantissaExtended.xy = packUint64(added); + mantissaExtended.y &= ~((mantissaExtended.z + uint32_t(mantissaExtended.z == 0u)) & uint32_t(roundNearestEven)); } - - void normalizeFloat64Subnormal(uint64_t mantissa, - NBL_REF_ARG(int) outExp, - NBL_REF_ARG(uint64_t) outMantissa) + else { - uint32_t2 mantissaPacked = packUint64(mantissa); - int shiftCount; - uint32_t2 temp; - shiftCount = countLeadingZeros32(tgmath::lerp(mantissaPacked.x, mantissaPacked.y, mantissaPacked.x == 0u)) - 11; - outExp = tgmath::lerp(1 - shiftCount, -shiftCount - 31, mantissaPacked.x == 0u); - - temp.x = tgmath::lerp(mantissaPacked.y << shiftCount, mantissaPacked.y >> (-shiftCount), shiftCount < 0); - temp.y = tgmath::lerp(0u, mantissaPacked.y << (shiftCount & 31), shiftCount < 0); + zExp = tgmath::lerp(zExp, 0, (mantissaExtended.x | mantissaExtended.y) == 0u); + } + + return assembleFloat64(zSign, uint64_t(zExp) << ieee754::traits::mantissaBitCnt, unpackUint64(mantissaExtended.xy)); +} - shortShift64Left(impl::unpackUint64(mantissaPacked), shiftCount); +static inline uint64_t normalizeRoundAndPackFloat64(uint64_t sign, int exp, uint32_t frac0, uint32_t frac1) +{ + int shiftCount; + uint32_t3 frac = uint32_t3(frac0, frac1, 0u); - outMantissa = tgmath::lerp(outMantissa, unpackUint64(temp), mantissaPacked.x == 0); - } - - bool areBothSameSignInfinity(uint64_t lhs, uint64_t rhs) + if (frac.x == 0u) { - lhs ^= ieee754::traits::signMask; - rhs ^= ieee754::traits::signMask; - - bool output = lhs == rhs && ieee754::traits::inf; - output = output && ((lhs & (~ieee754::traits::signMask)) == ieee754::traits::inf); - - return 
output; + exp -= 32; + frac.x = frac.y; + frac.y = 0u; } - bool areBothZero(uint64_t lhs, uint64_t rhs) + shiftCount = countLeadingZeros32(frac.x) - 11; + if (0 <= shiftCount) { - return ((lhs << 1) == 0ull) && ((rhs << 1) == 0ull); + // TODO: this is packing and unpacking madness, fix it + frac.xy = packUint64(shortShift64Left(unpackUint64(frac.xy), shiftCount)); } - - bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) + else { - return ((lhs << 1) == 0ull) && (lhs == rhs); + frac.xyz = shift64ExtraRightJamming(uint32_t3(frac.xy, 0), -shiftCount); } + exp -= shiftCount; + return roundAndPackFloat64(sign, exp, frac); +} - // TODO: find more efficient algorithm - uint64_t nlz64(uint64_t x) - { - static const uint64_t MASK = 1ull << 63; +static inline void normalizeFloat64Subnormal(uint64_t mantissa, + NBL_REF_ARG(int) outExp, + NBL_REF_ARG(uint64_t) outMantissa) +{ + uint32_t2 mantissaPacked = packUint64(mantissa); + int shiftCount; + uint32_t2 temp; + shiftCount = countLeadingZeros32(tgmath::lerp(mantissaPacked.x, mantissaPacked.y, mantissaPacked.x == 0u)) - 11; + outExp = tgmath::lerp(1 - shiftCount, -shiftCount - 31, mantissaPacked.x == 0u); - uint64_t counter = 0; + temp.x = tgmath::lerp(mantissaPacked.y << shiftCount, mantissaPacked.y >> (-shiftCount), shiftCount < 0); + temp.y = tgmath::lerp(0u, mantissaPacked.y << (shiftCount & 31), shiftCount < 0); - while ((x & MASK) == 0) - { - x <<= 1; - ++counter; - } - return counter; + shortShift64Left(impl::unpackUint64(mantissaPacked), shiftCount); + + outMantissa = tgmath::lerp(outMantissa, unpackUint64(temp), mantissaPacked.x == 0); +} + +NBL_CONSTEXPR_STATIC_INLINE bool areBothInfinity(uint64_t lhs, uint64_t rhs) +{ + lhs &= ~ieee754::traits::signMask; + rhs &= ~ieee754::traits::signMask; + + return lhs == rhs && lhs == ieee754::traits::inf; +} + +NBL_CONSTEXPR_STATIC_INLINE bool areBothSameSignInfinity(uint64_t lhs, uint64_t rhs) +{ + return lhs == rhs && (lhs & ~ieee754::traits::signMask) == ieee754::traits::inf; +} + +NBL_CONSTEXPR_STATIC_INLINE bool areBothZero(uint64_t lhs, uint64_t rhs) +{ + return ((lhs << 1) == 0ull) && ((rhs << 1) == 0ull); +} + +NBL_CONSTEXPR_STATIC_INLINE bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) +{ + return ((lhs << 1) == 0ull) && (lhs == rhs); +} + +// TODO: find more efficient algorithm +static inline uint64_t nlz64(uint64_t x) +{ + static const uint64_t MASK = 1ull << 63; + + uint64_t counter = 0; + + while ((x & MASK) == 0) + { + x <<= 1; + ++counter; } + return counter; +} - // returns pair of quotient and remainder - uint64_t divmod128by64(const uint64_t dividentHigh, const uint64_t dividentLow, uint64_t divisor) +// returns pair of quotient and remainder +static inline uint64_t divmod128by64(const uint64_t dividentHigh, const uint64_t dividentLow, uint64_t divisor) +{ + const uint64_t b = 1ull << 32; + uint64_t un1, un0, vn1, vn0, q1, q0, un32, un21, un10, rhat, left, right; + uint64_t s; + + //TODO: countl_zero + s = countl_zero(divisor); + //s = nlz64(divisor); + divisor <<= s; + vn1 = divisor >> 32; + vn0 = divisor & 0xFFFFFFFF; + + if (s > 0) { - const uint64_t b = 1ull << 32; - uint64_t un1, un0, vn1, vn0, q1, q0, un32, un21, un10, rhat, left, right; - uint64_t s; - - //TODO: countl_zero - s = countl_zero(divisor); - //s = nlz64(divisor); - divisor <<= s; - vn1 = divisor >> 32; - vn0 = divisor & 0xFFFFFFFF; - - if (s > 0) - { - un32 = (dividentHigh << s) | (dividentLow >> (64 - s)); - un10 = dividentLow << s; - } - else - { - un32 = dividentHigh; - un10 = dividentLow; - } + un32 = 
(dividentHigh << s) | (dividentLow >> (64 - s)); + un10 = dividentLow << s; + } + else + { + un32 = dividentHigh; + un10 = dividentLow; + } - un1 = un10 >> 32; - un0 = un10 & 0xFFFFFFFF; + un1 = un10 >> 32; + un0 = un10 & 0xFFFFFFFF; - q1 = un32 / vn1; - rhat = un32 % vn1; + q1 = un32 / vn1; + rhat = un32 % vn1; - left = q1 * vn0; - right = (rhat << 32) + un1; - while ((q1 >= b) || (left > right)) + left = q1 * vn0; + right = (rhat << 32) + un1; + while ((q1 >= b) || (left > right)) + { + --q1; + rhat += vn1; + if (rhat < b) { - --q1; - rhat += vn1; - if (rhat < b) - { - left -= vn0; - right = (rhat << 32) | un1; - } - break; + left -= vn0; + right = (rhat << 32) | un1; } + break; + } - un21 = (un32 << 32) + (un1 - (q1 * divisor)); + un21 = (un32 << 32) + (un1 - (q1 * divisor)); - q0 = un21 / vn1; - rhat = un21 % vn1; + q0 = un21 / vn1; + rhat = un21 % vn1; - left = q0 * vn0; - right = (rhat << 32) | un0; - while ((q0 >= b) || (left > right)) + left = q0 * vn0; + right = (rhat << 32) | un0; + while ((q0 >= b) || (left > right)) + { + --q0; + rhat += vn1; + if (rhat < b) { - --q0; - rhat += vn1; - if (rhat < b) - { - left -= vn0; - right = (rhat << 32) | un0; - continue; - } - break; + left -= vn0; + right = (rhat << 32) | un0; + continue; } - - return (q1 << 32) | q0; + break; } + + return (q1 << 32) | q0; +} } } } diff --git a/include/nbl/builtin/hlsl/shapes/beziers.hlsl b/include/nbl/builtin/hlsl/shapes/beziers.hlsl index adea0556c2..e454c4c8ef 100644 --- a/include/nbl/builtin/hlsl/shapes/beziers.hlsl +++ b/include/nbl/builtin/hlsl/shapes/beziers.hlsl @@ -10,6 +10,7 @@ #include #include #include +#include // TODO: Later include from correct hlsl header (numeric_limits.hlsl) #ifndef nbl_hlsl_FLT_EPSILON @@ -26,16 +27,52 @@ namespace nbl { namespace hlsl { + +// TODO(emulated_float64_t): this shouldn't be in the nbl::hlsl space +// struct VecT is solution to +// error: 'nbl::hlsl::emulated_float64_t' cannot be used as a type parameter where a scalar is required +// using float_t2 = typename conditional >::value, ef64_t2, vector >::type; +#ifdef __HLSL_VERSION +template +struct VecT { using type = void; }; +template<> +struct VecT { using type = vector; }; +template<> +struct VecT { using type = vector; }; +template<> +struct VecT { using type = vector; }; +template<> +struct VecT, 2> { using type = ef64_t2; }; +template<> +struct VecT, 3> { using type = ef64_t3; }; +template<> +struct VecT, 4> { using type = float64_t4; }; + +template +struct Mat2x2T { using type = float64_t2x2; }; +template<> +struct Mat2x2T { using type = float64_t2x2; }; +template<> +struct Mat2x2T > { using type = ef64_t2x2; }; + +#endif + namespace shapes { template struct QuadraticBezier { +#ifndef __HLSL_VERSION using float_t2 = vector; using float_t3 = vector; using float_t4 = vector; using float_t2x2 = matrix; - +#else + using float_t2 = typename VecT::type; + using float_t3 = typename VecT::type; + using float_t4 = typename VecT::type; + using float_t2x2 = typename Mat2x2T::type; +#endif float_t2 P0; float_t2 P1; float_t2 P2; diff --git a/include/nbl/builtin/hlsl/tgmath.hlsl b/include/nbl/builtin/hlsl/tgmath.hlsl index 490c3f9f70..d3afb09bb7 100644 --- a/include/nbl/builtin/hlsl/tgmath.hlsl +++ b/include/nbl/builtin/hlsl/tgmath.hlsl @@ -4,6 +4,7 @@ #ifndef _NBL_BUILTIN_HLSL_TGMATH_INCLUDED_ #define _NBL_BUILTIN_HLSL_TGMATH_INCLUDED_ +#include #include #include @@ -16,7 +17,7 @@ namespace tgmath { template -bool isnan(Float val) +static inline bool isnan(Float val) { using AsUint = typename 
unsigned_integer_of_size::type; using AsFloat = typename float_of_size::type; @@ -25,19 +26,26 @@ bool isnan(Float val) } template <> -bool isnan(uint64_t val) +static inline bool isnan(uint64_t val) { float64_t asFloat = bit_cast(val); return bool((ieee754::extractBiasedExponent(asFloat) == ieee754::traits::specialValueExp) && (val & ieee754::traits::mantissaMask)); } // TODO: better implementation, also i'm not sure this is the right place for this function -template -UINT lerp(UINT a, UINT b, bool c) +template +NBL_CONSTEXPR_STATIC_INLINE Uint lerp(Uint a, Uint b, bool c) { return c ? b : a; } +template +NBL_CONSTEXPR_STATIC_INLINE bool isInf(Uint val) +{ + using AsFloat = typename float_of_size::type; + return (val & ~ieee754::traits::signMask) == ieee754::traits::inf; +} + } } From 5ce988f505bb041d7758d7b328bf766d7248af36 Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Tue, 13 Aug 2024 17:28:52 +0200 Subject: [PATCH 030/432] update DXC submodule --- 3rdparty/dxc/dxc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/dxc/dxc b/3rdparty/dxc/dxc index a08b6cbeb1..bcedaf749f 160000 --- a/3rdparty/dxc/dxc +++ b/3rdparty/dxc/dxc @@ -1 +1 @@ -Subproject commit a08b6cbeb1038d14d0586d10a8cfa507b2fda8eb +Subproject commit bcedaf749fb6325dc41f9b436f1f2ea0a660de5e From 6b0bb287a5de4362c92987b72c961bbfa3882744 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 20 Aug 2024 12:27:00 +0100 Subject: [PATCH 031/432] No Float64 cap --- examples_tests | 2 +- include/nbl/builtin/hlsl/cpp_compat.hlsl | 2 +- .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 390 +++--------------- .../hlsl/emulated_float64_t_utils.hlsl | 365 ++++++++++++++++ include/nbl/builtin/hlsl/ieee754.hlsl | 40 +- .../hlsl/impl/emulated_float64_t_impl.hlsl | 151 ++++--- .../hlsl/math/equations/quadratic.hlsl | 76 ++-- .../builtin/hlsl/math/equations/quartic.hlsl | 6 +- include/nbl/builtin/hlsl/shapes/beziers.hlsl | 87 ++-- include/nbl/builtin/hlsl/tgmath.hlsl | 22 +- src/nbl/builtin/CMakeLists.txt | 3 +- 11 files changed, 648 insertions(+), 496 deletions(-) create mode 100644 include/nbl/builtin/hlsl/emulated_float64_t_utils.hlsl diff --git a/examples_tests b/examples_tests index 9b1edf6a48..8e853f5518 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 9b1edf6a48513f28a7a9312260c3c790a8392232 +Subproject commit 8e853f551805370c64ed72313c1195bf6ededb70 diff --git a/include/nbl/builtin/hlsl/cpp_compat.hlsl b/include/nbl/builtin/hlsl/cpp_compat.hlsl index f3cf538e28..11839d9e0d 100644 --- a/include/nbl/builtin/hlsl/cpp_compat.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat.hlsl @@ -44,7 +44,7 @@ using add_pointer = std::add_pointer; #define NBL_CONSTEXPR const static #define NBL_CONSTEXPR_STATIC_INLINE const static #define NBL_CONSTEXPR_FUNC -#define NBL_CONSTEXPR_INLINE_FUNC +#define NBL_CONSTEXPR_INLINE_FUNC inline #define NBL_CONST_MEMBER_FUNC namespace nbl diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index dd517c2f8c..d6666c018b 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -3,13 +3,6 @@ #include -// weird dxc compiler errors -#ifndef __HLSL_VERSION -#define CONST const -#else -#define CONST -#endif - namespace nbl { namespace hlsl @@ -29,32 +22,54 @@ namespace hlsl NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(emulated_float64_t val) { + return createPreserveBitPattern(bit_cast(float64_t(val))); return createPreserveBitPattern(val.data); 
} NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(int32_t val) { - return emulated_float64_t(bit_cast(float64_t(val))); + return createPreserveBitPattern(bit_cast(float64_t(val))); + return emulated_float64_t::createPreserveBitPattern(impl::castToUint64WithFloat64BitPattern(int64_t(val))); } NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(int64_t val) { - return emulated_float64_t(bit_cast(float64_t(val))); + return createPreserveBitPattern(bit_cast(float64_t(val))); + return emulated_float64_t::createPreserveBitPattern(impl::castToUint64WithFloat64BitPattern(val)); } NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(uint32_t val) { - return emulated_float64_t(bit_cast(float64_t(val))); + return createPreserveBitPattern(bit_cast(float64_t(val))); + return emulated_float64_t::createPreserveBitPattern(impl::castToUint64WithFloat64BitPattern(uint64_t(val))); } NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(uint64_t val) { - return emulated_float64_t(bit_cast(float64_t(val))); + return createPreserveBitPattern(bit_cast(float64_t(val))); + return emulated_float64_t::createPreserveBitPattern(impl::castToUint64WithFloat64BitPattern(val)); + } + + NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(float32_t val) + { + return createPreserveBitPattern(bit_cast(float64_t(val))); + emulated_float64_t output; + output.data = impl::castToUint64WithFloat64BitPattern(val); + return output; } NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(float64_t val) { - return emulated_float64_t(bit_cast(val)); + return createPreserveBitPattern(bit_cast(float64_t(val))); +#ifdef __HLSL_VERSION + emulated_float64_t retval; + uint32_t lo, hi; + asuint(val, lo, hi); + retval.data = (uint64_t(hi) << 32) | lo; + return retval; +#else + return createPreserveBitPattern(reinterpret_cast(val)); +#endif } // TODO: unresolved external symbol imath_half_to_float_table @@ -63,11 +78,6 @@ namespace hlsl return emulated_float64_t(bit_cast(float64_t(val))); }*/ - NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(float32_t val) - { - return emulated_float64_t(bit_cast(float64_t(val))); - } - NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t createPreserveBitPattern(uint64_t val) { return emulated_float64_t(val); @@ -75,9 +85,11 @@ namespace hlsl inline float getAsFloat32() { - // TODO: don't use double - return float(bit_cast(data)); - + // TODO: fix + uint32_t sign = uint32_t((data & ieee754::traits::signMask) >> 32); + uint32_t exponent = (uint32_t(ieee754::extractExponent(data)) + ieee754::traits::exponentBias) + ieee754::traits::mantissaBitCnt; + uint32_t mantissa = uint32_t(data >> (ieee754::traits::mantissaBitCnt - ieee754::traits::mantissaBitCnt)) & ieee754::traits::mantissaMask; + return sign | exponent | mantissa; } #if 0 @@ -91,7 +103,7 @@ namespace hlsl #endif // arithmetic operators - emulated_float64_t operator+(const emulated_float64_t rhs) CONST + emulated_float64_t operator+(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { #if 0 { @@ -321,7 +333,12 @@ namespace hlsl } } - emulated_float64_t operator-(emulated_float64_t rhs) CONST + emulated_float64_t operator+(float rhs) + { + return createPreserveBitPattern(data) + create(rhs); + } + + emulated_float64_t operator-(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { emulated_float64_t lhs = createPreserveBitPattern(data); emulated_float64_t rhsFlipped = rhs.flipSign(); @@ -329,7 +346,12 @@ namespace hlsl return lhs + rhsFlipped; } - emulated_float64_t operator*(emulated_float64_t rhs) CONST + emulated_float64_t operator-(float rhs) 
NBL_CONST_MEMBER_FUNC + { + return createPreserveBitPattern(data) - create(rhs); + } + + emulated_float64_t operator*(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { if(FlushDenormToZero) { @@ -407,6 +429,11 @@ namespace hlsl } } + emulated_float64_t operator*(float rhs) + { + return createPreserveBitPattern(data) * create(rhs); + } + /*emulated_float64_t reciprocal(uint64_t x) { using ThisType = emulated_float64_t; @@ -415,7 +442,7 @@ namespace hlsl return output; }*/ - emulated_float64_t operator/(const emulated_float64_t rhs) CONST + emulated_float64_t operator/(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { if (FlushDenormToZero) { @@ -466,7 +493,7 @@ namespace hlsl // relational operators // TODO: should `FlushDenormToZero` affect relational operators? - bool operator==(emulated_float64_t rhs) CONST + bool operator==(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) return false; @@ -481,11 +508,11 @@ namespace hlsl return !(xored.data); } - bool operator!=(emulated_float64_t rhs) CONST + bool operator!=(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { return !(createPreserveBitPattern(data) == rhs); } - bool operator<(emulated_float64_t rhs) CONST + bool operator<(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) return false; @@ -505,7 +532,7 @@ namespace hlsl return (lhsFlipped & diffBits) < (rhsFlipped & diffBits); } - bool operator>(emulated_float64_t rhs) CONST + bool operator>(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) return true; @@ -525,13 +552,13 @@ namespace hlsl return (lhsFlipped & diffBits) > (rhsFlipped & diffBits); } - bool operator<=(emulated_float64_t rhs) CONST { return !(emulated_float64_t::createPreserveBitPattern(data) > emulated_float64_t::createPreserveBitPattern(rhs.data)); } + bool operator<=(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { return !(emulated_float64_t::createPreserveBitPattern(data) > emulated_float64_t::createPreserveBitPattern(rhs.data)); } bool operator>=(emulated_float64_t rhs) { return !(emulated_float64_t::createPreserveBitPattern(data) < emulated_float64_t::createPreserveBitPattern(rhs.data)); } //logical operators - bool operator&&(emulated_float64_t rhs) CONST { return bool(data) && bool(rhs.data); } - bool operator||(emulated_float64_t rhs) CONST { return bool(data) || bool(rhs.data); } - bool operator!() CONST { return !bool(data); } + bool operator&&(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { return bool(data) && bool(rhs.data); } + bool operator||(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { return bool(data) || bool(rhs.data); } + bool operator!() NBL_CONST_MEMBER_FUNC { return !bool(data); } // TODO: should modify self? 
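Note: operator== above compares bit patterns by XOR-ing them, which is why +0.0 vs -0.0 (equal as floats, different in one bit) gets its own early-out. A tiny host-side restatement, with bitEqual as an illustrative name:

    #include <cstdint>
    #include <cstring>

    // Identical bit patterns XOR to zero; this is bitwise equality, not IEEE
    // equality, so +0.0 / -0.0 and NaN operands must be special-cased separately.
    bool bitEqual(double a, double b)
    {
        uint64_t ua, ub;
        std::memcpy(&ua, &a, sizeof(ua));
        std::memcpy(&ub, &b, sizeof(ub));
        return (ua ^ ub) == 0;
    }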
emulated_float64_t flipSign() @@ -541,7 +568,7 @@ namespace hlsl bool isNaN() { - return tgmath::isnan(bit_cast(data)); + return tgmath::isnan(data); } }; @@ -554,31 +581,31 @@ struct traits_base\ NBL_CONSTEXPR_STATIC_INLINE int16_t mantissaBitCnt = 52;\ };\ template<>\ -static inline uint32_t extractBiasedExponent(Type x)\ +inline uint32_t extractBiasedExponent(Type x)\ {\ return extractBiasedExponent(x.data);\ }\ \ template<>\ -static inline int extractExponent(Type x)\ +inline int extractExponent(Type x)\ {\ return extractExponent(x.data);\ }\ \ template<>\ -NBL_CONSTEXPR_STATIC_INLINE Type replaceBiasedExponent(Type x, typename unsigned_integer_of_size::type biasedExp)\ +NBL_CONSTEXPR_INLINE_FUNC Type replaceBiasedExponent(Type x, typename unsigned_integer_of_size::type biasedExp)\ {\ return Type(replaceBiasedExponent(x.data, biasedExp));\ }\ \ template <>\ -NBL_CONSTEXPR_STATIC_INLINE Type fastMulExp2(Type x, int n)\ +NBL_CONSTEXPR_INLINE_FUNC Type fastMulExp2(Type x, int n)\ {\ return Type(replaceBiasedExponent(x.data, extractBiasedExponent(x) + uint32_t(n)));\ }\ \ template <>\ -NBL_CONSTEXPR_STATIC_INLINE unsigned_integer_of_size::type extractMantissa(Type x)\ +NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size::type extractMantissa(Type x)\ {\ return extractMantissa(x.data);\ }\ @@ -591,296 +618,9 @@ namespace ieee754 IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); } -// TODO: finish it - -// TODO: this is mess, refactorize it -#ifndef __HLSL_VERSION -using ef64_t2 = vector; -using ef64_t3 = vector; -using ef64_t4 = vector; -using ef64_t3x3 = matrix; -using ef64_t2x2 = matrix; -#else -struct ef64_t2 -{ - emulated_float64_t x; - emulated_float64_t y; - - emulated_float64_t calcComponentSum() CONST - { - return x + y; - } - - NBL_CONSTEXPR_STATIC_INLINE ef64_t2 create(emulated_float64_t x, emulated_float64_t y) - { - ef64_t2 output; - output.x = x; - output.y = y; - - return output; - } - - NBL_CONSTEXPR_STATIC_INLINE ef64_t2 create(float val) - { - ef64_t2 output; - output.x = emulated_float64_t::create(val); - output.y = emulated_float64_t::create(val); - - return output; - } - - NBL_CONSTEXPR_STATIC_INLINE ef64_t2 create(float32_t2 val) - { - ef64_t2 output; - output.x = emulated_float64_t::create(val.x); - output.y = emulated_float64_t::create(val.y); - - return output; - } - - ef64_t2 operator+(float rhs) - { - ef64_t2 output; - emulated_float64_t rhsAsEF64 = emulated_float64_t::create(rhs); - output.x = x + rhsAsEF64; - output.y = y + rhsAsEF64; - - return output; - } - - ef64_t2 operator+(emulated_float64_t rhs) - { - ef64_t2 output; - output.x = x + rhs; - output.y = y + rhs; - - return output; - } - - ef64_t2 operator+(ef64_t2 rhs) - { - ef64_t2 output; - output.x = x + rhs.x; - output.y = y + rhs.y; - - return output; - } - - ef64_t2 operator-(float rhs) - { - return create(x, y) + (-rhs); - } - - ef64_t2 operator-(emulated_float64_t rhs) - { - return create(x, y) + (rhs.flipSign()); - } - - ef64_t2 operator-(ef64_t2 rhs) - { - rhs.x = rhs.x.flipSign(); - rhs.y = rhs.y.flipSign(); - return create(x, y) + rhs; - } - - ef64_t2 operator*(float rhs) - { - ef64_t2 output; - emulated_float64_t rhsAsEF64 = emulated_float64_t::create(rhs); - output.x = x * rhsAsEF64; - output.y = y * rhsAsEF64; - - return output; - } - - ef64_t2 operator*(emulated_float64_t rhs) - { - ef64_t2 output; - output.x = x * rhs; - output.y = y * rhs; - - return output; - } - - ef64_t2 operator*(ef64_t2 rhs) - { - ef64_t2 output; - output.x = x * rhs.x; - output.y = y * rhs.y; - - 
return output; - } - - float2 getAsFloat2() - { - return float2(x.getAsFloat32(), y.getAsFloat32()); - } -}; - -struct ef64_t3 -{ - emulated_float64_t x; - emulated_float64_t y; - emulated_float64_t z; - - static ef64_t3 create(NBL_REF_ARG(ef64_t3) other) - { - ef64_t3 output; - - output.x = other.x; - output.y = other.y; - output.z = other.z; - - return output; - } - - static ef64_t3 create(NBL_REF_ARG(ef64_t2) other, emulated_float64_t z) - { - ef64_t3 output; - - output.x = other.x; - output.y = other.y; - output.z = z; - - return output; - } - - static ef64_t3 create(NBL_REF_ARG(ef64_t2) other, int z) - { - ef64_t3 output; - - output.x = other.x; - output.y = other.y; - output.z = emulated_float64_t::create(z); - - return output; - } - - emulated_float64_t calcComponentSum() CONST - { - return x + y + z; - } - - ef64_t3 operator*(NBL_CONST_REF_ARG(ef64_t3) rhs) CONST - { - ef64_t3 output; - output.x = x * rhs.x; - output.y = x * rhs.y; - output.z = x * rhs.z; - - return output; - } -}; - -struct ef64_t4 -{ - emulated_float64_t x; - emulated_float64_t y; - emulated_float64_t z; - emulated_float64_t w; -}; - -struct ef64_t3x3 -{ - ef64_t3 columns[3]; - - ef64_t3x3 getTransposed() CONST - { - ef64_t3x3 output; - - output.columns[1].x = columns[0].y; - output.columns[2].x = columns[0].z; - - output.columns[0].y = columns[1].x; - output.columns[2].y = columns[1].z; - - output.columns[0].z = columns[3].x; - output.columns[1].z = columns[3].y; - - return output; - } - - ef64_t3x3 operator*(NBL_CONST_REF_ARG(ef64_t3x3) rhs) CONST - { - ef64_t3x3 output; - ef64_t3x3 lhsTransposed = getTransposed(); - - output.columns[0].x = (lhsTransposed.columns[0] * rhs.columns[0]).calcComponentSum(); - output.columns[0].y = (lhsTransposed.columns[0] * rhs.columns[1]).calcComponentSum(); - output.columns[0].z = (lhsTransposed.columns[0] * rhs.columns[2]).calcComponentSum(); - - output.columns[1].x = (lhsTransposed.columns[1] * rhs.columns[0]).calcComponentSum(); - output.columns[1].y = (lhsTransposed.columns[1] * rhs.columns[1]).calcComponentSum(); - output.columns[1].z = (lhsTransposed.columns[1] * rhs.columns[2]).calcComponentSum(); - - output.columns[2].x = (lhsTransposed.columns[2] * rhs.columns[0]).calcComponentSum(); - output.columns[2].y = (lhsTransposed.columns[2] * rhs.columns[1]).calcComponentSum(); - output.columns[2].z = (lhsTransposed.columns[2] * rhs.columns[2]).calcComponentSum(); - - return output; - } - - ef64_t3 operator*(NBL_CONST_REF_ARG(ef64_t3) rhs) - { - ef64_t3 output; - ef64_t3x3 lhsTransposed = getTransposed(); - - output.x = (columns[0] * rhs).calcComponentSum(); - output.y = (columns[1] * rhs).calcComponentSum(); - output.z = (columns[2] * rhs).calcComponentSum(); - - return output; - } -}; - -struct ef64_t2x2 -{ - ef64_t2 columns[2]; - - ef64_t2x2 getTransposed() CONST - { - ef64_t2x2 output; - - output.columns[1].x = columns[0].y; - output.columns[0].y = columns[1].x; - - return output; - } - - ef64_t2x2 operator*(NBL_CONST_REF_ARG(ef64_t2x2) rhs) CONST - { - ef64_t2x2 output; - ef64_t2x2 lhsTransposed = getTransposed(); - - output.columns[0].x = (lhsTransposed.columns[0] * rhs.columns[0]).calcComponentSum(); - output.columns[0].y = (lhsTransposed.columns[0] * rhs.columns[1]).calcComponentSum(); - - output.columns[1].x = (lhsTransposed.columns[1] * rhs.columns[0]).calcComponentSum(); - output.columns[1].y = (lhsTransposed.columns[1] * rhs.columns[1]).calcComponentSum(); - - return output; - } - - ef64_t2 operator*(NBL_CONST_REF_ARG(ef64_t2) rhs) - { - ef64_t2 output; - ef64_t2x2 
lhsTransposed = getTransposed(); - - output.x = (columns[0] * rhs).calcComponentSum(); - output.y = (columns[1] * rhs).calcComponentSum(); - - return output; - } -}; - -#endif - } - } -#undef CONST - #undef FLOAT_ROUND_NEAREST_EVEN #undef FLOAT_ROUND_TO_ZERO #undef FLOAT_ROUND_DOWN diff --git a/include/nbl/builtin/hlsl/emulated_float64_t_utils.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t_utils.hlsl new file mode 100644 index 0000000000..4fe18cc5e8 --- /dev/null +++ b/include/nbl/builtin/hlsl/emulated_float64_t_utils.hlsl @@ -0,0 +1,365 @@ +#ifndef _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_UTILS_INCLUDED_ +#define _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_UTILS_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ +// should i use this namespace? +//namespace ef64_util +//{ +// TODO: this is mess, refactorize it +#ifndef __HLSL_VERSION +using ef64_t2 = float64_t2; +using ef64_t3 = float64_t3; +using ef64_t4 = float64_t4; +using ef64_t3x3 = float64_t3x3; +using ef64_t2x2 = float64_t4x4; +#else +struct ef64_t2 +{ + emulated_float64_t x; + emulated_float64_t y; + + emulated_float64_t calcComponentSum() NBL_CONST_MEMBER_FUNC + { + return x + y; + } + + NBL_CONSTEXPR_STATIC_INLINE ef64_t2 create(emulated_float64_t x, emulated_float64_t y) + { + ef64_t2 output; + output.x = x; + output.y = y; + + return output; + } + + NBL_CONSTEXPR_STATIC_INLINE ef64_t2 create(float val) + { + ef64_t2 output; + output.x = emulated_float64_t::create(val); + output.y = emulated_float64_t::create(val); + + return output; + } + + NBL_CONSTEXPR_STATIC_INLINE ef64_t2 create(float32_t2 val) + { + ef64_t2 output; + output.x = emulated_float64_t::create(val.x); + output.y = emulated_float64_t::create(val.y); + + return output; + } + + NBL_CONSTEXPR_STATIC_INLINE ef64_t2 create(uint32_t2 val) + { + ef64_t2 output; + output.x = emulated_float64_t::create(val.x); + output.y = emulated_float64_t::create(val.y); + + return output; + } + + ef64_t2 operator+(float rhs) + { + ef64_t2 output; + emulated_float64_t rhsAsEF64 = emulated_float64_t::create(rhs); + output.x = x + rhsAsEF64; + output.y = y + rhsAsEF64; + + return output; + } + + ef64_t2 operator+(emulated_float64_t rhs) + { + ef64_t2 output; + output.x = x + rhs; + output.y = y + rhs; + + return output; + } + + ef64_t2 operator+(ef64_t2 rhs) + { + ef64_t2 output; + output.x = x + rhs.x; + output.y = y + rhs.y; + + return output; + } + + ef64_t2 operator-(float rhs) + { + return create(x, y) + (-rhs); + } + + ef64_t2 operator-(emulated_float64_t rhs) + { + return create(x, y) + (rhs.flipSign()); + } + + ef64_t2 operator-(ef64_t2 rhs) + { + rhs.x = rhs.x.flipSign(); + rhs.y = rhs.y.flipSign(); + return create(x, y) + rhs; + } + + ef64_t2 operator*(float rhs) + { + ef64_t2 output; + emulated_float64_t rhsAsEF64 = emulated_float64_t::create(rhs); + output.x = x * rhsAsEF64; + output.y = y * rhsAsEF64; + + return output; + } + + ef64_t2 operator*(emulated_float64_t rhs) + { + ef64_t2 output; + output.x = x * rhs; + output.y = y * rhs; + + return output; + } + + ef64_t2 operator*(ef64_t2 rhs) + { + ef64_t2 output; + output.x = x * rhs.x; + output.y = y * rhs.y; + + return output; + } + +#ifdef __HLSL_VERSION + float2 getAsFloat2() + { + return float2(x.getAsFloat32(), y.getAsFloat32()); + } +#endif +}; + +struct ef64_t3 +{ + emulated_float64_t x; + emulated_float64_t y; + emulated_float64_t z; + + NBL_CONSTEXPR_STATIC_INLINE ef64_t3 create(NBL_REF_ARG(ef64_t3) other) + { + ef64_t3 output; + + output.x = other.x; + output.y = other.y; + output.z = other.z; + + return 
output; + } + + NBL_CONSTEXPR_STATIC_INLINE ef64_t3 create(NBL_REF_ARG(ef64_t2) other, emulated_float64_t z) + { + ef64_t3 output; + + output.x = other.x; + output.y = other.y; + output.z = z; + + return output; + } + + NBL_CONSTEXPR_STATIC_INLINE ef64_t3 create(NBL_REF_ARG(ef64_t2) other, int z) + { + ef64_t3 output; + + output.x = other.x; + output.y = other.y; + output.z = emulated_float64_t::create(z); + + return output; + } + + emulated_float64_t calcComponentSum() NBL_CONST_MEMBER_FUNC + { + return x + y + z; + } + + ef64_t3 operator*(NBL_CONST_REF_ARG(ef64_t3) rhs) NBL_CONST_MEMBER_FUNC + { + ef64_t3 output; + output.x = x * rhs.x; + output.y = y * rhs.y; + output.z = z * rhs.z; + + return output; + } +}; + +struct ef64_t4 +{ + emulated_float64_t x; + emulated_float64_t y; + emulated_float64_t z; + emulated_float64_t w; +}; + +struct ef64_t3x3 +{ + ef64_t3 columns[3]; + + ef64_t3x3 getTransposed() NBL_CONST_MEMBER_FUNC + { + ef64_t3x3 output; + + output.columns[0].x = columns[0].x; + output.columns[1].x = columns[0].y; + output.columns[2].x = columns[0].z; + + output.columns[0].y = columns[1].x; + output.columns[1].y = columns[1].y; + output.columns[2].y = columns[1].z; + + output.columns[0].z = columns[2].x; + output.columns[1].z = columns[2].y; + output.columns[2].z = columns[2].z; + + return output; + } + + ef64_t3x3 operator*(NBL_CONST_REF_ARG(ef64_t3x3) rhs) NBL_CONST_MEMBER_FUNC + { + ef64_t3x3 output; + ef64_t3x3 lhsTransposed = getTransposed(); + + output.columns[0].x = (lhsTransposed.columns[0] * rhs.columns[0]).calcComponentSum(); + output.columns[0].y = (lhsTransposed.columns[0] * rhs.columns[1]).calcComponentSum(); + output.columns[0].z = (lhsTransposed.columns[0] * rhs.columns[2]).calcComponentSum(); + + output.columns[1].x = (lhsTransposed.columns[1] * rhs.columns[0]).calcComponentSum(); + output.columns[1].y = (lhsTransposed.columns[1] * rhs.columns[1]).calcComponentSum(); + output.columns[1].z = (lhsTransposed.columns[1] * rhs.columns[2]).calcComponentSum(); + + output.columns[2].x = (lhsTransposed.columns[2] * rhs.columns[0]).calcComponentSum(); + output.columns[2].y = (lhsTransposed.columns[2] * rhs.columns[1]).calcComponentSum(); + output.columns[2].z = (lhsTransposed.columns[2] * rhs.columns[2]).calcComponentSum(); + + // TODO: avoid transpose + return output.getTransposed(); + } + + ef64_t3 operator*(NBL_CONST_REF_ARG(ef64_t3) rhs) + { + ef64_t3 output; + ef64_t3x3 lhsTransposed = getTransposed(); + + output.x = (columns[0] * rhs).calcComponentSum(); + output.y = (columns[1] * rhs).calcComponentSum(); + output.z = (columns[2] * rhs).calcComponentSum(); + + return output; + } +}; + +struct ef64_t2x2 +{ + ef64_t2 columns[2]; + + ef64_t2x2 getTransposed() NBL_CONST_MEMBER_FUNC + { + ef64_t2x2 output; + + output.columns[0].x = columns[0].x; + output.columns[1].x = columns[0].y; + + output.columns[0].y = columns[1].x; + output.columns[1].y = columns[1].y; + + return output; + } + + ef64_t2x2 operator*(NBL_CONST_REF_ARG(ef64_t2x2) rhs) NBL_CONST_MEMBER_FUNC + { + ef64_t2x2 output; + ef64_t2x2 lhsTransposed = getTransposed(); + + output.columns[0].x = (lhsTransposed.columns[0] * rhs.columns[0]).calcComponentSum(); + output.columns[0].y = (lhsTransposed.columns[0] * rhs.columns[1]).calcComponentSum(); + + output.columns[1].x = (lhsTransposed.columns[1] * rhs.columns[0]).calcComponentSum(); + output.columns[1].y = (lhsTransposed.columns[1] * rhs.columns[1]).calcComponentSum(); + + return output.getTransposed(); + } + + ef64_t2 operator*(NBL_CONST_REF_ARG(ef64_t2) rhs) + 
{ + ef64_t2 output; + ef64_t2x2 lhsTransposed = getTransposed(); + + output.x = (columns[0] * rhs).calcComponentSum(); + output.y = (columns[1] * rhs).calcComponentSum(); + + return output; + } +}; + +#endif + +// struct VecT is solution to +// error: 'nbl::hlsl::emulated_float64_t' cannot be used as a type parameter where a scalar is required +// using float_t2 = typename conditional >::value, ef64_t2, vector >::type; + +// TODO: better solution + +#ifndef __HLSL_VERSION +using F64_t = double; +#else +using F64_t = emulated_float64_t; +#endif + +template +struct VecT { using type = void; }; + +template<> +struct VecT { using type = vector; }; +template<> +struct VecT { using type = vector; }; +template<> +struct VecT { using type = vector; }; + +#ifndef __HLSL_VERSION +template<> +struct VecT { using type = float64_t2; }; +template<> +struct VecT { using type = float64_t3; }; +template<> +struct VecT { using type = float64_t4; }; +#endif + +template<> +struct VecT, 2> { using type = ef64_t2; }; +template<> +struct VecT, 3> { using type = ef64_t3; }; +template<> +struct VecT, 4> { using type = ef64_t4; }; + +template +struct Mat2x2T { using type = void; }; +template<> +struct Mat2x2T { using type = float32_t2x2; }; +#ifndef __HLSL_VERSION +template<> +struct Mat2x2T { using type = float64_t2x2; }; +#endif +template<> +struct Mat2x2T > { using type = ef64_t2x2; }; + +//} +} +} +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index e92b45713f..13308bb2dd 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -11,10 +11,12 @@ namespace hlsl { namespace ieee754 { + +// TODO: move to builtin/hlsl/impl/ieee754_impl.hlsl? namespace impl { template - NBL_CONSTEXPR_STATIC_INLINE bool isTypeAllowed() + NBL_CONSTEXPR_INLINE_FUNC bool isTypeAllowed() { return is_same::value || is_same::value || @@ -25,25 +27,25 @@ namespace impl } template - NBL_CONSTEXPR_STATIC_INLINE typename unsigned_integer_of_size::type castToUintType(T x) + NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type castToUintType(T x) { using AsUint = typename unsigned_integer_of_size::type; return bit_cast(x); } // to avoid bit cast from uintN_t to uintN_t - template <> NBL_CONSTEXPR_STATIC_INLINE unsigned_integer_of_size<2>::type castToUintType(uint16_t x) { return x; } - template <> NBL_CONSTEXPR_STATIC_INLINE unsigned_integer_of_size<4>::type castToUintType(uint32_t x) { return x; } - template <> NBL_CONSTEXPR_STATIC_INLINE unsigned_integer_of_size<8>::type castToUintType(uint64_t x) { return x; } + template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size<2>::type castToUintType(uint16_t x) { return x; } + template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size<4>::type castToUintType(uint32_t x) { return x; } + template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size<8>::type castToUintType(uint64_t x) { return x; } template - NBL_CONSTEXPR_STATIC_INLINE T castBackToFloatType(T x) + NBL_CONSTEXPR_INLINE_FUNC T castBackToFloatType(T x) { using AsFloat = typename float_of_size::type; return bit_cast(x); } - template<> NBL_CONSTEXPR_STATIC_INLINE uint16_t castBackToFloatType(uint16_t x) { return x; } - template<> NBL_CONSTEXPR_STATIC_INLINE uint32_t castBackToFloatType(uint32_t x) { return x; } - template<> NBL_CONSTEXPR_STATIC_INLINE uint64_t castBackToFloatType(uint64_t x) { return x; } + template<> NBL_CONSTEXPR_INLINE_FUNC uint16_t castBackToFloatType(uint16_t x) { return x; } + 
template<> NBL_CONSTEXPR_INLINE_FUNC uint32_t castBackToFloatType(uint32_t x) { return x; } + template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t castBackToFloatType(uint64_t x) { return x; } } template @@ -89,37 +91,39 @@ struct traits : traits_base NBL_CONSTEXPR_STATIC_INLINE bit_rep_t inf = exponentMask; NBL_CONSTEXPR_STATIC_INLINE bit_rep_t specialValueExp = (1ull << base_t::exponentBitCnt) - 1; NBL_CONSTEXPR_STATIC_INLINE bit_rep_t quietNaN = exponentMask | (1ull << (base_t::mantissaBitCnt - 1)); + NBL_CONSTEXPR_STATIC_INLINE bit_rep_t max = ((1ull << (sizeof(Float) * 8 - 1)) - 1) & (~(1ull << base_t::mantissaBitCnt)); + NBL_CONSTEXPR_STATIC_INLINE bit_rep_t min = 1ull << base_t::mantissaBitCnt; }; template -static inline uint32_t extractBiasedExponent(T x) +inline uint32_t extractBiasedExponent(T x) { using AsUint = typename unsigned_integer_of_size::type; return glsl::bitfieldExtract(impl::castToUintType(x), traits::type>::mantissaBitCnt, traits::type>::exponentBitCnt); } template<> -static inline uint32_t extractBiasedExponent(uint64_t x) +inline uint32_t extractBiasedExponent(uint64_t x) { const uint32_t highBits = uint32_t(x >> 32); return glsl::bitfieldExtract(highBits, traits::mantissaBitCnt - 32, traits::exponentBitCnt); } template<> -static inline uint32_t extractBiasedExponent(float64_t x) +inline uint32_t extractBiasedExponent(float64_t x) { return extractBiasedExponent(impl::castToUintType(x)); } template -static inline int extractExponent(T x) +inline int extractExponent(T x) { using AsFloat = typename float_of_size::type; return int(extractBiasedExponent(x)) - int(traits::exponentBias); } template -NBL_CONSTEXPR_STATIC_INLINE T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp) +NBL_CONSTEXPR_INLINE_FUNC T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp) { // TODO: //staticAssertTmp(impl::isTypeAllowed(), "Invalid type! 
Only floating point or unsigned integer types are allowed."); @@ -129,26 +133,26 @@ NBL_CONSTEXPR_STATIC_INLINE T replaceBiasedExponent(T x, typename unsigned_integ // performs no overflow tests, returns x*exp2(n) template -NBL_CONSTEXPR_STATIC_INLINE T fastMulExp2(T x, int n) +NBL_CONSTEXPR_INLINE_FUNC T fastMulExp2(T x, int n) { return replaceBiasedExponent(x, extractBiasedExponent(x) + uint32_t(n)); } template -NBL_CONSTEXPR_STATIC_INLINE typename unsigned_integer_of_size::type extractMantissa(T x) +NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractMantissa(T x) { using AsUint = typename unsigned_integer_of_size::type; return impl::castToUintType(x) & traits::type>::mantissaMask; } template -NBL_CONSTEXPR_STATIC_INLINE typename unsigned_integer_of_size::type extractSign(T x) +NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSign(T x) { return (impl::castToUintType(x) & traits::signMask) >> ((sizeof(T) * 8) - 1); } template -NBL_CONSTEXPR_STATIC_INLINE typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) +NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) { return impl::castToUintType(x) & traits::signMask; } diff --git a/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl b/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl index da9586207f..bdaa9202a9 100644 --- a/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl @@ -47,7 +47,7 @@ namespace hlsl { namespace impl { -NBL_CONSTEXPR_STATIC_INLINE uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) +NBL_CONSTEXPR_INLINE_FUNC uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) { uint64_t2 output; output.x = mantissa64 >> (64 - ieee754::traits::mantissaBitCnt); @@ -56,24 +56,102 @@ NBL_CONSTEXPR_STATIC_INLINE uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) return output; } -template -NBL_CONSTEXPR_STATIC_INLINE uint64_t promoteToUint64(T val) +NBL_CONSTEXPR_INLINE_FUNC uint64_t packFloat64(uint32_t zSign, int zExp, uint32_t zFrac0, uint32_t zFrac1) { - using AsFloat = typename float_of_size::type; - uint64_t asUint = ieee754::impl::castToUintType(val); + uint32_t2 z; - const uint64_t sign = (uint64_t(ieee754::traits::signMask) & asUint) << (sizeof(float64_t) - sizeof(T)); - const int64_t newExponent = ieee754::extractExponent(val) + ieee754::traits::exponentBias; + z.x = zSign + (uint32_t(zExp) << 20) + zFrac0; + z.y = zFrac1; - const uint64_t exp = (uint64_t(ieee754::extractExponent(val)) + ieee754::traits::exponentBias) << (ieee754::traits::mantissaBitCnt); - const uint64_t mantissa = (uint64_t(ieee754::traits::mantissaMask) & asUint) << (ieee754::traits::exponentBias - ieee754::traits::mantissaBitCnt); + uint64_t output = 0u; + output |= (uint64_t(z.x) << 32) & 0xFFFFFFFF00000000ull; + output |= uint64_t(z.y); + return output; +} - return sign | exp | mantissa; +NBL_CONSTEXPR_INLINE_FUNC uint32_t2 packUint64(uint64_t val) +{ + return uint32_t2((val & 0xFFFFFFFF00000000ull) >> 32, val & 0x00000000FFFFFFFFull); +} + +NBL_CONSTEXPR_INLINE_FUNC uint64_t unpackUint64(uint32_t2 val) +{ + return ((uint64_t(val.x) & 0x00000000FFFFFFFFull) << 32) | uint64_t(val.y); +} + +inline uint64_t castToUint64WithFloat64BitPattern(float32_t val) +{ + uint32_t asUint = ieee754::impl::castToUintType(val); + + const uint64_t sign = (uint64_t(ieee754::traits::signMask) & asUint) << (sizeof(float32_t) * 8); + + const uint64_t biasedExp = 
(uint64_t(ieee754::extractExponent(val)) + ieee754::traits::exponentBias) << (ieee754::traits::mantissaBitCnt); + const uint64_t mantissa = (uint64_t(ieee754::traits::mantissaMask) & asUint) << (ieee754::traits::mantissaBitCnt - ieee754::traits::mantissaBitCnt); + + return sign | biasedExp | mantissa; }; -template<> NBL_CONSTEXPR_STATIC_INLINE uint64_t promoteToUint64(float64_t val) { return bit_cast(val); } +inline uint64_t castToUint64WithFloat64BitPattern(uint64_t val) +{ + if (val == 0) + return val; + +#ifndef __HLSL_VERSION + int exp = findMSB(val); +#else + uint32_t2 valPacked = packUint64(val); + int exp = valPacked.x ? firstbithigh(valPacked.x) + 32 : firstbithigh(valPacked.y); +#endif + uint64_t mantissa; + + int shiftCnt = 52 - exp; + if (shiftCnt >= 0) + { + mantissa = val << shiftCnt; + } + else + { + const int shiftCntAbs = -shiftCnt; + uint64_t roundingBit = 1ull << (shiftCnt - 1); + uint64_t stickyBitMask = roundingBit - 1; + uint64_t stickyBit = val & stickyBitMask; + + mantissa = val >> shiftCntAbs; -NBL_CONSTEXPR_STATIC_INLINE uint32_t2 umulExtended(uint32_t lhs, uint32_t rhs) + if ((val & roundingBit) && (!stickyBit)) + { + bool isEven = mantissa & 1; + if (!isEven) + mantissa++; + } + else if ((val & roundingBit) && (stickyBit || (mantissa & 1))) + val += roundingBit; + + + + //val += (1ull << (shiftCnt)) - 1; + //mantissa = val >> shiftCntAbs; + + if (mantissa & 1ull << 53) + { + mantissa >>= 1; + exp++; + } + } + mantissa &= ieee754::traits::mantissaMask; + const uint64_t biasedExp = uint64_t(ieee754::traits::exponentBias + exp) << ieee754::traits::mantissaBitCnt; + + return biasedExp | mantissa; +}; + +inline uint64_t castToUint64WithFloat64BitPattern(int64_t val) +{ + const uint64_t sign = val & ieee754::traits::signMask; + const uint64_t absVal = abs(val); + return sign | castToUint64WithFloat64BitPattern(absVal); +}; + +NBL_CONSTEXPR_INLINE_FUNC uint32_t2 umulExtended(uint32_t lhs, uint32_t rhs) { uint64_t product = uint64_t(lhs) * uint64_t(rhs); uint32_t2 output; @@ -82,7 +160,7 @@ NBL_CONSTEXPR_STATIC_INLINE uint32_t2 umulExtended(uint32_t lhs, uint32_t rhs) return output; } -NBL_CONSTEXPR_STATIC_INLINE uint64_t propagateFloat64NaN(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_INLINE_FUNC uint64_t propagateFloat64NaN(uint64_t lhs, uint64_t rhs) { #if defined RELAXED_NAN_PROPAGATION return lhs | rhs; @@ -95,30 +173,7 @@ NBL_CONSTEXPR_STATIC_INLINE uint64_t propagateFloat64NaN(uint64_t lhs, uint64_t #endif } -NBL_CONSTEXPR_STATIC_INLINE uint64_t packFloat64(uint32_t zSign, int zExp, uint32_t zFrac0, uint32_t zFrac1) -{ - uint32_t2 z; - - z.x = zSign + (uint32_t(zExp) << 20) + zFrac0; - z.y = zFrac1; - - uint64_t output = 0u; - output |= (uint64_t(z.x) << 32) & 0xFFFFFFFF00000000ull; - output |= uint64_t(z.y); - return output; -} - -NBL_CONSTEXPR_STATIC_INLINE uint32_t2 packUint64(uint64_t val) -{ - return uint32_t2((val & 0xFFFFFFFF00000000ull) >> 32, val & 0x00000000FFFFFFFFull); -} - -NBL_CONSTEXPR_STATIC_INLINE uint64_t unpackUint64(uint32_t2 val) -{ - return ((uint64_t(val.x) & 0x00000000FFFFFFFFull) << 32) | uint64_t(val.y); -} - -NBL_CONSTEXPR_STATIC_INLINE uint32_t2 add64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) +NBL_CONSTEXPR_INLINE_FUNC uint32_t2 add64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) { uint32_t2 output; output.y = a1 + b1; @@ -127,7 +182,7 @@ NBL_CONSTEXPR_STATIC_INLINE uint32_t2 add64(uint32_t a0, uint32_t a1, uint32_t b return output; } -NBL_CONSTEXPR_STATIC_INLINE uint32_t2 sub64(uint32_t a0, uint32_t a1, uint32_t b0, 
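// Illustrative check of the float32 -> float64 bit-pattern widening above (assumes the usual IEEE-754
// layouts; the traits<> template arguments are elided in this listing, so the exact instantiations are
// an assumption):
//   1.5f              -> sign 0, biased exponent 127, 23-bit mantissa 0x400000
//   re-biased exp      = 127 - 127 + 1023 = 1023 = 0x3FF
//   widened mantissa   = 0x400000 << (52 - 23) = 0x0008000000000000
//   packed result      = 0x3FF8000000000000, i.e. bit_cast<uint64_t>(1.5)
// The uint64_t overload additionally has to drop bits when the value needs more than 53 significant
// bits; it is intended to round to nearest-even using the round bit and the sticky bits below it.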
uint32_t b1) +NBL_CONSTEXPR_INLINE_FUNC uint32_t2 sub64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) { uint32_t2 output; output.y = a1 - b1; @@ -146,7 +201,7 @@ static inline int countLeadingZeros32(uint32_t val) #endif } -NBL_CONSTEXPR_STATIC_INLINE uint32_t2 shift64RightJamming(uint32_t2 val, int count) +NBL_CONSTEXPR_INLINE_FUNC uint32_t2 shift64RightJamming(uint32_t2 val, int count) { uint32_t2 output; const int negCount = (-count) & 31; @@ -166,7 +221,7 @@ NBL_CONSTEXPR_STATIC_INLINE uint32_t2 shift64RightJamming(uint32_t2 val, int cou } -NBL_CONSTEXPR_STATIC_INLINE uint32_t4 mul64to128(uint32_t4 mantissasPacked) +NBL_CONSTEXPR_INLINE_FUNC uint32_t4 mul64to128(uint32_t4 mantissasPacked) { uint32_t4 output; uint32_t more1 = 0u; @@ -205,7 +260,7 @@ NBL_CONSTEXPR_STATIC_INLINE uint32_t4 mul64to128(uint32_t4 mantissasPacked) return output; } -NBL_CONSTEXPR_STATIC_INLINE uint32_t3 shift64ExtraRightJamming(uint32_t3 val, int count) +NBL_CONSTEXPR_INLINE_FUNC uint32_t3 shift64ExtraRightJamming(uint32_t3 val, int count) { uint32_t3 output; output.x = 0u; @@ -233,7 +288,7 @@ NBL_CONSTEXPR_STATIC_INLINE uint32_t3 shift64ExtraRightJamming(uint32_t3 val, in return output; } -NBL_CONSTEXPR_STATIC_INLINE uint64_t shortShift64Left(uint64_t val, int count) +NBL_CONSTEXPR_INLINE_FUNC uint64_t shortShift64Left(uint64_t val, int count) { const uint32_t2 packed = packUint64(val); @@ -245,12 +300,12 @@ NBL_CONSTEXPR_STATIC_INLINE uint64_t shortShift64Left(uint64_t val, int count) return unpackUint64(output); }; -NBL_CONSTEXPR_STATIC_INLINE uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) +NBL_CONSTEXPR_INLINE_FUNC uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) { return signShifted + expShifted + mantissa; } -NBL_CONSTEXPR_STATIC_INLINE uint64_t roundAndPackFloat64(uint64_t zSign, int zExp, uint32_t3 mantissaExtended) +NBL_CONSTEXPR_INLINE_FUNC uint64_t roundAndPackFloat64(uint64_t zSign, int zExp, uint32_t3 mantissaExtended) { bool roundNearestEven; bool increment; @@ -372,7 +427,7 @@ static inline void normalizeFloat64Subnormal(uint64_t mantissa, outMantissa = tgmath::lerp(outMantissa, unpackUint64(temp), mantissaPacked.x == 0); } -NBL_CONSTEXPR_STATIC_INLINE bool areBothInfinity(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) { lhs &= ~ieee754::traits::signMask; rhs &= ~ieee754::traits::signMask; @@ -380,17 +435,17 @@ NBL_CONSTEXPR_STATIC_INLINE bool areBothInfinity(uint64_t lhs, uint64_t rhs) return lhs == rhs && lhs == ieee754::traits::inf; } -NBL_CONSTEXPR_STATIC_INLINE bool areBothSameSignInfinity(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_INLINE_FUNC bool areBothSameSignInfinity(uint64_t lhs, uint64_t rhs) { return lhs == rhs && (lhs & ~ieee754::traits::signMask) == ieee754::traits::inf; } -NBL_CONSTEXPR_STATIC_INLINE bool areBothZero(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_INLINE_FUNC bool areBothZero(uint64_t lhs, uint64_t rhs) { return ((lhs << 1) == 0ull) && ((rhs << 1) == 0ull); } -NBL_CONSTEXPR_STATIC_INLINE bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_INLINE_FUNC bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) { return ((lhs << 1) == 0ull) && (lhs == rhs); } diff --git a/include/nbl/builtin/hlsl/math/equations/quadratic.hlsl b/include/nbl/builtin/hlsl/math/equations/quadratic.hlsl index 9ec0156bc4..d8dacdc7bb 100644 --- a/include/nbl/builtin/hlsl/math/equations/quadratic.hlsl +++ 
b/include/nbl/builtin/hlsl/math/equations/quadratic.hlsl @@ -5,6 +5,8 @@ #ifndef _NBL_BUILTIN_HLSL_MATH_EQUATIONS_QUADRATIC_INCLUDED_ #define _NBL_BUILTIN_HLSL_MATH_EQUATIONS_QUADRATIC_INCLUDED_ +#include + // TODO: Later include from correct hlsl header #ifndef nbl_hlsl_FLT_EPSILON #define nbl_hlsl_FLT_EPSILON 5.96046447754e-08 @@ -24,53 +26,53 @@ namespace math { namespace equations { - template - struct Quadratic +template +struct Quadratic +{ + using float_t22 = typename VecT::type; + using float_t33 = typename VecT::type; + + float_t a; + float_t b; + float_t c; + + static Quadratic construct(float_t a, float_t b, float_t c) { - using float_t2 = vector; - using float_t3 = vector; + Quadratic ret = { a, b, c }; + return ret; + } - float_t a; - float_t b; - float_t c; + float_t evaluate(float_t t) + { + return t * (a * t + b) + c; + } - static Quadratic construct(float_t a, float_t b, float_t c) - { - Quadratic ret = { a, b, c }; - return ret; - } + float_t22 computeRoots() + { + float_t22 ret; + + const float_t det = b * b - 4.0 * a * c; + const float_t detSqrt = sqrt(det); + const float_t rcp = 0.5 / a; + const float_t bOver2A = b * rcp; - float_t evaluate(float_t t) + float_t t0 = 0.0, t1 = 0.0; + if (b >= 0) { - return t * (a * t + b) + c; + ret[0] = -detSqrt * rcp - bOver2A; + ret[1] = 2 * c / (-b - detSqrt); } - - float_t2 computeRoots() + else { - float_t2 ret; - - const float_t det = b * b - 4.0 * a * c; - const float_t detSqrt = sqrt(det); - const float_t rcp = 0.5 / a; - const float_t bOver2A = b * rcp; - - float_t t0 = 0.0, t1 = 0.0; - if (b >= 0) - { - ret[0] = -detSqrt * rcp - bOver2A; - ret[1] = 2 * c / (-b - detSqrt); - } - else - { - ret[0] = 2 * c / (-b + detSqrt); - ret[1] = +detSqrt * rcp - bOver2A; - } - - return ret; + ret[0] = 2 * c / (-b + detSqrt); + ret[1] = +detSqrt * rcp - bOver2A; } + return ret; + } + - }; +}; } } } diff --git a/include/nbl/builtin/hlsl/math/equations/quartic.hlsl b/include/nbl/builtin/hlsl/math/equations/quartic.hlsl index b94de0afb5..882f7a33ba 100644 --- a/include/nbl/builtin/hlsl/math/equations/quartic.hlsl +++ b/include/nbl/builtin/hlsl/math/equations/quartic.hlsl @@ -24,9 +24,9 @@ namespace equations template struct Quartic { - using float_t2 = vector; - using float_t3 = vector; - using float_t4 = vector; + using float_t2 = typename VecT::type; + using float_t3 = typename VecT::type; + using float_t4 = typename VecT::type; // form: ax^4 + bx^3 + cx^2 + dx + e float_t a; diff --git a/include/nbl/builtin/hlsl/shapes/beziers.hlsl b/include/nbl/builtin/hlsl/shapes/beziers.hlsl index e454c4c8ef..f966fc138c 100644 --- a/include/nbl/builtin/hlsl/shapes/beziers.hlsl +++ b/include/nbl/builtin/hlsl/shapes/beziers.hlsl @@ -11,6 +11,7 @@ #include #include #include +#include // TODO: Later include from correct hlsl header (numeric_limits.hlsl) #ifndef nbl_hlsl_FLT_EPSILON @@ -28,51 +29,16 @@ namespace nbl namespace hlsl { -// TODO(emulated_float64_t): this shouldn't be in the nbl::hlsl space -// struct VecT is solution to -// error: 'nbl::hlsl::emulated_float64_t' cannot be used as a type parameter where a scalar is required -// using float_t2 = typename conditional >::value, ef64_t2, vector >::type; -#ifdef __HLSL_VERSION -template -struct VecT { using type = void; }; -template<> -struct VecT { using type = vector; }; -template<> -struct VecT { using type = vector; }; -template<> -struct VecT { using type = vector; }; -template<> -struct VecT, 2> { using type = ef64_t2; }; -template<> -struct VecT, 3> { using type = ef64_t3; }; -template<> -struct 
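// Note on the root formulas used in computeRoots() above (standard algebra, not specific to this
// patch): since t0 * t1 = c / a, the classic (-b ± sqrt(det)) / (2a) pair can be mixed with its
// reciprocal form so that the subtraction never cancels against b:
//   b >= 0 :  t0 = (-b - sqrt(det)) / (2a),   t1 = 2c / (-b - sqrt(det))
//   b <  0 :  t0 = 2c / (-b + sqrt(det)),     t1 = (-b + sqrt(det)) / (2a)
// Example: a = 1, b = 4, c = 3 gives det = 4, t0 = (-4 - 2) / 2 = -3 and t1 = 2*3 / (-4 - 2) = -1.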
VecT, 4> { using type = float64_t4; }; - -template -struct Mat2x2T { using type = float64_t2x2; }; -template<> -struct Mat2x2T { using type = float64_t2x2; }; -template<> -struct Mat2x2T > { using type = ef64_t2x2; }; - -#endif - namespace shapes { template struct QuadraticBezier { -#ifndef __HLSL_VERSION - using float_t2 = vector; - using float_t3 = vector; - using float_t4 = vector; - using float_t2x2 = matrix; -#else using float_t2 = typename VecT::type; using float_t3 = typename VecT::type; using float_t4 = typename VecT::type; using float_t2x2 = typename Mat2x2T::type; -#endif + float_t2 P0; float_t2 P1; float_t2 P2; @@ -245,18 +211,29 @@ struct QuadraticBezier template struct Quadratic { +#ifndef __HLSL_VERSION using scalar_t = float_t; using float_t2 = vector; using float_t3 = vector; using float_t2x2 = matrix; - +#else + using scalar_t = float_t; + using float_t2 = typename VecT::type; + using float_t3 = typename VecT::type; + using float_t2x2 = typename Mat2x2T::type; +#endif + float_t2 A; float_t2 B; float_t2 C; struct AnalyticArcLengthCalculator { +#ifndef __HLSL_VERSION using float_t2 = vector; +#else + using float_t2 = typename VecT::type; +#endif static AnalyticArcLengthCalculator construct(float_t lenA2, float_t AdotB, float_t a, float_t b, float_t c, float_t b_over_4a) { @@ -547,6 +524,13 @@ struct Quadratic template static math::equations::Quartic getBezierBezierIntersectionEquation(NBL_CONST_REF_ARG(QuadraticBezier) lhs, NBL_CONST_REF_ARG(QuadraticBezier) rhs) { +#ifndef __HLSL_VERSION + using scalar_t = double; +#else + using scalar_t = emulated_float64_t; +#endif + using float_t2 = typename VecT::type; + // Algorithm based on Computer Aided Geometric Design: // https://scholarsarchive.byu.edu/cgi/viewcontent.cgi?article=1000&context=facpub#page99 // Chapter 17.6 describes the implicitization of a curve, which transforms it into the following format: @@ -581,30 +565,37 @@ static math::equations::Quartic getBezierBezierIntersectionEquation(NBL Quadratic quadratic = Quadratic::constructFromBezier(lhs); // for convenience - const float64_t2 A = quadratic.A; - const float64_t2 B = quadratic.B; - const float64_t2 C = quadratic.C; + const float_t2 A = quadratic.A; + const float_t2 B = quadratic.B; + const float_t2 C = quadratic.C; // substitute parametric into implicit equation: // Getting the quartic params - double a = ((A.x * A.x) * k0) + (A.x * A.y * k1) + (A.y * A.y * k2); - double b = (2 * A.x * B.x * k0) + (A.x * B.y * k1) + (B.x * A.y * k1) + (2 * A.y * B.y * k2); - double c = (2 * A.x * C.x * k0) + (A.x * C.y * k1) + (A.x * k3) + ((B.x * B.x) * k0) + (B.x * B.y * k1) + (C.x * A.y * k1) + (2 * A.y * C.y * k2) + (A.y * k4) + ((B.y * B.y) * k2); - double d = (2 * B.x * C.x * k0) + (B.x * C.y * k1) + (B.x * k3) + (C.x * B.y * k1) + (2 * B.y * C.y * k2) + (B.y * k4); - double e = ((C.x * C.x) * k0) + (C.x * C.y * k1) + (C.x * k3) + ((C.y * C.y) * k2) + (C.y * k4) + (k5); + scalar_t a = ((A.x * A.x) * k0) + (A.x * A.y * k1) + (A.y * A.y * k2); + scalar_t b = (A.x * B.x * k0 * 2.0f) + (A.x * B.y * k1) + (B.x * A.y * k1) + (A.y * B.y * k2 * 2.0f); + scalar_t c = (A.x * C.x * k0 * 2.0f) + (A.x * C.y * k1) + (A.x * k3) + ((B.x * B.x) * k0) + (B.x * B.y * k1) + (C.x * A.y * k1) + (A.y * C.y * k2 * 2.0f) + (A.y * k4) + ((B.y * B.y) * k2); + scalar_t d = (B.x * C.x * k0 * 2.0f) + (B.x * C.y * k1) + (B.x * k3) + (C.x * B.y * k1) + (B.y * C.y * k2 * 2.0f) + (B.y * k4); + scalar_t e = ((C.x * C.x) * k0) + (C.x * C.y * k1) + (C.x * k3) + ((C.y * C.y) * k2) + (C.y * k4) + (k5); - 
return math::equations::Quartic::construct(a, b, c, d, e); + return math::equations::Quartic::construct(a, b, c, d, e); } // This function returns the analytic quadratic equation to solve for bezier's t value for intersection with another bezier curve template -static math::equations::Quadratic getBezierLineIntersectionEquation(QuadraticBezier bezier, NBL_CONST_REF_ARG(vector) lineStart, NBL_CONST_REF_ARG(vector) lineVector) +static math::equations::Quadratic getBezierLineIntersectionEquation(QuadraticBezier bezier, NBL_CONST_REF_ARG(typename VecT::type) lineStart, NBL_CONST_REF_ARG(typename VecT::type) lineVector) { +#ifndef __HLSL_VERSION using float_t2 = vector; using float_t3 = vector; using float_t4 = vector; using float_t2x2 = matrix; +#else + using float_t2 = typename VecT::type; + using float_t3 = typename VecT::type; + using float_t4 = typename VecT::type; + using float_t2x2 = typename Mat2x2T::type; +#endif float_t2 lineDir = normalize(lineVector); float_t2x2 rotate = float_t2x2(float_t2(lineDir.x, lineDir.y), float_t2(-lineDir.y, lineDir.x)); @@ -612,7 +603,7 @@ static math::equations::Quadratic getBezierLineIntersectionEquation(Qua bezier.P1 = mul(rotate, bezier.P1 - lineStart); bezier.P2 = mul(rotate, bezier.P2 - lineStart); Quadratic quadratic = Quadratic::constructFromBezier(bezier); - return math::equations::Quadratic::construct(quadratic.A.y, quadratic.B.y, quadratic.C.y); + return math::equations::Quadratic::construct(quadratic.A.y, quadratic.B.y, quadratic.C.y); } } // namespace shapes diff --git a/include/nbl/builtin/hlsl/tgmath.hlsl b/include/nbl/builtin/hlsl/tgmath.hlsl index d3afb09bb7..3be5230f7f 100644 --- a/include/nbl/builtin/hlsl/tgmath.hlsl +++ b/include/nbl/builtin/hlsl/tgmath.hlsl @@ -16,31 +16,25 @@ namespace hlsl namespace tgmath { -template -static inline bool isnan(Float val) +template +inline bool isnan(T val) { - using AsUint = typename unsigned_integer_of_size::type; - using AsFloat = typename float_of_size::type; - AsUint asUint = bit_cast(val); - return bool((ieee754::extractBiasedExponent(val) == ieee754::traits::specialValueExp) && (asUint & ieee754::traits::mantissaMask)); -} + using AsUint = typename unsigned_integer_of_size::type; + using AsFloat = typename float_of_size::type; -template <> -static inline bool isnan(uint64_t val) -{ - float64_t asFloat = bit_cast(val); - return bool((ieee754::extractBiasedExponent(asFloat) == ieee754::traits::specialValueExp) && (val & ieee754::traits::mantissaMask)); + AsUint asUint = bit_cast(val); + return bool((ieee754::extractBiasedExponent(val) == ieee754::traits::specialValueExp) && (asUint & ieee754::traits::mantissaMask)); } // TODO: better implementation, also i'm not sure this is the right place for this function template -NBL_CONSTEXPR_STATIC_INLINE Uint lerp(Uint a, Uint b, bool c) +NBL_CONSTEXPR_INLINE_FUNC Uint lerp(Uint a, Uint b, bool c) { return c ? 
b : a; } template -NBL_CONSTEXPR_STATIC_INLINE bool isInf(Uint val) +NBL_CONSTEXPR_INLINE_FUNC bool isInf(Uint val) { using AsFloat = typename float_of_size::type; return (val & ~ieee754::traits::signMask) == ieee754::traits::inf; diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index e2040ce5fe..1e584b0a97 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -235,8 +235,9 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/macros.h") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/impl/emulated_float64_t_impl.hlsl") #emulated LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated_float64_t.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tgmath.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated_float64_t_utils.hlsl") #utility +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tgmath.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ieee754.hlsl") #spirv intrinsics LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/spirv_intrinsics/core.hlsl") From 7af8d63d023ebecb3ed16277049d263ccef61c20 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 20 Aug 2024 17:07:47 +0100 Subject: [PATCH 032/432] Fixes --- .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index d6666c018b..d77a22ae2b 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -22,37 +22,31 @@ namespace hlsl NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(emulated_float64_t val) { - return createPreserveBitPattern(bit_cast(float64_t(val))); return createPreserveBitPattern(val.data); } NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(int32_t val) { - return createPreserveBitPattern(bit_cast(float64_t(val))); return emulated_float64_t::createPreserveBitPattern(impl::castToUint64WithFloat64BitPattern(int64_t(val))); } NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(int64_t val) { - return createPreserveBitPattern(bit_cast(float64_t(val))); return emulated_float64_t::createPreserveBitPattern(impl::castToUint64WithFloat64BitPattern(val)); } NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(uint32_t val) { - return createPreserveBitPattern(bit_cast(float64_t(val))); return emulated_float64_t::createPreserveBitPattern(impl::castToUint64WithFloat64BitPattern(uint64_t(val))); } NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(uint64_t val) { - return createPreserveBitPattern(bit_cast(float64_t(val))); return emulated_float64_t::createPreserveBitPattern(impl::castToUint64WithFloat64BitPattern(val)); } NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(float32_t val) { - return createPreserveBitPattern(bit_cast(float64_t(val))); emulated_float64_t output; output.data = impl::castToUint64WithFloat64BitPattern(val); return output; @@ -85,11 +79,25 @@ namespace hlsl inline float getAsFloat32() { + int exponent = ieee754::extractExponent(data); + if (!FastMath) + { + if (exponent > 127) + return bit_cast(ieee754::traits::inf); + if (exponent < -126) + return -bit_cast(ieee754::traits::inf); + if (tgmath::isnan(data)) + return bit_cast(ieee754::traits::quietNaN); + } + + //return float(bit_cast(data)); // TODO: fix - uint32_t sign = uint32_t((data & ieee754::traits::signMask) >> 32); - uint32_t exponent = (uint32_t(ieee754::extractExponent(data)) + 
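// Illustrative check of the float64 -> float32 narrowing in getAsFloat32() (assumes the usual IEEE-754
// layouts; the traits<> template arguments are elided in this listing):
//   data = 0x3FF8000000000000 (1.5): sign 0, unbiased exponent 0 -> re-biased to 0 + 127 = 127,
//   top 23 mantissa bits = 0x400000, packed bits = 0x3FC00000, which is exactly 1.5f.
// Exponents outside [-126, 127] have no normalized float32 representation, hence the early-out
// branch when FastMath is disabled.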
ieee754::traits::exponentBias) + ieee754::traits::mantissaBitCnt; + uint32_t sign = uint32_t((data & ieee754::traits::signMask) >> 32); + uint32_t biasedExponent = uint32_t(exponent + ieee754::traits::exponentBias) << ieee754::traits::mantissaBitCnt; uint32_t mantissa = uint32_t(data >> (ieee754::traits::mantissaBitCnt - ieee754::traits::mantissaBitCnt)) & ieee754::traits::mantissaMask; - return sign | exponent | mantissa; + + return bit_cast(sign | biasedExponent | mantissa); + } #if 0 From bc379a7002f253cc1e9157affb99e73e457b1662 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 22 Aug 2024 18:47:46 +0100 Subject: [PATCH 033/432] Refactor --- examples_tests | 2 +- .../hlsl/emulated_float64_t_utils.hlsl | 454 +++++++++++------- .../hlsl/math/equations/quadratic.hlsl | 7 +- .../builtin/hlsl/math/equations/quartic.hlsl | 6 +- include/nbl/builtin/hlsl/shapes/beziers.hlsl | 72 +-- 5 files changed, 308 insertions(+), 233 deletions(-) diff --git a/examples_tests b/examples_tests index 8e853f5518..0bfc208a6e 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 8e853f551805370c64ed72313c1195bf6ededb70 +Subproject commit 0bfc208a6e658f5b41d85d5afc74d4073a0d2f91 diff --git a/include/nbl/builtin/hlsl/emulated_float64_t_utils.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t_utils.hlsl index 4fe18cc5e8..8b8ee0966c 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t_utils.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t_utils.hlsl @@ -1,220 +1,240 @@ #ifndef _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_UTILS_INCLUDED_ #define _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_UTILS_INCLUDED_ +#include #include namespace nbl { namespace hlsl { -// should i use this namespace? -//namespace ef64_util -//{ -// TODO: this is mess, refactorize it +// TODO: enable +//template +//using portable_float64_t = conditional_t::shaderFloat64, float64_t, typename emulated_float64_t >; + #ifndef __HLSL_VERSION -using ef64_t2 = float64_t2; -using ef64_t3 = float64_t3; -using ef64_t4 = float64_t4; -using ef64_t3x3 = float64_t3x3; -using ef64_t2x2 = float64_t4x4; +template +using portable_float64_t = typename conditional >::type; #else -struct ef64_t2 +template +using portable_float64_t = typename conditional >::type; +#endif + +template +struct emulated_vector {}; + +template +struct emulated_vector { - emulated_float64_t x; - emulated_float64_t y; + using type = emulated_vector; - emulated_float64_t calcComponentSum() NBL_CONST_MEMBER_FUNC + EmulatedType x; + EmulatedType y; + + EmulatedType calcComponentSum() NBL_CONST_MEMBER_FUNC { return x + y; } - NBL_CONSTEXPR_STATIC_INLINE ef64_t2 create(emulated_float64_t x, emulated_float64_t y) + NBL_CONSTEXPR_STATIC_INLINE type create(EmulatedType x, EmulatedType y) { - ef64_t2 output; + type output; output.x = x; output.y = y; return output; } - NBL_CONSTEXPR_STATIC_INLINE ef64_t2 create(float val) - { - ef64_t2 output; - output.x = emulated_float64_t::create(val); - output.y = emulated_float64_t::create(val); - - return output; - } - - NBL_CONSTEXPR_STATIC_INLINE ef64_t2 create(float32_t2 val) - { - ef64_t2 output; - output.x = emulated_float64_t::create(val.x); - output.y = emulated_float64_t::create(val.y); - - return output; - } - - NBL_CONSTEXPR_STATIC_INLINE ef64_t2 create(uint32_t2 val) + type operator+(float rhs) { - ef64_t2 output; - output.x = emulated_float64_t::create(val.x); - output.y = emulated_float64_t::create(val.y); - - return output; - } - - ef64_t2 operator+(float rhs) - { - ef64_t2 output; - emulated_float64_t rhsAsEF64 = 
emulated_float64_t::create(rhs); + type output; + EmulatedType rhsAsEF64 = EmulatedType::create(rhs); output.x = x + rhsAsEF64; output.y = y + rhsAsEF64; return output; } - ef64_t2 operator+(emulated_float64_t rhs) + type operator+(EmulatedType rhs) { - ef64_t2 output; + type output; output.x = x + rhs; output.y = y + rhs; return output; } - ef64_t2 operator+(ef64_t2 rhs) + type operator+(type rhs) { - ef64_t2 output; + type output; output.x = x + rhs.x; output.y = y + rhs.y; return output; } - ef64_t2 operator-(float rhs) + type operator-(float rhs) { return create(x, y) + (-rhs); } - ef64_t2 operator-(emulated_float64_t rhs) + type operator-(EmulatedType rhs) { return create(x, y) + (rhs.flipSign()); } - ef64_t2 operator-(ef64_t2 rhs) + type operator-(type rhs) { rhs.x = rhs.x.flipSign(); rhs.y = rhs.y.flipSign(); return create(x, y) + rhs; } - ef64_t2 operator*(float rhs) + type operator*(float rhs) { - ef64_t2 output; - emulated_float64_t rhsAsEF64 = emulated_float64_t::create(rhs); + type output; + EmulatedType rhsAsEF64 = EmulatedType::create(rhs); output.x = x * rhsAsEF64; output.y = y * rhsAsEF64; return output; } - ef64_t2 operator*(emulated_float64_t rhs) + type operator*(EmulatedType rhs) { - ef64_t2 output; + type output; output.x = x * rhs; output.y = y * rhs; return output; } - ef64_t2 operator*(ef64_t2 rhs) + type operator*(type rhs) { - ef64_t2 output; + type output; output.x = x * rhs.x; output.y = y * rhs.y; return output; } -#ifdef __HLSL_VERSION - float2 getAsFloat2() + float32_t2 getAsFloat2() { - return float2(x.getAsFloat32(), y.getAsFloat32()); + return float32_t2(x.getAsFloat32(), y.getAsFloat32()); } -#endif }; -struct ef64_t3 +template +struct emulated_vector { - emulated_float64_t x; - emulated_float64_t y; - emulated_float64_t z; + using type = emulated_vector; - NBL_CONSTEXPR_STATIC_INLINE ef64_t3 create(NBL_REF_ARG(ef64_t3) other) + EmulatedType x; + EmulatedType y; + EmulatedType z; + + EmulatedType calcComponentSum() NBL_CONST_MEMBER_FUNC { - ef64_t3 output; + return x + y + z; + } - output.x = other.x; - output.y = other.y; - output.z = other.z; + type operator*(NBL_CONST_REF_ARG(type) rhs) NBL_CONST_MEMBER_FUNC + { + type output; + output.x = x * rhs.x; + output.y = y * rhs.y; + output.z = z * rhs.z; return output; } +}; - NBL_CONSTEXPR_STATIC_INLINE ef64_t3 create(NBL_REF_ARG(ef64_t2) other, emulated_float64_t z) - { - ef64_t3 output; +template +struct emulated_vector +{ + using type = emulated_vector; - output.x = other.x; - output.y = other.y; - output.z = z; + EmulatedType x; + EmulatedType y; + EmulatedType z; + EmulatedType w; +}; - return output; - } +template +using emulated_vector_t2 = emulated_vector; +template +using emulated_vector_t3 = emulated_vector; +template +using emulated_vector_t4 = emulated_vector; + +//template +//struct emulated_matrix_base +//{ +// using vec_t = emulated_vector; +// vec_t columns[M]; +//}; + +template +struct emulated_matrix {}; // : emulated_matrix_base {}; - NBL_CONSTEXPR_STATIC_INLINE ef64_t3 create(NBL_REF_ARG(ef64_t2) other, int z) +template +struct emulated_matrix// : emulated_matrix_base +{ + using vec_t = emulated_vector_t2; + using type = emulated_matrix; + + vec_t columns[2]; + + type getTransposed() NBL_CONST_MEMBER_FUNC { - ef64_t3 output; + type output; + + output.columns[0].x = columns[0].x; + output.columns[1].x = columns[0].y; - output.x = other.x; - output.y = other.y; - output.z = emulated_float64_t::create(z); + output.columns[0].y = columns[1].x; + output.columns[1].y = columns[1].y; return 
output; } - emulated_float64_t calcComponentSum() NBL_CONST_MEMBER_FUNC + type operator*(NBL_CONST_REF_ARG(type) rhs) NBL_CONST_MEMBER_FUNC { - return x + y + z; + type output; + type lhsTransposed = getTransposed(); + + output.columns[0].x = (lhsTransposed.columns[0] * rhs.columns[0]).calcComponentSum(); + output.columns[0].y = (lhsTransposed.columns[0] * rhs.columns[1]).calcComponentSum(); + + output.columns[1].x = (lhsTransposed.columns[1] * rhs.columns[0]).calcComponentSum(); + output.columns[1].y = (lhsTransposed.columns[1] * rhs.columns[1]).calcComponentSum(); + + return output.getTransposed(); } - ef64_t3 operator*(NBL_CONST_REF_ARG(ef64_t3) rhs) NBL_CONST_MEMBER_FUNC + vec_t operator*(NBL_CONST_REF_ARG(vec_t) rhs) { - ef64_t3 output; - output.x = x * rhs.x; - output.y = y * rhs.y; - output.z = z * rhs.z; + vec_t output; + type lhsTransposed = getTransposed(); + + output.x = (columns[0] * rhs).calcComponentSum(); + output.y = (columns[1] * rhs).calcComponentSum(); return output; } }; -struct ef64_t4 +template +struct emulated_matrix // : emulated_matrix_base { - emulated_float64_t x; - emulated_float64_t y; - emulated_float64_t z; - emulated_float64_t w; -}; + using vec_t = emulated_vector_t3; + using type = emulated_matrix; -struct ef64_t3x3 -{ - ef64_t3 columns[3]; + vec_t columns[3]; - ef64_t3x3 getTransposed() NBL_CONST_MEMBER_FUNC + type getTransposed() NBL_CONST_MEMBER_FUNC { - ef64_t3x3 output; + type output; output.columns[0].x = columns[0].x; output.columns[1].x = columns[0].y; @@ -231,10 +251,10 @@ struct ef64_t3x3 return output; } - ef64_t3x3 operator*(NBL_CONST_REF_ARG(ef64_t3x3) rhs) NBL_CONST_MEMBER_FUNC + type operator*(NBL_CONST_REF_ARG(type) rhs) NBL_CONST_MEMBER_FUNC { - ef64_t3x3 output; - ef64_t3x3 lhsTransposed = getTransposed(); + type output; + type lhsTransposed = getTransposed(); output.columns[0].x = (lhsTransposed.columns[0] * rhs.columns[0]).calcComponentSum(); output.columns[0].y = (lhsTransposed.columns[0] * rhs.columns[1]).calcComponentSum(); @@ -252,10 +272,10 @@ struct ef64_t3x3 return output.getTransposed(); } - ef64_t3 operator*(NBL_CONST_REF_ARG(ef64_t3) rhs) + vec_t operator*(NBL_CONST_REF_ARG(vec_t) rhs) { - ef64_t3 output; - ef64_t3x3 lhsTransposed = getTransposed(); + vec_t output; + type lhsTransposed = getTransposed(); output.x = (columns[0] * rhs).calcComponentSum(); output.y = (columns[1] * rhs).calcComponentSum(); @@ -265,101 +285,179 @@ struct ef64_t3x3 } }; -struct ef64_t2x2 +template +using emulated_matrix_t2x2 = emulated_matrix; +template +using emulated_matrix_t3x3 = emulated_matrix; + +namespace impl +{ +template +struct is_emulated { - ef64_t2 columns[2]; + NBL_CONSTEXPR_STATIC_INLINE bool value = is_same >::value || + is_same >::value || + is_same >::value || + is_same >::value; +}; - ef64_t2x2 getTransposed() NBL_CONST_MEMBER_FUNC - { - ef64_t2x2 output; +template::value > +struct portable_vector +{ + using type = emulated_vector; +}; +// specialization for builtins +template +struct portable_vector +{ + using type = vector; +}; - output.columns[0].x = columns[0].x; - output.columns[1].x = columns[0].y; +template::value > +struct portable_matrix +{ + using type = emulated_matrix; +}; - output.columns[0].y = columns[1].x; - output.columns[1].y = columns[1].y; +template +struct portable_matrix +{ + using type = matrix; +}; - return output; - } +} - ef64_t2x2 operator*(NBL_CONST_REF_ARG(ef64_t2x2) rhs) NBL_CONST_MEMBER_FUNC - { - ef64_t2x2 output; - ef64_t2x2 lhsTransposed = getTransposed(); +template +using portable_vector_t = 
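// Note on the emulated_matrix multiplication above (numbers below are illustrative, not from the
// patch): operator* dots the rows of the left operand (obtained via getTransposed()) with the columns
// of the right operand, so output.columns[i] temporarily holds row i of the product and the trailing
// getTransposed() restores column-major storage. E.g. for 2x2 operands stored column-major as
//   lhs columns (1,3),(2,4) and rhs columns (5,7),(6,8)   i.e. lhs = [[1,2],[3,4]], rhs = [[5,6],[7,8]]
// the product is [[19,22],[43,50]], stored as columns (19,43),(22,50).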
typename impl::portable_vector::type; - output.columns[0].x = (lhsTransposed.columns[0] * rhs.columns[0]).calcComponentSum(); - output.columns[0].y = (lhsTransposed.columns[0] * rhs.columns[1]).calcComponentSum(); +template +using portable_vector_t2 = portable_vector_t; +template +using portable_vector_t3 = portable_vector_t; +template +using portable_vector_t4 = portable_vector_t; - output.columns[1].x = (lhsTransposed.columns[1] * rhs.columns[0]).calcComponentSum(); - output.columns[1].y = (lhsTransposed.columns[1] * rhs.columns[1]).calcComponentSum(); +using portable_vector64_t2 = portable_vector_t2 >; +using portable_vector64_t3 = portable_vector_t3 >; +using portable_vector64_t4 = portable_vector_t4 >; - return output.getTransposed(); - } +template +using portable_matrix_t = typename impl::portable_matrix::type; - ef64_t2 operator*(NBL_CONST_REF_ARG(ef64_t2) rhs) - { - ef64_t2 output; - ef64_t2x2 lhsTransposed = getTransposed(); +template +using portable_matrix_t2x2 = portable_matrix_t; +template +using portable_matrix_t3x3 = portable_matrix_t; - output.x = (columns[0] * rhs).calcComponentSum(); - output.y = (columns[1] * rhs).calcComponentSum(); +using portable_matrix64_t2x2 = portable_matrix_t2x2 >; +using portable_matrix64_t3x3 = portable_matrix_t3x3 >; - return output; - } -}; +template +NBL_CONSTEXPR_INLINE_FUNC portable_float64_t<> create_portable_float64_t(T val) +{ + //return impl::portable_float64_t_creator::create(val); + if (impl::is_emulated >::value) + return portable_float64_t<>::create(val); + else + return portable_float64_t<>(val); +} -#endif +template +NBL_CONSTEXPR_INLINE_FUNC portable_vector64_t2 create_portable_vector64_t2(T val) +{ + portable_vector64_t2 output; + output.x = create_portable_float64_t(val); + output.y = create_portable_float64_t(val); -// struct VecT is solution to -// error: 'nbl::hlsl::emulated_float64_t' cannot be used as a type parameter where a scalar is required -// using float_t2 = typename conditional >::value, ef64_t2, vector >::type; + return output; +} -// TODO: better solution +template +NBL_CONSTEXPR_INLINE_FUNC portable_vector64_t2 create_portable_vector64_t2(X x, Y y) +{ + portable_vector64_t2 output; + output.x = create_portable_float64_t(x); + output.y = create_portable_float64_t(y); -#ifndef __HLSL_VERSION -using F64_t = double; -#else -using F64_t = emulated_float64_t; -#endif + return output; +} -template -struct VecT { using type = void; }; +template +NBL_CONSTEXPR_INLINE_FUNC portable_vector64_t2 create_portable_vector64_t2_from_2d_vec(VecType vec) +{ + portable_vector64_t2 output; + output.x = create_portable_float64_t(vec.x); + output.y = create_portable_float64_t(vec.y); -template<> -struct VecT { using type = vector; }; -template<> -struct VecT { using type = vector; }; -template<> -struct VecT { using type = vector; }; + return output; +} + +template +NBL_CONSTEXPR_INLINE_FUNC portable_vector64_t3 create_portable_vector64_t3(T val) +{ + portable_vector64_t3 output; + output.x = create_portable_float64_t(val); + output.y = create_portable_float64_t(val); + output.z = create_portable_float64_t(val); + + return output; +} + +template +NBL_CONSTEXPR_INLINE_FUNC portable_vector64_t3 create_portable_vector64_t3(X x, Y y, Z z) +{ + portable_vector64_t3 output; + output.x = create_portable_float64_t(x); + output.y = create_portable_float64_t(y); + output.z = create_portable_float64_t(z); + + return output; +} + +template +NBL_CONSTEXPR_INLINE_FUNC portable_vector64_t3 create_portable_vector64_t2_from_3d_vec(VecType vec) +{ + 
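// Hedged usage sketch for the portable_* helpers defined in this header (the template parameters are
// elided in this listing, so the exact alias spellings are an assumption; the intent is that the same
// code runs as plain float64 on the C++ path and as emulated_float64_t on the HLSL path):
//
//   portable_vector64_t2 p = create_portable_vector64_t2(0.5f, 2.0f);
//   portable_float64_t<> s = p.x * p.y + create_portable_float64_t(1.0f);    // reads like double math
//   float32_t2 lowPrecision = convert_portable_vector64_t2_to_float32_t2(p); // narrow for output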
portable_vector64_t3 output; + output.x = create_portable_float64_t(vec.x); + output.y = create_portable_float64_t(vec.y); + output.z = create_portable_float64_t(vec.z); + + return output; +} + +template >::value> +inline float32_t2 convert_portable_vector64_t2_to_float32_t2(portable_vector64_t2 vec) +{ + return float32_t2(vec.x, vec.y); +} -#ifndef __HLSL_VERSION -template<> -struct VecT { using type = float64_t2; }; -template<> -struct VecT { using type = float64_t3; }; template<> -struct VecT { using type = float64_t4; }; +inline float32_t2 convert_portable_vector64_t2_to_float32_t2(portable_vector64_t2 vec) +{ +#ifdef __HLSL_VERSION + return emulated_vector, 2>::create(vec.x, vec.y).getAsFloat2(); +#else + return float32_t2(bit_cast(0xdeadbeefu), bit_cast(0xbadcaffeu)); #endif +} -template<> -struct VecT, 2> { using type = ef64_t2; }; -template<> -struct VecT, 3> { using type = ef64_t3; }; -template<> -struct VecT, 4> { using type = ef64_t4; }; +template >::value> +inline float32_t convert_portable_float64_t_to_float(portable_float64_t<> val) +{ + + return float32_t(val); +} -template -struct Mat2x2T { using type = void; }; template<> -struct Mat2x2T { using type = float32_t2x2; }; -#ifndef __HLSL_VERSION -template<> -struct Mat2x2T { using type = float64_t2x2; }; +inline float32_t convert_portable_float64_t_to_float(portable_float64_t<> val) +{ +#ifdef __HLSL_VERSION + return val.getAsFloat32(); +#else + return float32_t(bit_cast(0xdeadbeefu)); #endif -template<> -struct Mat2x2T > { using type = ef64_t2x2; }; +} -//} } } #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/math/equations/quadratic.hlsl b/include/nbl/builtin/hlsl/math/equations/quadratic.hlsl index d8dacdc7bb..ba0f70ba67 100644 --- a/include/nbl/builtin/hlsl/math/equations/quadratic.hlsl +++ b/include/nbl/builtin/hlsl/math/equations/quadratic.hlsl @@ -29,8 +29,7 @@ namespace equations template struct Quadratic { - using float_t22 = typename VecT::type; - using float_t33 = typename VecT::type; + using float_t2 = portable_vector_t2; float_t a; float_t b; @@ -47,9 +46,9 @@ struct Quadratic return t * (a * t + b) + c; } - float_t22 computeRoots() + float_t2 computeRoots() { - float_t22 ret; + float_t2 ret; const float_t det = b * b - 4.0 * a * c; const float_t detSqrt = sqrt(det); diff --git a/include/nbl/builtin/hlsl/math/equations/quartic.hlsl b/include/nbl/builtin/hlsl/math/equations/quartic.hlsl index 882f7a33ba..c34c25602f 100644 --- a/include/nbl/builtin/hlsl/math/equations/quartic.hlsl +++ b/include/nbl/builtin/hlsl/math/equations/quartic.hlsl @@ -24,9 +24,9 @@ namespace equations template struct Quartic { - using float_t2 = typename VecT::type; - using float_t3 = typename VecT::type; - using float_t4 = typename VecT::type; + using float_t2 = portable_vector_t2; + using float_t3 = portable_vector_t3; + using float_t4 = portable_vector_t4; // form: ax^4 + bx^3 + cx^2 + dx + e float_t a; diff --git a/include/nbl/builtin/hlsl/shapes/beziers.hlsl b/include/nbl/builtin/hlsl/shapes/beziers.hlsl index f966fc138c..44d79800ef 100644 --- a/include/nbl/builtin/hlsl/shapes/beziers.hlsl +++ b/include/nbl/builtin/hlsl/shapes/beziers.hlsl @@ -34,10 +34,10 @@ namespace shapes template struct QuadraticBezier { - using float_t2 = typename VecT::type; - using float_t3 = typename VecT::type; - using float_t4 = typename VecT::type; - using float_t2x2 = typename Mat2x2T::type; + using float_t2 = portable_vector_t2; + using float_t3 = portable_vector_t3; + using float_t4 = portable_vector_t4; + using float_t2x2 = 
portable_matrix_t2x2; float_t2 P0; float_t2 P1; @@ -211,17 +211,12 @@ struct QuadraticBezier template struct Quadratic { -#ifndef __HLSL_VERSION using scalar_t = float_t; - using float_t2 = vector; - using float_t3 = vector; - using float_t2x2 = matrix; -#else - using scalar_t = float_t; - using float_t2 = typename VecT::type; - using float_t3 = typename VecT::type; - using float_t2x2 = typename Mat2x2T::type; -#endif + using float_t2 = portable_vector_t2; + using float_t3 = portable_vector_t3; + using float_t4 = portable_vector_t4; + using float_t2x2 = portable_matrix_t2x2; + //using float_t3x3 = portable_matrix_t2x2; float_t2 A; float_t2 B; @@ -229,11 +224,6 @@ struct Quadratic struct AnalyticArcLengthCalculator { -#ifndef __HLSL_VERSION - using float_t2 = vector; -#else - using float_t2 = typename VecT::type; -#endif static AnalyticArcLengthCalculator construct(float_t lenA2, float_t AdotB, float_t a, float_t b, float_t c, float_t b_over_4a) { @@ -524,12 +514,7 @@ struct Quadratic template static math::equations::Quartic getBezierBezierIntersectionEquation(NBL_CONST_REF_ARG(QuadraticBezier) lhs, NBL_CONST_REF_ARG(QuadraticBezier) rhs) { -#ifndef __HLSL_VERSION - using scalar_t = double; -#else - using scalar_t = emulated_float64_t; -#endif - using float_t2 = typename VecT::type; + using float_t2 = portable_vector_t2; // Algorithm based on Computer Aided Geometric Design: // https://scholarsarchive.byu.edu/cgi/viewcontent.cgi?article=1000&context=facpub#page99 @@ -565,37 +550,30 @@ static math::equations::Quartic getBezierBezierIntersectionEquation(NBL Quadratic quadratic = Quadratic::constructFromBezier(lhs); // for convenience - const float_t2 A = quadratic.A; - const float_t2 B = quadratic.B; - const float_t2 C = quadratic.C; + const portable_vector64_t2 A = quadratic.A; + const portable_vector64_t2 B = quadratic.B; + const portable_vector64_t2 C = quadratic.C; // substitute parametric into implicit equation: // Getting the quartic params - scalar_t a = ((A.x * A.x) * k0) + (A.x * A.y * k1) + (A.y * A.y * k2); - scalar_t b = (A.x * B.x * k0 * 2.0f) + (A.x * B.y * k1) + (B.x * A.y * k1) + (A.y * B.y * k2 * 2.0f); - scalar_t c = (A.x * C.x * k0 * 2.0f) + (A.x * C.y * k1) + (A.x * k3) + ((B.x * B.x) * k0) + (B.x * B.y * k1) + (C.x * A.y * k1) + (A.y * C.y * k2 * 2.0f) + (A.y * k4) + ((B.y * B.y) * k2); - scalar_t d = (B.x * C.x * k0 * 2.0f) + (B.x * C.y * k1) + (B.x * k3) + (C.x * B.y * k1) + (B.y * C.y * k2 * 2.0f) + (B.y * k4); - scalar_t e = ((C.x * C.x) * k0) + (C.x * C.y * k1) + (C.x * k3) + ((C.y * C.y) * k2) + (C.y * k4) + (k5); + portable_float64_t<> a = ((A.x * A.x) * k0) + (A.x * A.y * k1) + (A.y * A.y * k2); + portable_float64_t<> b = (A.x * B.x * k0 * 2.0f) + (A.x * B.y * k1) + (B.x * A.y * k1) + (A.y * B.y * k2 * 2.0f); + portable_float64_t<> c = (A.x * C.x * k0 * 2.0f) + (A.x * C.y * k1) + (A.x * k3) + ((B.x * B.x) * k0) + (B.x * B.y * k1) + (C.x * A.y * k1) + (A.y * C.y * k2 * 2.0f) + (A.y * k4) + ((B.y * B.y) * k2); + portable_float64_t<> d = (B.x * C.x * k0 * 2.0f) + (B.x * C.y * k1) + (B.x * k3) + (C.x * B.y * k1) + (B.y * C.y * k2 * 2.0f) + (B.y * k4); + portable_float64_t<> e = ((C.x * C.x) * k0) + (C.x * C.y * k1) + (C.x * k3) + ((C.y * C.y) * k2) + (C.y * k4) + (k5); - return math::equations::Quartic::construct(a, b, c, d, e); + return math::equations::Quartic >::construct(a, b, c, d, e); } // This function returns the analytic quadratic equation to solve for bezier's t value for intersection with another bezier curve template -static math::equations::Quadratic 
getBezierLineIntersectionEquation(QuadraticBezier bezier, NBL_CONST_REF_ARG(typename VecT::type) lineStart, NBL_CONST_REF_ARG(typename VecT::type) lineVector) +static math::equations::Quadratic getBezierLineIntersectionEquation(QuadraticBezier bezier, NBL_CONST_REF_ARG(portable_vector_t2) lineStart, NBL_CONST_REF_ARG(portable_vector_t2) lineVector) { -#ifndef __HLSL_VERSION - using float_t2 = vector; - using float_t3 = vector; - using float_t4 = vector; - using float_t2x2 = matrix; -#else - using float_t2 = typename VecT::type; - using float_t3 = typename VecT::type; - using float_t4 = typename VecT::type; - using float_t2x2 = typename Mat2x2T::type; -#endif + using float_t2 = portable_vector_t2; + using float_t3 = portable_vector_t3; + using float_t4 = portable_vector_t4; + using float_t2x2 = portable_matrix_t2x2; float_t2 lineDir = normalize(lineVector); float_t2x2 rotate = float_t2x2(float_t2(lineDir.x, lineDir.y), float_t2(-lineDir.y, lineDir.x)); @@ -603,7 +581,7 @@ static math::equations::Quadratic getBezierLineIntersectionEquation(Qua bezier.P1 = mul(rotate, bezier.P1 - lineStart); bezier.P2 = mul(rotate, bezier.P2 - lineStart); Quadratic quadratic = Quadratic::constructFromBezier(bezier); - return math::equations::Quadratic::construct(quadratic.A.y, quadratic.B.y, quadratic.C.y); + return math::equations::Quadratic >::construct(quadratic.A.y, quadratic.B.y, quadratic.C.y); } } // namespace shapes From 424d3a89480eaeb80655fe73f3fb3ee336609555 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 23 Aug 2024 13:49:42 +0100 Subject: [PATCH 034/432] Saving work --- .../hlsl/impl/emulated_float64_t_impl.hlsl | 59 ------------------- include/nbl/builtin/hlsl/tgmath.hlsl | 4 +- 2 files changed, 2 insertions(+), 61 deletions(-) diff --git a/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl b/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl index bdaa9202a9..a828449d13 100644 --- a/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl @@ -173,25 +173,6 @@ NBL_CONSTEXPR_INLINE_FUNC uint64_t propagateFloat64NaN(uint64_t lhs, uint64_t rh #endif } -NBL_CONSTEXPR_INLINE_FUNC uint32_t2 add64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) -{ - uint32_t2 output; - output.y = a1 + b1; - output.x = a0 + b0 + uint32_t(output.y < a1); - - return output; -} - -NBL_CONSTEXPR_INLINE_FUNC uint32_t2 sub64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) -{ - uint32_t2 output; - output.y = a1 - b1; - output.x = a0 - b0 - uint32_t(a1 < b1); - - return output; -} - - // TODO: test static inline int countLeadingZeros32(uint32_t val) { #ifndef __HLSL_VERSION @@ -220,46 +201,6 @@ NBL_CONSTEXPR_INLINE_FUNC uint32_t2 shift64RightJamming(uint32_t2 val, int count return output; } - -NBL_CONSTEXPR_INLINE_FUNC uint32_t4 mul64to128(uint32_t4 mantissasPacked) -{ - uint32_t4 output; - uint32_t more1 = 0u; - uint32_t more2 = 0u; - - // a0 = x - // a1 = y - // b0 = z - // b1 = w - - uint32_t2 z2z3 = umulExtended(mantissasPacked.y, mantissasPacked.w); - output.z = z2z3.x; - output.w = z2z3.y; - uint32_t2 z1more2 = umulExtended(mantissasPacked.y, mantissasPacked.z); - output.y = z1more2.x; - more2 = z1more2.y; - uint32_t2 z1z2 = add64(output.y, more2, 0u, output.z); - output.y = z1z2.x; - output.z = z1z2.y; - uint32_t2 z0more1 = umulExtended(mantissasPacked.x, mantissasPacked.z); - output.x = z0more1.x; - more1 = z0more1.y; - uint32_t2 z0z1 = add64(output.x, more1, 0u, output.y); - output.x = z0z1.x; - output.y = z0z1.y; - uint32_t2 
more1more2 = umulExtended(mantissasPacked.x, mantissasPacked.w); - more1 = more1more2.x; - more2 = more1more2.y; - uint32_t2 more1z2 = add64(more1, more2, 0u, output.z); - more1 = more1z2.x; - output.z = more1z2.y; - uint32_t2 z0z12 = add64(output.x, output.y, 0u, more1); - output.x = z0z12.x; - output.y = z0z12.y; - - return output; -} - NBL_CONSTEXPR_INLINE_FUNC uint32_t3 shift64ExtraRightJamming(uint32_t3 val, int count) { uint32_t3 output; diff --git a/include/nbl/builtin/hlsl/tgmath.hlsl b/include/nbl/builtin/hlsl/tgmath.hlsl index 3be5230f7f..7190723602 100644 --- a/include/nbl/builtin/hlsl/tgmath.hlsl +++ b/include/nbl/builtin/hlsl/tgmath.hlsl @@ -27,8 +27,8 @@ inline bool isnan(T val) } // TODO: better implementation, also i'm not sure this is the right place for this function -template -NBL_CONSTEXPR_INLINE_FUNC Uint lerp(Uint a, Uint b, bool c) +template +NBL_CONSTEXPR_INLINE_FUNC enable_if::type, T>::type lerp(T a, T b, bool c) { return c ? b : a; } From 2e9a25560807274e49cfcbc59403190d93edc636 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 23 Aug 2024 20:01:44 +0100 Subject: [PATCH 035/432] Saving work --- examples_tests | 2 +- include/nbl/builtin/hlsl/cpp_compat/basic.h | 19 ++- .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 137 ++++++++++-------- .../hlsl/emulated_float64_t_utils.hlsl | 16 ++ 4 files changed, 109 insertions(+), 65 deletions(-) diff --git a/examples_tests b/examples_tests index 0bfc208a6e..293ec73d47 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 0bfc208a6e658f5b41d85d5afc74d4073a0d2f91 +Subproject commit 293ec73d47f2ecdedd2e4b00b31b663e03aa000a diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 688834f730..dfa12aa33d 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -49,10 +49,23 @@ namespace nbl { namespace hlsl { - template - T _static_cast(U v) + namespace impl { - return (T)v; + template + struct static_cast_helper + { + static inline To cast(From u) + { + return To(u); + } + }; + } + + template + To _static_cast(From v) + { + return impl::static_cast_helper(v); + //return (T)v; } #if 0 // TODO: for later diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index d77a22ae2b..cba77b7ddd 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -22,27 +22,29 @@ namespace hlsl NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(emulated_float64_t val) { - return createPreserveBitPattern(val.data); + //return bit_cast >(val.data); + return val; + //TODO: return val? 
} NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(int32_t val) { - return emulated_float64_t::createPreserveBitPattern(impl::castToUint64WithFloat64BitPattern(int64_t(val))); + return bit_cast >(impl::castToUint64WithFloat64BitPattern(int64_t(val))); } NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(int64_t val) { - return emulated_float64_t::createPreserveBitPattern(impl::castToUint64WithFloat64BitPattern(val)); + return bit_cast >(impl::castToUint64WithFloat64BitPattern(val)); } NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(uint32_t val) { - return emulated_float64_t::createPreserveBitPattern(impl::castToUint64WithFloat64BitPattern(uint64_t(val))); + return bit_cast >(impl::castToUint64WithFloat64BitPattern(uint64_t(val))); } NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(uint64_t val) { - return emulated_float64_t::createPreserveBitPattern(impl::castToUint64WithFloat64BitPattern(val)); + return bit_cast >(impl::castToUint64WithFloat64BitPattern(val)); } NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(float32_t val) @@ -54,7 +56,7 @@ namespace hlsl NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(float64_t val) { - return createPreserveBitPattern(bit_cast(float64_t(val))); + return bit_cast >(bit_cast(float64_t(val))); #ifdef __HLSL_VERSION emulated_float64_t retval; uint32_t lo, hi; @@ -62,7 +64,7 @@ namespace hlsl retval.data = (uint64_t(hi) << 32) | lo; return retval; #else - return createPreserveBitPattern(reinterpret_cast(val)); + return bit_cast >(reinterpret_cast(val)); #endif } @@ -72,11 +74,6 @@ namespace hlsl return emulated_float64_t(bit_cast(float64_t(val))); }*/ - NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t createPreserveBitPattern(uint64_t val) - { - return emulated_float64_t(val); - } - inline float getAsFloat32() { int exponent = ieee754::extractExponent(data); @@ -123,7 +120,7 @@ namespace hlsl int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); if (tgmath::isnan(data) || tgmath::isnan(rhs.data)) - return createPreserveBitPattern(ieee754::traits::quietNaN); + return bit_cast >(ieee754::traits::quietNaN); /*if (std::isinf(lhs) || std::isinf(rhs)) { if (std::isinf(lhs) && !std::isinf(rhs)) @@ -174,12 +171,12 @@ namespace hlsl if (!FastMath && (rp + l2 + 1 < nbl::hlsl::numeric_limits::min_exponent)) { - return createPreserveBitPattern(impl::assembleFloat64(0, ieee754::traits::exponentMask, 0)); + return bit_cast >(impl::assembleFloat64(0, ieee754::traits::exponentMask, 0)); } else { rp = addTmp ? 
l2 + rp + ieee754::traits::exponentBias : 0; - return createPreserveBitPattern(impl::assembleFloat64( + return bit_cast >(impl::assembleFloat64( sign, (uint64_t(rp) << ieee754::traits::mantissaBitCnt) & ieee754::traits::exponentMask, shiftLeftAllowNegBitCnt(addTmp, (ieee754::traits::mantissaBitCnt - l2)) & ieee754::traits::mantissaMask) @@ -191,7 +188,7 @@ namespace hlsl if (FlushDenormToZero) { - emulated_float64_t retval = createPreserveBitPattern(0u); + emulated_float64_t retval = emulated_float64_t::create(0ull); uint64_t mantissa; uint32_t3 mantissaExtended; @@ -213,12 +210,12 @@ namespace hlsl if (lhsBiasedExp == ieee754::traits::specialValueExp) { bool propagate = (lhsMantissa | rhsMantissa) != 0u; - return createPreserveBitPattern(tgmath::lerp(data, impl::propagateFloat64NaN(data, rhs.data), propagate)); + return bit_cast >(tgmath::lerp(data, impl::propagateFloat64NaN(data, rhs.data), propagate)); } mantissa = lhsMantissa + rhsMantissa; if (lhsBiasedExp == 0) - return createPreserveBitPattern(impl::assembleFloat64(lhsSign, 0, mantissa)); + return bit_cast >(impl::assembleFloat64(lhsSign, 0, mantissa)); mantissaExtended.xy = impl::packUint64(mantissa); mantissaExtended.x |= 0x00200000u; mantissaExtended.z = 0u; @@ -237,7 +234,7 @@ namespace hlsl if (lhsBiasedExp == ieee754::traits::specialValueExp) { const bool propagate = (lhsMantissa) != 0u; - return createPreserveBitPattern(tgmath::lerp(ieee754::traits::exponentMask | lhsSign, impl::propagateFloat64NaN(data, rhs.data), propagate)); + return bit_cast >(tgmath::lerp(ieee754::traits::exponentMask | lhsSign, impl::propagateFloat64NaN(data, rhs.data), propagate)); } expDiff = tgmath::lerp(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); @@ -256,11 +253,11 @@ namespace hlsl ++biasedExp; } - return createPreserveBitPattern(impl::roundAndPackFloat64(lhsSign, biasedExp, mantissaExtended.xyz)); + return bit_cast >(impl::roundAndPackFloat64(lhsSign, biasedExp, mantissaExtended.xyz)); } // cannot happen but compiler cries about not every path returning value - return createPreserveBitPattern(impl::roundAndPackFloat64(lhsSign, biasedExp, mantissaExtended)); + return bit_cast >(impl::roundAndPackFloat64(lhsSign, biasedExp, mantissaExtended)); } else { @@ -281,7 +278,7 @@ namespace hlsl if (lhsBiasedExp == ieee754::traits::specialValueExp) { bool propagate = lhsMantissa != 0u; - return createPreserveBitPattern(tgmath::lerp(impl::assembleFloat64(lhsSign, ieee754::traits::exponentMask, 0ull), impl::propagateFloat64NaN(data, rhs.data), propagate)); + return bit_cast >(tgmath::lerp(impl::assembleFloat64(lhsSign, ieee754::traits::exponentMask, 0ull), impl::propagateFloat64NaN(data, rhs.data), propagate)); } expDiff = tgmath::lerp(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); @@ -291,12 +288,12 @@ namespace hlsl frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); biasedExp = lhsBiasedExp; --biasedExp; - return createPreserveBitPattern(impl::normalizeRoundAndPackFloat64(lhsSign, biasedExp - 10, frac.x, frac.y)); + return bit_cast >(impl::normalizeRoundAndPackFloat64(lhsSign, biasedExp - 10, frac.x, frac.y)); } if (lhsBiasedExp == ieee754::traits::specialValueExp) { bool propagate = ((lhsMantissa) | (rhsMantissa)) != 0u; - return createPreserveBitPattern(tgmath::lerp(ieee754::traits::quietNaN, impl::propagateFloat64NaN(data, rhs.data), propagate)); + return bit_cast >(tgmath::lerp(ieee754::traits::quietNaN, impl::propagateFloat64NaN(data, rhs.data), propagate)); } rhsBiasedExp = tgmath::lerp(rhsBiasedExp, 1, lhsBiasedExp == 0); lhsBiasedExp 
= tgmath::lerp(lhsBiasedExp, 1, lhsBiasedExp == 0); @@ -331,24 +328,24 @@ namespace hlsl lhsSign ^= signOfDifference; uint64_t retval_0 = impl::packFloat64(uint32_t(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) << 31, 0, 0u, 0u); uint64_t retval_1 = impl::normalizeRoundAndPackFloat64(lhsSign, biasedExp - 11, frac.x, frac.y); - return createPreserveBitPattern(tgmath::lerp(retval_0, retval_1, frac.x != 0u || frac.y != 0u)); + return bit_cast >(tgmath::lerp(retval_0, retval_1, frac.x != 0u || frac.y != 0u)); } } else { //static_assert(false, "not implemented yet"); - return createPreserveBitPattern(0xdeadbeefbadcaffeull); + return bit_cast >(0xdeadbeefbadcaffeull); } } emulated_float64_t operator+(float rhs) { - return createPreserveBitPattern(data) + create(rhs); + return bit_cast >(data) + create(rhs); } emulated_float64_t operator-(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { - emulated_float64_t lhs = createPreserveBitPattern(data); + emulated_float64_t lhs = bit_cast >(data); emulated_float64_t rhsFlipped = rhs.flipSign(); return lhs + rhsFlipped; @@ -356,14 +353,14 @@ namespace hlsl emulated_float64_t operator-(float rhs) NBL_CONST_MEMBER_FUNC { - return createPreserveBitPattern(data) - create(rhs); + return bit_cast >(data) - create(rhs); } emulated_float64_t operator*(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { if(FlushDenormToZero) { - emulated_float64_t retval = emulated_float64_t::createPreserveBitPattern(0u); + emulated_float64_t retval = emulated_float64_t::create(0ull); uint64_t lhsSign = data & ieee754::traits::signMask; uint64_t rhsSign = rhs.data & ieee754::traits::signMask; @@ -379,11 +376,11 @@ namespace hlsl if (lhsBiasedExp == ieee754::traits::specialValueExp) { if ((lhsMantissa != 0u) || ((rhsBiasedExp == ieee754::traits::specialValueExp) && (rhsMantissa != 0u))) - return createPreserveBitPattern(impl::propagateFloat64NaN(data, rhs.data)); + return bit_cast >(impl::propagateFloat64NaN(data, rhs.data)); if ((uint64_t(rhsBiasedExp) | rhsMantissa) == 0u) - return createPreserveBitPattern(ieee754::traits::quietNaN); + return bit_cast >(ieee754::traits::quietNaN); - return createPreserveBitPattern(impl::assembleFloat64(sign, ieee754::traits::exponentMask, 0ull)); + return bit_cast >(impl::assembleFloat64(sign, ieee754::traits::exponentMask, 0ull)); } if (rhsBiasedExp == ieee754::traits::specialValueExp) { @@ -392,23 +389,23 @@ namespace hlsl #ifdef RELAXED_NAN_PROPAGATION return rhs.data; #else - return createPreserveBitPattern(impl::propagateFloat64NaN(data, rhs.data)); + return bit_cast >(impl::propagateFloat64NaN(data, rhs.data)); #endif if ((uint64_t(lhsBiasedExp) | lhsMantissa) == 0u) - return createPreserveBitPattern(ieee754::traits::quietNaN); + return bit_cast >(ieee754::traits::quietNaN); - return createPreserveBitPattern(sign | ieee754::traits::exponentMask); + return bit_cast >(sign | ieee754::traits::exponentMask); } if (lhsBiasedExp == 0) { if (lhsMantissa == 0u) - return createPreserveBitPattern(sign); + return bit_cast >(sign); impl::normalizeFloat64Subnormal(lhsMantissa, lhsBiasedExp, lhsMantissa); } if (rhsBiasedExp == 0) { if (rhsMantissa == 0u) - return createPreserveBitPattern(sign); + return bit_cast >(sign); impl::normalizeFloat64Subnormal(rhsMantissa, rhsBiasedExp, rhsMantissa); } } @@ -428,24 +425,24 @@ namespace hlsl } newPseudoMantissa &= (ieee754::traits::mantissaMask); - return createPreserveBitPattern(impl::assembleFloat64(sign, uint64_t(exp) << ieee754::traits::mantissaBitCnt, newPseudoMantissa)); + return bit_cast >(impl::assembleFloat64(sign, 
uint64_t(exp) << ieee754::traits::mantissaBitCnt, newPseudoMantissa)); } else { //static_assert(false, "not implemented yet"); - return createPreserveBitPattern(0xdeadbeefbadcaffeull); + return bit_cast >(0xdeadbeefbadcaffeull); } } emulated_float64_t operator*(float rhs) { - return createPreserveBitPattern(data) * create(rhs); + return bit_cast >(data) * create(rhs); } /*emulated_float64_t reciprocal(uint64_t x) { using ThisType = emulated_float64_t; - ThisType output = ThisType::createPreserveBitPattern((0xbfcdd6a18f6a6f52ULL - x) >> 1); + ThisType output = ThisType::bit_cast >((0xbfcdd6a18f6a6f52ULL - x) >> 1); output = output * output; return output; }*/ @@ -454,23 +451,23 @@ namespace hlsl { if (FlushDenormToZero) { - //return emulated_float64_t::createPreserveBitPattern(data) * reciprocal(rhs.data); + //return emulated_float64_t::bit_cast >(data) * reciprocal(rhs.data); if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) - return createPreserveBitPattern(ieee754::traits::quietNaN); + return bit_cast >(ieee754::traits::quietNaN); if (!FastMath && ((rhs.data << 1) == 0)) - return createPreserveBitPattern(ieee754::traits::quietNaN); + return bit_cast >(ieee754::traits::quietNaN); const uint64_t sign = (data ^ rhs.data) & ieee754::traits::signMask; if (!FastMath && impl::areBothInfinity(data, rhs.data)) - return createPreserveBitPattern(ieee754::traits::quietNaN | sign); + return bit_cast >(ieee754::traits::quietNaN | sign); if (!FastMath && tgmath::isInf(data)) - return createPreserveBitPattern((data & ~ieee754::traits::signMask) | sign); + return bit_cast >((data & ~ieee754::traits::signMask) | sign); if (!FastMath && tgmath::isInf(rhs.data)) - return createPreserveBitPattern(0ull | sign); + return bit_cast >(0ull | sign); @@ -490,12 +487,12 @@ namespace hlsl mantissa &= ieee754::traits::mantissaMask; - return createPreserveBitPattern(impl::assembleFloat64(sign, uint64_t(exp) << ieee754::traits::mantissaBitCnt, mantissa)); + return bit_cast >(impl::assembleFloat64(sign, uint64_t(exp) << ieee754::traits::mantissaBitCnt, mantissa)); } else { //static_assert(false, "not implemented yet"); - return createPreserveBitPattern(0xdeadbeefbadcaffeull); + return bit_cast >(0xdeadbeefbadcaffeull); } } @@ -509,7 +506,7 @@ namespace hlsl if (!FastMath && impl::areBothZero(data, rhs.data)) return true; - const emulated_float64_t xored = createPreserveBitPattern(data ^ rhs.data); + const emulated_float64_t xored = bit_cast >(data ^ rhs.data); // TODO: check what fast math returns for -0 == 0 if ((xored.data & 0x7FFFFFFFFFFFFFFFull) == 0ull) return true; @@ -518,7 +515,7 @@ namespace hlsl } bool operator!=(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { - return !(createPreserveBitPattern(data) == rhs); + return !(bit_cast >(data) == rhs); } bool operator<(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { @@ -560,18 +557,17 @@ namespace hlsl return (lhsFlipped & diffBits) > (rhsFlipped & diffBits); } - bool operator<=(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { return !(emulated_float64_t::createPreserveBitPattern(data) > emulated_float64_t::createPreserveBitPattern(rhs.data)); } - bool operator>=(emulated_float64_t rhs) { return !(emulated_float64_t::createPreserveBitPattern(data) < emulated_float64_t::createPreserveBitPattern(rhs.data)); } + bool operator<=(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { return !(bit_cast >(data) > bit_cast >(rhs.data)); } + bool operator>=(emulated_float64_t rhs) { return !(bit_cast >(data) < bit_cast >(rhs.data)); } //logical operators bool 
operator&&(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { return bool(data) && bool(rhs.data); } bool operator||(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { return bool(data) || bool(rhs.data); } bool operator!() NBL_CONST_MEMBER_FUNC { return !bool(data); } - - // TODO: should modify self? + emulated_float64_t flipSign() { - return createPreserveBitPattern(data ^ ieee754::traits::signMask); + return bit_cast >(data ^ ieee754::traits::signMask); } bool isNaN() @@ -618,12 +614,31 @@ NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size::type extractMa return extractMantissa(x.data);\ }\ + +// TODO: this is wrong! fix it +#define DEFINE_BIT_CAST_SPEC(Type)\ +template<>\ +NBL_CONSTEXPR_FUNC Type bit_cast(NBL_CONST_REF_ARG(uint64_t) val)\ +{\ +Type output; \ +output.data = val; \ +\ +return output; \ +}\ +\ + + +DEFINE_BIT_CAST_SPEC(emulated_float64_t); +DEFINE_BIT_CAST_SPEC(emulated_float64_t); +DEFINE_BIT_CAST_SPEC(emulated_float64_t); +DEFINE_BIT_CAST_SPEC(emulated_float64_t); + namespace ieee754 { - IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); - IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); - IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); - IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); +IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); +IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); +IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); +IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); } } diff --git a/include/nbl/builtin/hlsl/emulated_float64_t_utils.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t_utils.hlsl index 8b8ee0966c..19f94b3c11 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t_utils.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t_utils.hlsl @@ -1,6 +1,7 @@ #ifndef _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_UTILS_INCLUDED_ #define _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_UTILS_INCLUDED_ +#include #include #include @@ -20,6 +21,21 @@ template >::type; #endif +namespace impl +{ + +template +struct static_cast_helper > +{ + static inline portable_float64_t<> cast(From u) + { + return int(u) - 1; + } +}; + +} + + template struct emulated_vector {}; From fa3636a57baa97dc1b59fa63d5fbf55ef70269ea Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Sat, 24 Aug 2024 18:33:30 +0200 Subject: [PATCH 036/432] add missing if statement for device gen builtins (top level cmake builtins include got removed causing it to not see some functions) --- src/nbl/device/CMakeLists.txt | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/nbl/device/CMakeLists.txt b/src/nbl/device/CMakeLists.txt index e850954dfe..62daf7398f 100644 --- a/src/nbl/device/CMakeLists.txt +++ b/src/nbl/device/CMakeLists.txt @@ -61,17 +61,19 @@ add_custom_command(OUTPUT ${NBL_OUTPUT_HEADERS} add_custom_target(DeviceHeaders DEPENDS ${NBL_OUTPUT_HEADERS}) -LIST_BUILTIN_RESOURCE(NBL_DEVICE_GEN_RESOURCES_TO_EMBED "video/device_capabilities_traits_members.hlsl") -LIST_BUILTIN_RESOURCE(NBL_DEVICE_GEN_RESOURCES_TO_EMBED "video/device_capabilities_traits_testers.hlsl") -LIST_BUILTIN_RESOURCE(NBL_DEVICE_GEN_RESOURCES_TO_EMBED "video/device_capabilities_traits_defaults.hlsl") -LIST_BUILTIN_RESOURCE(NBL_DEVICE_GEN_RESOURCES_TO_EMBED "video/device_capabilities_traits_floats.hlsl") -LIST_BUILTIN_RESOURCE(NBL_DEVICE_GEN_RESOURCES_TO_EMBED "video/device_capabilities_traits_enums.hlsl") +if(NBL_EMBED_BUILTIN_RESOURCES) + 
LIST_BUILTIN_RESOURCE(NBL_DEVICE_GEN_RESOURCES_TO_EMBED "video/device_capabilities_traits_members.hlsl") + LIST_BUILTIN_RESOURCE(NBL_DEVICE_GEN_RESOURCES_TO_EMBED "video/device_capabilities_traits_testers.hlsl") + LIST_BUILTIN_RESOURCE(NBL_DEVICE_GEN_RESOURCES_TO_EMBED "video/device_capabilities_traits_defaults.hlsl") + LIST_BUILTIN_RESOURCE(NBL_DEVICE_GEN_RESOURCES_TO_EMBED "video/device_capabilities_traits_floats.hlsl") + LIST_BUILTIN_RESOURCE(NBL_DEVICE_GEN_RESOURCES_TO_EMBED "video/device_capabilities_traits_enums.hlsl") -get_filename_component(_DEVICE_GEN_BR_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/builtin/include" ABSOLUTE) -get_filename_component(_DEVICE_GEN_BR_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/builtin/src" ABSOLUTE) + get_filename_component(_DEVICE_GEN_BR_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/builtin/include" ABSOLUTE) + get_filename_component(_DEVICE_GEN_BR_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/builtin/src" ABSOLUTE) -ADD_CUSTOM_BUILTIN_RESOURCES(deviceGenBuiltinResourceData NBL_DEVICE_GEN_RESOURCES_TO_EMBED "${NBL_DEVICE_GEN_INCLUDE_DIR}" "nbl" "nbl::devicegen::builtin" "${_DEVICE_GEN_BR_OUTPUT_DIRECTORY_HEADER_}" "${_DEVICE_GEN_BR_OUTPUT_DIRECTORY_SOURCE_}" "STATIC" "INTERNAL") -add_dependencies(deviceGenBuiltinResourceData DeviceHeaders) + ADD_CUSTOM_BUILTIN_RESOURCES(deviceGenBuiltinResourceData NBL_DEVICE_GEN_RESOURCES_TO_EMBED "${NBL_DEVICE_GEN_INCLUDE_DIR}" "nbl" "nbl::devicegen::builtin" "${_DEVICE_GEN_BR_OUTPUT_DIRECTORY_HEADER_}" "${_DEVICE_GEN_BR_OUTPUT_DIRECTORY_SOURCE_}" "STATIC" "INTERNAL") + add_dependencies(deviceGenBuiltinResourceData DeviceHeaders) +endif() set(NBL_DEVICE_GEN_INCLUDE_DIR "${NBL_DEVICE_GEN_INCLUDE_DIR}" From a265dbadd2c4d51bf08e0120ebb5efb01c60ca04 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Sat, 24 Aug 2024 17:38:38 +0100 Subject: [PATCH 037/432] Saving work --- examples_tests | 2 +- include/nbl/builtin/hlsl/cpp_compat/basic.h | 76 ++++++------ .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 82 +++++++++++++ .../hlsl/emulated_float64_t_utils.hlsl | 110 +++++++++++------- 4 files changed, 193 insertions(+), 77 deletions(-) diff --git a/examples_tests b/examples_tests index 293ec73d47..b6b11ae462 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 293ec73d47f2ecdedd2e4b00b31b663e03aa000a +Subproject commit b6b11ae462a551ead3380f1b87c29a342e125365 diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index dfa12aa33d..9bf57fb29e 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -3,6 +3,28 @@ #include +namespace nbl +{ +namespace hlsl +{ +namespace impl +{ + template + struct static_cast_helper + { + static inline To cast(From u) + { +#ifndef __HLSL_VERSION + return static_cast(u); +#else + return To(u); +#endif + } + }; +} +} +} + #ifndef __HLSL_VERSION #include @@ -16,10 +38,10 @@ namespace nbl::hlsl { - template - T _static_cast(U v) + template + To _static_cast(From v) { - return static_cast(v); + return impl::static_cast_helper::cast(v); } template @@ -47,41 +69,29 @@ namespace nbl::hlsl namespace nbl { - namespace hlsl - { - namespace impl - { - template - struct static_cast_helper - { - static inline To cast(From u) - { - return To(u); - } - }; - } +namespace hlsl +{ - template - To _static_cast(From v) - { - return impl::static_cast_helper(v); - //return (T)v; - } +template +To _static_cast(From v) +{ + return 
impl::static_cast_helper::cast(v); +} #if 0 // TODO: for later - template - struct add_reference - { - using type = ref; - }; - template - struct add_pointer - { - using type = ptr; - }; +template +struct add_reference +{ + using type = ref; +}; +template +struct add_pointer +{ + using type = ptr; +}; #endif - } +} } #define NBL_REF_ARG(...) inout __VA_ARGS__ diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index cba77b7ddd..9fbf8792da 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -574,6 +574,11 @@ namespace hlsl { return tgmath::isnan(data); } + + NBL_CONSTEXPR_STATIC_INLINE bool supportsFastMath() + { + return FastMath; + } }; #define COMMA , @@ -627,6 +632,81 @@ return output; \ }\ \ +namespace impl +{ + +#if 0 +template +struct static_cast_helper,void> +{ + using From = emulated_float64_t; + + static inline Scalar cast(From v) + { + if (is_floating_point::value) // DOUBLE ALSO REPORTS THIS AS TRUE! (so does float16_t) + { + int exponent = ieee754::extractExponent(v.data); + if (!From::supportsFastMath()) + { + if (exponent > 127) + return bit_cast(ieee754::traits::inf); + if (exponent < -126) + return -bit_cast(ieee754::traits::inf); + if (tgmath::isnan(v.data)) + return bit_cast(ieee754::traits::quietNaN); + } + + uint32_t sign = uint32_t((v.data & ieee754::traits::signMask) >> 32); + uint32_t biasedExponent = uint32_t(exponent + ieee754::traits::exponentBias) << ieee754::traits::mantissaBitCnt; + uint32_t mantissa = uint32_t(v.data >> (ieee754::traits::mantissaBitCnt - ieee754::traits::mantissaBitCnt)) & ieee754::traits::mantissaMask; + + return bit_cast(sign | biasedExponent | mantissa); + } + + return bit_cast(ieee754::traits::quietNaN); + } +}; +#endif + +// TODO: fix cast to float +#define DEFINE_EMULATED_FLOAT64_STATIC_CAST(Type)\ +template\ +struct static_cast_helper\ +{\ + static inline To cast(Type v)\ + {\ + if (is_floating_point::value)\ + {\ + int exponent = ieee754::extractExponent(v.data);\ + if (!Type::supportsFastMath())\ + {\ + if (exponent > 127)\ + return bit_cast(ieee754::traits::inf);\ + if (exponent < -126)\ + return -bit_cast(ieee754::traits::inf);\ + if (tgmath::isnan(v.data))\ + return bit_cast(ieee754::traits::quietNaN);\ + }\ +\ + uint32_t sign = uint32_t((v.data & ieee754::traits::signMask) >> 32);\ + uint32_t biasedExponent = uint32_t(exponent + ieee754::traits::exponentBias) << ieee754::traits::mantissaBitCnt;\ + uint32_t mantissa = uint32_t(v.data >> (ieee754::traits::mantissaBitCnt - ieee754::traits::mantissaBitCnt)) & ieee754::traits::mantissaMask;\ +\ + return bit_cast(sign | biasedExponent | mantissa);\ + }\ +\ + return bit_cast(ieee754::traits::quietNaN);\ + }\ +};\ +\ + +DEFINE_EMULATED_FLOAT64_STATIC_CAST(emulated_float64_t); +DEFINE_EMULATED_FLOAT64_STATIC_CAST(emulated_float64_t); +DEFINE_EMULATED_FLOAT64_STATIC_CAST(emulated_float64_t); +DEFINE_EMULATED_FLOAT64_STATIC_CAST(emulated_float64_t); + +} + DEFINE_BIT_CAST_SPEC(emulated_float64_t); DEFINE_BIT_CAST_SPEC(emulated_float64_t); @@ -652,5 +732,7 @@ IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t >::type; #endif -namespace impl -{ - -template -struct static_cast_helper > -{ - static inline portable_float64_t<> cast(From u) - { - return int(u) - 1; - } -}; - -} - - template struct emulated_vector {}; @@ -133,11 +118,6 @@ struct emulated_vector return output; } - - float32_t2 getAsFloat2() - { - return float32_t2(x.getAsFloat32(), 
y.getAsFloat32()); - } }; template @@ -183,6 +163,64 @@ using emulated_vector_t3 = emulated_vector; template using emulated_vector_t4 = emulated_vector; +// TODO: works only for float, fix +namespace impl +{ + +#if 1 +template +struct static_cast_helper,emulated_vector,void> +{ + static inline vector cast(emulated_vector vec) + { + return vector(_static_cast(vec.x), _static_cast(vec.y)); + } +}; +#endif + +#if 0 +#define DEFINE_EMULATED_VECTOR_STATIC_CAST(COND,...)\ +template\ +struct static_cast_helper >\ +{\ + static inline To cast(emulated_vector_t2<__VA_ARGS__ > vec)\ + {\ + return To(_static_cast(vec.x), _static_cast(vec.y));\ + }\ +};\ +\ +template\ +struct static_cast_helper >\ +{\ + static inline To cast(emulated_vector_t3<__VA_ARGS__ > vec)\ + {\ + return To(_static_cast(vec.x), _static_cast(vec.y), _static_cast(vec.z));\ + }\ +};\ +\ +template\ +struct static_cast_helper >\ +{\ + static inline To cast(emulated_vector_t4<__VA_ARGS__ > vec)\ + {\ + return To(_static_cast(vec.x), _static_cast(vec.y), _static_cast(vec.z), _static_cast(vec.w));\ + }\ +};\ +\ + +#define COND +DEFINE_EMULATED_VECTOR_STATIC_CAST(COND,emulated_float64_t); +DEFINE_EMULATED_VECTOR_STATIC_CAST(COND,emulated_float64_t); +DEFINE_EMULATED_VECTOR_STATIC_CAST(COND,emulated_float64_t); +DEFINE_EMULATED_VECTOR_STATIC_CAST(COND,emulated_float64_t); + +#undef DEFINE_EMULATED_VECTOR_STATIC_CAST +#undef COND +#endif + + +} + //template //struct emulated_matrix_base //{ @@ -311,10 +349,10 @@ namespace impl template struct is_emulated { - NBL_CONSTEXPR_STATIC_INLINE bool value = is_same >::value || - is_same >::value || - is_same >::value || - is_same >::value; + NBL_CONSTEXPR_STATIC_INLINE bool value = is_same_v > || + is_same_v > || + is_same_v > || + is_same_v >; }; template::value > @@ -447,15 +485,11 @@ inline float32_t2 convert_portable_vector64_t2_to_float32_t2(portable_vector64_t return float32_t2(vec.x, vec.y); } -template<> -inline float32_t2 convert_portable_vector64_t2_to_float32_t2(portable_vector64_t2 vec) -{ -#ifdef __HLSL_VERSION - return emulated_vector, 2>::create(vec.x, vec.y).getAsFloat2(); -#else - return float32_t2(bit_cast(0xdeadbeefu), bit_cast(0xbadcaffeu)); -#endif -} +//template<> +//inline float32_t2 convert_portable_vector64_t2_to_float32_t2(portable_vector64_t2 vec) +//{ +// return _static_cast(vec); +//} template >::value> inline float32_t convert_portable_float64_t_to_float(portable_float64_t<> val) @@ -464,16 +498,6 @@ inline float32_t convert_portable_float64_t_to_float(portable_float64_t<> val) return float32_t(val); } -template<> -inline float32_t convert_portable_float64_t_to_float(portable_float64_t<> val) -{ -#ifdef __HLSL_VERSION - return val.getAsFloat32(); -#else - return float32_t(bit_cast(0xdeadbeefu)); -#endif -} - } } #endif \ No newline at end of file From 96d3281eed469f70cdfee5af7357430fc289a731 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 26 Aug 2024 19:09:12 +0100 Subject: [PATCH 038/432] Implemented casting functions for emulated_float64_t --- examples_tests | 2 +- include/nbl/builtin/hlsl/cpp_compat/basic.h | 19 +- .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 180 +++++++++--------- .../hlsl/emulated_float64_t_utils.hlsl | 55 ------ .../nbl/builtin/hlsl/glsl_compat/core.hlsl | 6 + include/nbl/builtin/hlsl/ieee754.hlsl | 2 + .../hlsl/impl/emulated_float64_t_impl.hlsl | 55 +++--- include/nbl/builtin/hlsl/tgmath.hlsl | 15 +- 8 files changed, 136 insertions(+), 198 deletions(-) diff --git a/examples_tests b/examples_tests index b6b11ae462..dc1010363e 160000 --- 
a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit b6b11ae462a551ead3380f1b87c29a342e125365 +Subproject commit dc1010363efd212a943c03f40e1a2dc404b36bfc diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 9bf57fb29e..87c96d6fe3 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -22,6 +22,13 @@ namespace impl } }; } + +template +To _static_cast(From v) +{ + return impl::static_cast_helper::cast(v); +} + } } @@ -38,12 +45,6 @@ namespace impl namespace nbl::hlsl { - template - To _static_cast(From v) - { - return impl::static_cast_helper::cast(v); - } - template using add_reference = std::add_lvalue_reference; @@ -72,12 +73,6 @@ namespace nbl namespace hlsl { -template -To _static_cast(From v) -{ - return impl::static_cast_helper::cast(v); -} - #if 0 // TODO: for later template struct add_reference diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index 9fbf8792da..fcda118b33 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -22,9 +22,7 @@ namespace hlsl NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(emulated_float64_t val) { - //return bit_cast >(val.data); return val; - //TODO: return val? } NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(int32_t val) @@ -210,7 +208,7 @@ namespace hlsl if (lhsBiasedExp == ieee754::traits::specialValueExp) { bool propagate = (lhsMantissa | rhsMantissa) != 0u; - return bit_cast >(tgmath::lerp(data, impl::propagateFloat64NaN(data, rhs.data), propagate)); + return bit_cast >(glsl::mix(data, impl::propagateFloat64NaN(data, rhs.data), propagate)); } mantissa = lhsMantissa + rhsMantissa; @@ -234,11 +232,11 @@ namespace hlsl if (lhsBiasedExp == ieee754::traits::specialValueExp) { const bool propagate = (lhsMantissa) != 0u; - return bit_cast >(tgmath::lerp(ieee754::traits::exponentMask | lhsSign, impl::propagateFloat64NaN(data, rhs.data), propagate)); + return bit_cast >(glsl::mix(ieee754::traits::exponentMask | lhsSign, impl::propagateFloat64NaN(data, rhs.data), propagate)); } - expDiff = tgmath::lerp(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); - rhsMantissa = tgmath::lerp(rhsMantissa | (1ull << 52), rhsMantissa, rhsBiasedExp == 0); + expDiff = glsl::mix(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); + rhsMantissa = glsl::mix(rhsMantissa | (1ull << 52), rhsMantissa, rhsBiasedExp == 0); const uint32_t3 shifted = impl::shift64ExtraRightJamming(uint32_t3(impl::packUint64(rhsMantissa), 0u), expDiff); rhsMantissa = impl::unpackUint64(shifted.xy); mantissaExtended.z = shifted.z; @@ -278,11 +276,11 @@ namespace hlsl if (lhsBiasedExp == ieee754::traits::specialValueExp) { bool propagate = lhsMantissa != 0u; - return bit_cast >(tgmath::lerp(impl::assembleFloat64(lhsSign, ieee754::traits::exponentMask, 0ull), impl::propagateFloat64NaN(data, rhs.data), propagate)); + return bit_cast >(glsl::mix(impl::assembleFloat64(lhsSign, ieee754::traits::exponentMask, 0ull), impl::propagateFloat64NaN(data, rhs.data), propagate)); } - expDiff = tgmath::lerp(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); - rhsMantissa = tgmath::lerp(rhsMantissa | 0x4000000000000000ull, rhsMantissa, rhsBiasedExp == 0); + expDiff = glsl::mix(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); + rhsMantissa = glsl::mix(rhsMantissa | 0x4000000000000000ull, rhsMantissa, rhsBiasedExp == 0); rhsMantissa = 
impl::unpackUint64(impl::shift64RightJamming(impl::packUint64(rhsMantissa), expDiff)); lhsMantissa |= 0x4000000000000000ull; frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); @@ -293,10 +291,10 @@ namespace hlsl if (lhsBiasedExp == ieee754::traits::specialValueExp) { bool propagate = ((lhsMantissa) | (rhsMantissa)) != 0u; - return bit_cast >(tgmath::lerp(ieee754::traits::quietNaN, impl::propagateFloat64NaN(data, rhs.data), propagate)); + return bit_cast >(glsl::mix(ieee754::traits::quietNaN, impl::propagateFloat64NaN(data, rhs.data), propagate)); } - rhsBiasedExp = tgmath::lerp(rhsBiasedExp, 1, lhsBiasedExp == 0); - lhsBiasedExp = tgmath::lerp(lhsBiasedExp, 1, lhsBiasedExp == 0); + rhsBiasedExp = glsl::mix(rhsBiasedExp, 1, lhsBiasedExp == 0); + lhsBiasedExp = glsl::mix(lhsBiasedExp, 1, lhsBiasedExp == 0); const uint32_t2 lhsMantissaPacked = impl::packUint64(lhsMantissa); @@ -324,11 +322,11 @@ namespace hlsl signOfDifference = ieee754::traits::signMask; } - biasedExp = tgmath::lerp(rhsBiasedExp, lhsBiasedExp, signOfDifference == 0u); + biasedExp = glsl::mix(rhsBiasedExp, lhsBiasedExp, signOfDifference == 0u); lhsSign ^= signOfDifference; uint64_t retval_0 = impl::packFloat64(uint32_t(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) << 31, 0, 0u, 0u); uint64_t retval_1 = impl::normalizeRoundAndPackFloat64(lhsSign, biasedExp - 11, frac.x, frac.y); - return bit_cast >(tgmath::lerp(retval_0, retval_1, frac.x != 0u || frac.y != 0u)); + return bit_cast >(glsl::mix(retval_0, retval_1, frac.x != 0u || frac.y != 0u)); } } else @@ -581,51 +579,48 @@ namespace hlsl } }; -#define COMMA , -#define IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(Type) \ +#define IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(...) \ template<>\ -struct traits_base\ +struct traits_base<__VA_ARGS__ >\ {\ NBL_CONSTEXPR_STATIC_INLINE int16_t exponentBitCnt = 11;\ NBL_CONSTEXPR_STATIC_INLINE int16_t mantissaBitCnt = 52;\ };\ template<>\ -inline uint32_t extractBiasedExponent(Type x)\ +inline uint32_t extractBiasedExponent(__VA_ARGS__ x)\ {\ return extractBiasedExponent(x.data);\ }\ \ template<>\ -inline int extractExponent(Type x)\ +inline int extractExponent(__VA_ARGS__ x)\ {\ return extractExponent(x.data);\ }\ \ template<>\ -NBL_CONSTEXPR_INLINE_FUNC Type replaceBiasedExponent(Type x, typename unsigned_integer_of_size::type biasedExp)\ +NBL_CONSTEXPR_INLINE_FUNC __VA_ARGS__ replaceBiasedExponent(__VA_ARGS__ x, typename unsigned_integer_of_size::type biasedExp)\ {\ - return Type(replaceBiasedExponent(x.data, biasedExp));\ + return __VA_ARGS__(replaceBiasedExponent(x.data, biasedExp));\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC Type fastMulExp2(Type x, int n)\ +NBL_CONSTEXPR_INLINE_FUNC __VA_ARGS__ fastMulExp2(__VA_ARGS__ x, int n)\ {\ - return Type(replaceBiasedExponent(x.data, extractBiasedExponent(x) + uint32_t(n)));\ + return __VA_ARGS__(replaceBiasedExponent(x.data, extractBiasedExponent(x) + uint32_t(n)));\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size::type extractMantissa(Type x)\ +NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size::type extractMantissa(__VA_ARGS__ x)\ {\ return extractMantissa(x.data);\ }\ - -// TODO: this is wrong! 
fix it -#define DEFINE_BIT_CAST_SPEC(Type)\ +#define DEFINE_BIT_CAST_SPEC(...)\ template<>\ -NBL_CONSTEXPR_FUNC Type bit_cast(NBL_CONST_REF_ARG(uint64_t) val)\ +NBL_CONSTEXPR_FUNC __VA_ARGS__ bit_cast<__VA_ARGS__, uint64_t>(NBL_CONST_REF_ARG(uint64_t) val)\ {\ -Type output; \ +__VA_ARGS__ output; \ output.data = val; \ \ return output; \ @@ -635,90 +630,92 @@ return output; \ namespace impl { -#if 0 -template -struct static_cast_helper,void> +template +struct static_cast_helper,void> { + // TODO: + // static_assert(is_arithmetic::value); + using From = emulated_float64_t; - static inline Scalar cast(From v) + // TODO: test + static inline To cast(From v) { - if (is_floating_point::value) // DOUBLE ALSO REPORTS THIS AS TRUE! (so does float16_t) + if (is_same_v) + return To(bit_cast(v.data)); + + if (is_floating_point::value) { - int exponent = ieee754::extractExponent(v.data); + const int exponent = ieee754::extractExponent(v.data); if (!From::supportsFastMath()) { - if (exponent > 127) - return bit_cast(ieee754::traits::inf); - if (exponent < -126) - return -bit_cast(ieee754::traits::inf); + if (exponent > ieee754::traits::exponentMax) + return bit_cast(ieee754::traits::inf); + if (exponent < ieee754::traits::exponentMin) + return -bit_cast(ieee754::traits::inf); if (tgmath::isnan(v.data)) - return bit_cast(ieee754::traits::quietNaN); + return bit_cast(ieee754::traits::quietNaN); } - uint32_t sign = uint32_t((v.data & ieee754::traits::signMask) >> 32); - uint32_t biasedExponent = uint32_t(exponent + ieee754::traits::exponentBias) << ieee754::traits::mantissaBitCnt; - uint32_t mantissa = uint32_t(v.data >> (ieee754::traits::mantissaBitCnt - ieee754::traits::mantissaBitCnt)) & ieee754::traits::mantissaMask; + using AsUint = typename unsigned_integer_of_size::type; - return bit_cast(sign | biasedExponent | mantissa); + const uint32_t toBitSize = sizeof(To) * 8; + const AsUint sign = AsUint(ieee754::extractSign(v.data) << (toBitSize - 1)); + const AsUint biasedExponent = AsUint(exponent + ieee754::traits::exponentBias) << ieee754::traits::mantissaBitCnt; + const AsUint mantissa = AsUint(v.data >> (ieee754::traits::mantissaBitCnt - ieee754::traits::mantissaBitCnt)) & ieee754::traits::mantissaMask; + + return bit_cast(sign | biasedExponent | mantissa); } - return bit_cast(ieee754::traits::quietNaN); - } -}; -#endif + // NOTE: casting from negative float to unsigned int is an UB, function will return abs value in this case + if (is_integral::value) + { + const int exponent = ieee754::extractExponent(v.data); + if (exponent < 0) + return 0; -// TODO: fix cast to float -#define DEFINE_EMULATED_FLOAT64_STATIC_CAST(Type)\ -template\ -struct static_cast_helper\ -{\ - static inline To cast(Type v)\ - {\ - if (is_floating_point::value)\ - {\ - int exponent = ieee754::extractExponent(v.data);\ - if (!Type::supportsFastMath())\ - {\ - if (exponent > 127)\ - return bit_cast(ieee754::traits::inf);\ - if (exponent < -126)\ - return -bit_cast(ieee754::traits::inf);\ - if (tgmath::isnan(v.data))\ - return bit_cast(ieee754::traits::quietNaN);\ - }\ -\ - uint32_t sign = uint32_t((v.data & ieee754::traits::signMask) >> 32);\ - uint32_t biasedExponent = uint32_t(exponent + ieee754::traits::exponentBias) << ieee754::traits::mantissaBitCnt;\ - uint32_t mantissa = uint32_t(v.data >> (ieee754::traits::mantissaBitCnt - ieee754::traits::mantissaBitCnt)) & ieee754::traits::mantissaMask;\ -\ - return bit_cast(sign | biasedExponent | mantissa);\ - }\ -\ - return bit_cast(ieee754::traits::quietNaN);\ - }\ -};\ -\ + uint64_t 
unsignedOutput = ieee754::extractMantissa(v.data) & 1ull << ieee754::traits::mantissaBitCnt; + const int shiftAmount = exponent - int(ieee754::traits::mantissaBitCnt); + + if (shiftAmount < 0) + unsignedOutput <<= -shiftAmount; + else + unsignedOutput >>= shiftAmount; -DEFINE_EMULATED_FLOAT64_STATIC_CAST(emulated_float64_t); -DEFINE_EMULATED_FLOAT64_STATIC_CAST(emulated_float64_t); -DEFINE_EMULATED_FLOAT64_STATIC_CAST(emulated_float64_t); -DEFINE_EMULATED_FLOAT64_STATIC_CAST(emulated_float64_t); + if (is_signed::value) + { + int64_t signedOutput64 = unsignedOutput & ((1ull << 63) - 1); + To signedOutput = To(signedOutput64); + if (ieee754::extractSignPreserveBitPattern(v.data) != 0) + signedOutput = -signedOutput; + + return signedOutput; + } + + return To(unsignedOutput); + } + + // assert(false); + return 0xdeadbeefbadcaffeull; + } +}; } +DEFINE_BIT_CAST_SPEC(emulated_float64_t); +DEFINE_BIT_CAST_SPEC(emulated_float64_t); +DEFINE_BIT_CAST_SPEC(emulated_float64_t); +DEFINE_BIT_CAST_SPEC(emulated_float64_t); -DEFINE_BIT_CAST_SPEC(emulated_float64_t); -DEFINE_BIT_CAST_SPEC(emulated_float64_t); -DEFINE_BIT_CAST_SPEC(emulated_float64_t); -DEFINE_BIT_CAST_SPEC(emulated_float64_t); +template +struct is_floating_point > : bool_constant {}; namespace ieee754 { -IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); -IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); -IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); -IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); +IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); +IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); +IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); +IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); } } @@ -730,7 +727,6 @@ IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t; namespace impl { -#if 1 template struct static_cast_helper,emulated_vector,void> { @@ -176,48 +175,6 @@ struct static_cast_helper,emulated_vector,void> return vector(_static_cast(vec.x), _static_cast(vec.y)); } }; -#endif - -#if 0 -#define DEFINE_EMULATED_VECTOR_STATIC_CAST(COND,...)\ -template\ -struct static_cast_helper >\ -{\ - static inline To cast(emulated_vector_t2<__VA_ARGS__ > vec)\ - {\ - return To(_static_cast(vec.x), _static_cast(vec.y));\ - }\ -};\ -\ -template\ -struct static_cast_helper >\ -{\ - static inline To cast(emulated_vector_t3<__VA_ARGS__ > vec)\ - {\ - return To(_static_cast(vec.x), _static_cast(vec.y), _static_cast(vec.z));\ - }\ -};\ -\ -template\ -struct static_cast_helper >\ -{\ - static inline To cast(emulated_vector_t4<__VA_ARGS__ > vec)\ - {\ - return To(_static_cast(vec.x), _static_cast(vec.y), _static_cast(vec.z), _static_cast(vec.w));\ - }\ -};\ -\ - -#define COND -DEFINE_EMULATED_VECTOR_STATIC_CAST(COND,emulated_float64_t); -DEFINE_EMULATED_VECTOR_STATIC_CAST(COND,emulated_float64_t); -DEFINE_EMULATED_VECTOR_STATIC_CAST(COND,emulated_float64_t); -DEFINE_EMULATED_VECTOR_STATIC_CAST(COND,emulated_float64_t); - -#undef DEFINE_EMULATED_VECTOR_STATIC_CAST -#undef COND -#endif - } @@ -479,18 +436,6 @@ NBL_CONSTEXPR_INLINE_FUNC portable_vector64_t3 create_portable_vector64_t2_from_ return output; } -template >::value> -inline float32_t2 convert_portable_vector64_t2_to_float32_t2(portable_vector64_t2 vec) -{ - return float32_t2(vec.x, vec.y); -} - -//template<> -//inline float32_t2 convert_portable_vector64_t2_to_float32_t2(portable_vector64_t2 vec) -//{ 
-// return _static_cast(vec); -//} - template >::value> inline float32_t convert_portable_float64_t_to_float(portable_float64_t<> val) { diff --git a/include/nbl/builtin/hlsl/glsl_compat/core.hlsl b/include/nbl/builtin/hlsl/glsl_compat/core.hlsl index 61bc20e655..bcc0c76bdb 100644 --- a/include/nbl/builtin/hlsl/glsl_compat/core.hlsl +++ b/include/nbl/builtin/hlsl/glsl_compat/core.hlsl @@ -15,6 +15,12 @@ namespace hlsl namespace glsl { +template +NBL_CONSTEXPR_INLINE_FUNC typename enable_if::value, T>::type mix(T a, T b, bool c) +{ + return c ? b : a; +} + #ifndef __HLSL_VERSION // GLM Aliases diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index 13308bb2dd..e08831e2ce 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -93,6 +93,8 @@ struct traits : traits_base NBL_CONSTEXPR_STATIC_INLINE bit_rep_t quietNaN = exponentMask | (1ull << (base_t::mantissaBitCnt - 1)); NBL_CONSTEXPR_STATIC_INLINE bit_rep_t max = ((1ull << (sizeof(Float) * 8 - 1)) - 1) & (~(1ull << base_t::mantissaBitCnt)); NBL_CONSTEXPR_STATIC_INLINE bit_rep_t min = 1ull << base_t::mantissaBitCnt; + NBL_CONSTEXPR_STATIC_INLINE int exponentMax = exponentBias; + NBL_CONSTEXPR_STATIC_INLINE int exponentMin = -(exponentBias - 1); }; template diff --git a/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl b/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl index a828449d13..92df407530 100644 --- a/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl @@ -5,6 +5,7 @@ #include #include #include +#include #define FLOAT_ROUND_NEAREST_EVEN 0 #define FLOAT_ROUND_TO_ZERO 1 @@ -168,7 +169,7 @@ NBL_CONSTEXPR_INLINE_FUNC uint64_t propagateFloat64NaN(uint64_t lhs, uint64_t rh lhs |= 0x0008000000000000ull; rhs |= 0x0008000000000000ull; - return tgmath::lerp(rhs, tgmath::lerp(lhs, rhs, tgmath::isnan(rhs)), tgmath::isnan(lhs)); + return glsl::mix(rhs, glsl::mix(lhs, rhs, tgmath::isnan(rhs)), tgmath::isnan(lhs)); return 0; #endif } @@ -187,16 +188,16 @@ NBL_CONSTEXPR_INLINE_FUNC uint32_t2 shift64RightJamming(uint32_t2 val, int count uint32_t2 output; const int negCount = (-count) & 31; - output.x = tgmath::lerp(0u, val.x, count == 0); - output.x = tgmath::lerp(output.x, (val.x >> count), count < 32); + output.x = glsl::mix(0u, val.x, count == 0); + output.x = glsl::mix(output.x, (val.x >> count), count < 32); output.y = uint32_t((val.x | val.y) != 0u); /* count >= 64 */ uint32_t z1_lt64 = (val.x>>(count & 31)) | uint32_t(((val.x<>count) | uint32_t((val.y<> (count & 31)), count < 64); - output.y = tgmath::lerp(output.y, (val.x << negCount) | (val.y >> count), count < 32); + output.y = glsl::mix(0u, (val.x >> (count & 31)), count < 64); + output.y = glsl::mix(output.y, (val.x << negCount) | (val.y >> count), count < 32); - val.z = tgmath::lerp(val.z | val.y, val.z, count < 32); - output.x = tgmath::lerp(output.x, val.x >> count, count < 32); + val.z = glsl::mix(val.z | val.y, val.z, count < 32); + output.x = glsl::mix(output.x, val.x >> count, count < 32); output.z |= uint32_t(val.z != 0u); - output.x = tgmath::lerp(output.x, 0u, (count == 32)); - output.y = tgmath::lerp(output.y, val.x, (count == 32)); - output.z = tgmath::lerp(output.z, val.y, (count == 32)); - output.x = tgmath::lerp(output.x, val.x, (count == 0)); - output.y = tgmath::lerp(output.y, val.y, (count == 0)); - output.z = tgmath::lerp(output.z, val.z, (count == 0)); + output.x = glsl::mix(output.x, 0u, (count == 
32)); + output.y = glsl::mix(output.y, val.x, (count == 32)); + output.z = glsl::mix(output.z, val.y, (count == 32)); + output.x = glsl::mix(output.x, val.x, (count == 0)); + output.y = glsl::mix(output.y, val.y, (count == 0)); + output.z = glsl::mix(output.z, val.z, (count == 0)); return output; } @@ -236,7 +237,7 @@ NBL_CONSTEXPR_INLINE_FUNC uint64_t shortShift64Left(uint64_t val, int count) uint32_t2 output; output.y = packed.y << count; // TODO: fix - output.x = tgmath::lerp((packed.x << count | (packed.y >> ((-count) & 31))), packed.x, count == 0); + output.x = glsl::mix((packed.x << count | (packed.y >> ((-count) & 31))), packed.x, count == 0); return unpackUint64(output); }; @@ -318,7 +319,7 @@ NBL_CONSTEXPR_INLINE_FUNC uint64_t roundAndPackFloat64(uint64_t zSign, int zExp, } else { - zExp = tgmath::lerp(zExp, 0, (mantissaExtended.x | mantissaExtended.y) == 0u); + zExp = glsl::mix(zExp, 0, (mantissaExtended.x | mantissaExtended.y) == 0u); } return assembleFloat64(zSign, uint64_t(zExp) << ieee754::traits::mantissaBitCnt, unpackUint64(mantissaExtended.xy)); @@ -357,15 +358,15 @@ static inline void normalizeFloat64Subnormal(uint64_t mantissa, uint32_t2 mantissaPacked = packUint64(mantissa); int shiftCount; uint32_t2 temp; - shiftCount = countLeadingZeros32(tgmath::lerp(mantissaPacked.x, mantissaPacked.y, mantissaPacked.x == 0u)) - 11; - outExp = tgmath::lerp(1 - shiftCount, -shiftCount - 31, mantissaPacked.x == 0u); + shiftCount = countLeadingZeros32(glsl::mix(mantissaPacked.x, mantissaPacked.y, mantissaPacked.x == 0u)) - 11; + outExp = glsl::mix(1 - shiftCount, -shiftCount - 31, mantissaPacked.x == 0u); - temp.x = tgmath::lerp(mantissaPacked.y << shiftCount, mantissaPacked.y >> (-shiftCount), shiftCount < 0); - temp.y = tgmath::lerp(0u, mantissaPacked.y << (shiftCount & 31), shiftCount < 0); + temp.x = glsl::mix(mantissaPacked.y << shiftCount, mantissaPacked.y >> (-shiftCount), shiftCount < 0); + temp.y = glsl::mix(0u, mantissaPacked.y << (shiftCount & 31), shiftCount < 0); shortShift64Left(impl::unpackUint64(mantissaPacked), shiftCount); - outMantissa = tgmath::lerp(outMantissa, unpackUint64(temp), mantissaPacked.x == 0); + outMantissa = glsl::mix(outMantissa, unpackUint64(temp), mantissaPacked.x == 0); } NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) diff --git a/include/nbl/builtin/hlsl/tgmath.hlsl b/include/nbl/builtin/hlsl/tgmath.hlsl index 2ff17e608e..4a35e10a07 100644 --- a/include/nbl/builtin/hlsl/tgmath.hlsl +++ b/include/nbl/builtin/hlsl/tgmath.hlsl @@ -26,19 +26,12 @@ inline bool isnan(T val) return bool((ieee754::extractBiasedExponent(val) == ieee754::traits::specialValueExp) && (asUint & ieee754::traits::mantissaMask)); } -// TODO: better implementation template -//NBL_CONSTEXPR_INLINE_FUNC enable_if::type, T>::type lerp(T a, T b, bool c) -NBL_CONSTEXPR_INLINE_FUNC T lerp(T a, T b, bool c) +NBL_CONSTEXPR_INLINE_FUNC bool isInf(T val) { - return c ? 
b : a; -} - -template -NBL_CONSTEXPR_INLINE_FUNC bool isInf(Uint val) -{ - using AsFloat = typename float_of_size::type; - return (val & ~ieee754::traits::signMask) == ieee754::traits::inf; + using AsUint = typename unsigned_integer_of_size::type; + AsUint tmp = bit_cast(val); + return (tmp & ~ieee754::traits::signMask) == ieee754::traits::inf; } } From 916ba166df7860fb51ab7ac72fd5277989b5f7be Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 27 Aug 2024 21:46:40 +0100 Subject: [PATCH 039/432] Removed unnecessary branches from add operator --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 362 +++++------------- include/nbl/builtin/hlsl/ieee754.hlsl | 8 + .../hlsl/impl/emulated_float64_t_impl.hlsl | 4 + 4 files changed, 115 insertions(+), 261 deletions(-) diff --git a/examples_tests b/examples_tests index dc1010363e..912f012bfb 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit dc1010363efd212a943c03f40e1a2dc404b36bfc +Subproject commit 912f012bfbaa244309a57d1707c9438db5cff56a diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index fcda118b33..8030e9d221 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -11,6 +11,7 @@ namespace hlsl struct emulated_float64_t { using storage_t = uint64_t; + using this_t = emulated_float64_t; storage_t data; @@ -20,41 +21,41 @@ namespace hlsl return emulated_float64_t(bit_cast(float64_t(val))); }*/ - NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(emulated_float64_t val) + NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t val) { return val; } - NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(int32_t val) + NBL_CONSTEXPR_STATIC_INLINE this_t create(int32_t val) { - return bit_cast >(impl::castToUint64WithFloat64BitPattern(int64_t(val))); + return bit_cast(impl::castToUint64WithFloat64BitPattern(int64_t(val))); } - NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(int64_t val) + NBL_CONSTEXPR_STATIC_INLINE this_t create(int64_t val) { - return bit_cast >(impl::castToUint64WithFloat64BitPattern(val)); + return bit_cast(impl::castToUint64WithFloat64BitPattern(val)); } - NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(uint32_t val) + NBL_CONSTEXPR_STATIC_INLINE this_t create(uint32_t val) { - return bit_cast >(impl::castToUint64WithFloat64BitPattern(uint64_t(val))); + return bit_cast(impl::castToUint64WithFloat64BitPattern(uint64_t(val))); } - NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(uint64_t val) + NBL_CONSTEXPR_STATIC_INLINE this_t create(uint64_t val) { - return bit_cast >(impl::castToUint64WithFloat64BitPattern(val)); + return bit_cast(impl::castToUint64WithFloat64BitPattern(val)); } - NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(float32_t val) + NBL_CONSTEXPR_STATIC_INLINE this_t create(float32_t val) { - emulated_float64_t output; + this_t output; output.data = impl::castToUint64WithFloat64BitPattern(val); return output; } - NBL_CONSTEXPR_STATIC_INLINE emulated_float64_t create(float64_t val) + NBL_CONSTEXPR_STATIC_INLINE this_t create(float64_t val) { - return bit_cast >(bit_cast(float64_t(val))); + return bit_cast(bit_cast(float64_t(val))); #ifdef __HLSL_VERSION emulated_float64_t retval; uint32_t lo, hi; @@ -62,7 +63,7 @@ namespace hlsl retval.data = (uint64_t(hi) << 32) | lo; return retval; #else - return bit_cast >(reinterpret_cast(val)); + return bit_cast(reinterpret_cast(val)); #endif } @@ -95,255 +96,89 @@ namespace hlsl } 
-#if 0 - uint64_t shiftLeftAllowNegBitCnt(uint64_t val, int n) - { - if (n < 0) - return val >> -n; - else - return val << n; - } -#endif - // arithmetic operators emulated_float64_t operator+(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { -#if 0 + if (FlushDenormToZero) { - uint64_t lhsSign = data & ieee754::traits::signMask; - uint64_t rhsSign = rhs.data & ieee754::traits::signMask; - uint64_t lhsMantissa = ieee754::extractMantissa(data); - uint64_t rhsMantissa = ieee754::extractMantissa(rhs.data); - int lhsBiasedExp = ieee754::extractBiasedExponent(data); - int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); - - if (tgmath::isnan(data) || tgmath::isnan(rhs.data)) - return bit_cast >(ieee754::traits::quietNaN); - /*if (std::isinf(lhs) || std::isinf(rhs)) - { - if (std::isinf(lhs) && !std::isinf(rhs)) - return lhs; - if (std::isinf(rhs) && !std::isinf(lhs)) - return rhs; - if (rhs == lhs) - return rhs; - - return nan(); - }*/ - - int rp = min(ieee754::extractExponent(data), ieee754::extractExponent(rhs.data)) - ieee754::traits::mantissaBitCnt; + if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) + return bit_cast(ieee754::traits::quietNaN); - uint64_t lhsRealMantissa = lhsMantissa | (1ull << ieee754::traits::mantissaBitCnt); - uint64_t rhsRealMantissa = rhsMantissa | (1ull << ieee754::traits::mantissaBitCnt); - uint64_t lhsSignTmp = lhsSign >> (52 + 11); - uint64_t rhsSignTmp = rhsSign >> (52 + 11); + this_t retval = this_t::create(0ull); - uint64_t sign = 0u; - if (lhsSign != rhsSign) - { - uint64_t _min = max(data, rhs.data); - uint64_t _max = min(data, rhs.data); - uint64_t minAbs = _min ^ ieee754::traits::signMask; - if (minAbs > _max) - sign = ieee754::traits::signMask; - - } + uint64_t mantissa; + uint32_t3 mantissaExtended; + int biasedExp; - int64_t lhsMantissaTmp = (shiftLeftAllowNegBitCnt(lhsRealMantissa, lhsBiasedExp - rp - ieee754::traits::mantissaBitCnt - ieee754::traits::exponentBias) ^ (-lhsSignTmp)) + lhsSignTmp; - int64_t rhsMantissaTmp = (shiftLeftAllowNegBitCnt(rhsRealMantissa, rhsBiasedExp - rp - ieee754::traits::mantissaBitCnt - ieee754::traits::exponentBias) ^ (-rhsSignTmp)) + rhsSignTmp; + uint64_t lhsSign = ieee754::extractSign(data); + uint64_t rhsSign = ieee754::extractSign(rhs.data); - uint64_t addTmp = bit_cast(lhsMantissaTmp + rhsMantissaTmp); + // TODO: delete 2 below if not needed + uint64_t lhsMantissa = ieee754::extractMantissa(data); + uint64_t rhsMantissa = ieee754::extractMantissa(rhs.data); - // renormalize - if (!FastMath && false) // TODO: hande nan - { + int64_t lhsNormMantissa = int64_t(ieee754::extractNormalizeMantissa(data)); + int64_t rhsNormMantissa = int64_t(ieee754::extractNormalizeMantissa(rhs.data)); - } - else + // TODO: branchless? + if (lhsSign != rhsSign) { -#ifndef __HLSL_VERSION - int l2 = log2(double(addTmp)); -#else - int intl2 = 0; -#endif - - if (!FastMath && (rp + l2 + 1 < nbl::hlsl::numeric_limits::min_exponent)) - { - return bit_cast >(impl::assembleFloat64(0, ieee754::traits::exponentMask, 0)); - } - else - { - rp = addTmp ? 
l2 + rp + ieee754::traits::exponentBias : 0; - return bit_cast >(impl::assembleFloat64( - sign, - (uint64_t(rp) << ieee754::traits::mantissaBitCnt) & ieee754::traits::exponentMask, - shiftLeftAllowNegBitCnt(addTmp, (ieee754::traits::mantissaBitCnt - l2)) & ieee754::traits::mantissaMask) - ); - } + if (lhsSign) + lhsNormMantissa *= -1; + if (rhsSign) + rhsNormMantissa *= -1; } - } -#endif - if (FlushDenormToZero) - { - emulated_float64_t retval = emulated_float64_t::create(0ull); - - uint64_t mantissa; - uint32_t3 mantissaExtended; - int biasedExp; - - uint64_t lhsSign = data & ieee754::traits::signMask; - uint64_t rhsSign = rhs.data & ieee754::traits::signMask; - uint64_t lhsMantissa = ieee754::extractMantissa(data); - uint64_t rhsMantissa = ieee754::extractMantissa(rhs.data); int lhsBiasedExp = ieee754::extractBiasedExponent(data); int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); int expDiff = lhsBiasedExp - rhsBiasedExp; - if (lhsSign == rhsSign) - { - if (expDiff == 0) - { - if (lhsBiasedExp == ieee754::traits::specialValueExp) - { - bool propagate = (lhsMantissa | rhsMantissa) != 0u; - return bit_cast >(glsl::mix(data, impl::propagateFloat64NaN(data, rhs.data), propagate)); - } - - mantissa = lhsMantissa + rhsMantissa; - if (lhsBiasedExp == 0) - return bit_cast >(impl::assembleFloat64(lhsSign, 0, mantissa)); - mantissaExtended.xy = impl::packUint64(mantissa); - mantissaExtended.x |= 0x00200000u; - mantissaExtended.z = 0u; - biasedExp = lhsBiasedExp; - - mantissaExtended = impl::shift64ExtraRightJamming(mantissaExtended, 1); - } - else - { - if (expDiff < 0) - { - swap(lhsMantissa, rhsMantissa); - swap(lhsBiasedExp, rhsBiasedExp); - } - - if (lhsBiasedExp == ieee754::traits::specialValueExp) - { - const bool propagate = (lhsMantissa) != 0u; - return bit_cast >(glsl::mix(ieee754::traits::exponentMask | lhsSign, impl::propagateFloat64NaN(data, rhs.data), propagate)); - } - - expDiff = glsl::mix(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); - rhsMantissa = glsl::mix(rhsMantissa | (1ull << 52), rhsMantissa, rhsBiasedExp == 0); - const uint32_t3 shifted = impl::shift64ExtraRightJamming(uint32_t3(impl::packUint64(rhsMantissa), 0u), expDiff); - rhsMantissa = impl::unpackUint64(shifted.xy); - mantissaExtended.z = shifted.z; - biasedExp = lhsBiasedExp; - - lhsMantissa |= (1ull << 52); - mantissaExtended.xy = impl::packUint64(lhsMantissa + rhsMantissa); - --biasedExp; - if (!(mantissaExtended.x < 0x00200000u)) - { - mantissaExtended = impl::shift64ExtraRightJamming(mantissaExtended, 1); - ++biasedExp; - } - - return bit_cast >(impl::roundAndPackFloat64(lhsSign, biasedExp, mantissaExtended.xyz)); - } + int exp = max(lhsBiasedExp, rhsBiasedExp) - ieee754::traits::exponentBias; + uint32_t shiftAmount = abs(expDiff); - // cannot happen but compiler cries about not every path returning value - return bit_cast >(impl::roundAndPackFloat64(lhsSign, biasedExp, mantissaExtended)); - } - else - { - lhsMantissa = impl::shortShift64Left(lhsMantissa, 10); - rhsMantissa = impl::shortShift64Left(rhsMantissa, 10); - - if (expDiff != 0) - { - uint32_t2 frac; - - if (expDiff < 0) - { - swap(lhsMantissa, rhsMantissa); - swap(lhsBiasedExp, rhsBiasedExp); - lhsSign ^= ieee754::traits::signMask; - } - - if (lhsBiasedExp == ieee754::traits::specialValueExp) - { - bool propagate = lhsMantissa != 0u; - return bit_cast >(glsl::mix(impl::assembleFloat64(lhsSign, ieee754::traits::exponentMask, 0ull), impl::propagateFloat64NaN(data, rhs.data), propagate)); - } - - expDiff = glsl::mix(abs(expDiff), 
abs(expDiff) - 1, rhsBiasedExp == 0); - rhsMantissa = glsl::mix(rhsMantissa | 0x4000000000000000ull, rhsMantissa, rhsBiasedExp == 0); - rhsMantissa = impl::unpackUint64(impl::shift64RightJamming(impl::packUint64(rhsMantissa), expDiff)); - lhsMantissa |= 0x4000000000000000ull; - frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); - biasedExp = lhsBiasedExp; - --biasedExp; - return bit_cast >(impl::normalizeRoundAndPackFloat64(lhsSign, biasedExp - 10, frac.x, frac.y)); - } - if (lhsBiasedExp == ieee754::traits::specialValueExp) - { - bool propagate = ((lhsMantissa) | (rhsMantissa)) != 0u; - return bit_cast >(glsl::mix(ieee754::traits::quietNaN, impl::propagateFloat64NaN(data, rhs.data), propagate)); - } - rhsBiasedExp = glsl::mix(rhsBiasedExp, 1, lhsBiasedExp == 0); - lhsBiasedExp = glsl::mix(lhsBiasedExp, 1, lhsBiasedExp == 0); + // so lhsNormMantissa always holds mantissa of number with greater exponent + if (expDiff < 0) + swap(lhsNormMantissa, rhsNormMantissa); + rhsNormMantissa >>= shiftAmount; + int64_t resultMantissa = lhsNormMantissa + rhsNormMantissa; + const uint64_t resultSign = uint64_t((lhsSign && rhsSign) || (bit_cast(resultMantissa) & (lhsSign << 63))) << 63; + uint64_t resultBiasedExp = uint64_t(exp) + ieee754::traits::exponentBias; - const uint32_t2 lhsMantissaPacked = impl::packUint64(lhsMantissa); - const uint32_t2 rhsMantissaPacked = impl::packUint64(rhsMantissa); + resultMantissa = abs(resultMantissa); - uint32_t2 frac; - uint64_t signOfDifference = 0; - if (rhsMantissaPacked.x < lhsMantissaPacked.x) - { - frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); - } - else if (lhsMantissaPacked.x < rhsMantissaPacked.x) - { - frac.xy = impl::packUint64(rhsMantissa - lhsMantissa); - signOfDifference = ieee754::traits::signMask; - } - else if (rhsMantissaPacked.y <= lhsMantissaPacked.y) - { - /* It is possible that frac.x and frac.y may be zero after this. 
*/ - frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); - } - else - { - frac.xy = impl::packUint64(rhsMantissa - lhsMantissa); - signOfDifference = ieee754::traits::signMask; - } + if (resultMantissa & 1ull << 53) + { + ++resultBiasedExp; + resultMantissa >>= 1; + } - biasedExp = glsl::mix(rhsBiasedExp, lhsBiasedExp, signOfDifference == 0u); - lhsSign ^= signOfDifference; - uint64_t retval_0 = impl::packFloat64(uint32_t(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) << 31, 0, 0u, 0u); - uint64_t retval_1 = impl::normalizeRoundAndPackFloat64(lhsSign, biasedExp - 11, frac.x, frac.y); - return bit_cast >(glsl::mix(retval_0, retval_1, frac.x != 0u || frac.y != 0u)); + // TODO: better implementation with no loop + while (resultMantissa < (1ull << 52)) + { + --resultBiasedExp; + resultMantissa <<= 1; } + + resultMantissa &= ieee754::traits::mantissaMask; + uint64_t output = impl::assembleFloat64(resultSign, uint64_t(resultBiasedExp) << ieee754::traits::mantissaBitCnt, abs(resultMantissa)); + return bit_cast(output); } - else - { - //static_assert(false, "not implemented yet"); - return bit_cast >(0xdeadbeefbadcaffeull); - } + + // not implemented + if (!FlushDenormToZero) + return bit_cast(0xdeadbeefbadcaffeull); } emulated_float64_t operator+(float rhs) { - return bit_cast >(data) + create(rhs); + return bit_cast(data) + create(rhs); } emulated_float64_t operator-(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { - emulated_float64_t lhs = bit_cast >(data); + emulated_float64_t lhs = bit_cast(data); emulated_float64_t rhsFlipped = rhs.flipSign(); return lhs + rhsFlipped; @@ -351,14 +186,14 @@ namespace hlsl emulated_float64_t operator-(float rhs) NBL_CONST_MEMBER_FUNC { - return bit_cast >(data) - create(rhs); + return bit_cast(data) - create(rhs); } emulated_float64_t operator*(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { if(FlushDenormToZero) { - emulated_float64_t retval = emulated_float64_t::create(0ull); + emulated_float64_t retval = this_t::create(0ull); uint64_t lhsSign = data & ieee754::traits::signMask; uint64_t rhsSign = rhs.data & ieee754::traits::signMask; @@ -374,11 +209,11 @@ namespace hlsl if (lhsBiasedExp == ieee754::traits::specialValueExp) { if ((lhsMantissa != 0u) || ((rhsBiasedExp == ieee754::traits::specialValueExp) && (rhsMantissa != 0u))) - return bit_cast >(impl::propagateFloat64NaN(data, rhs.data)); + return bit_cast(impl::propagateFloat64NaN(data, rhs.data)); if ((uint64_t(rhsBiasedExp) | rhsMantissa) == 0u) - return bit_cast >(ieee754::traits::quietNaN); + return bit_cast(ieee754::traits::quietNaN); - return bit_cast >(impl::assembleFloat64(sign, ieee754::traits::exponentMask, 0ull)); + return bit_cast(impl::assembleFloat64(sign, ieee754::traits::exponentMask, 0ull)); } if (rhsBiasedExp == ieee754::traits::specialValueExp) { @@ -387,23 +222,23 @@ namespace hlsl #ifdef RELAXED_NAN_PROPAGATION return rhs.data; #else - return bit_cast >(impl::propagateFloat64NaN(data, rhs.data)); + return bit_cast(impl::propagateFloat64NaN(data, rhs.data)); #endif if ((uint64_t(lhsBiasedExp) | lhsMantissa) == 0u) - return bit_cast >(ieee754::traits::quietNaN); + return bit_cast(ieee754::traits::quietNaN); - return bit_cast >(sign | ieee754::traits::exponentMask); + return bit_cast(sign | ieee754::traits::exponentMask); } if (lhsBiasedExp == 0) { if (lhsMantissa == 0u) - return bit_cast >(sign); + return bit_cast(sign); impl::normalizeFloat64Subnormal(lhsMantissa, lhsBiasedExp, lhsMantissa); } if (rhsBiasedExp == 0) { if (rhsMantissa == 0u) - return bit_cast >(sign); + return 
bit_cast(sign); impl::normalizeFloat64Subnormal(rhsMantissa, rhsBiasedExp, rhsMantissa); } } @@ -423,24 +258,24 @@ namespace hlsl } newPseudoMantissa &= (ieee754::traits::mantissaMask); - return bit_cast >(impl::assembleFloat64(sign, uint64_t(exp) << ieee754::traits::mantissaBitCnt, newPseudoMantissa)); + return bit_cast(impl::assembleFloat64(sign, uint64_t(exp) << ieee754::traits::mantissaBitCnt, newPseudoMantissa)); } else { //static_assert(false, "not implemented yet"); - return bit_cast >(0xdeadbeefbadcaffeull); + return bit_cast(0xdeadbeefbadcaffeull); } } emulated_float64_t operator*(float rhs) { - return bit_cast >(data) * create(rhs); + return bit_cast(data) * create(rhs); } - /*emulated_float64_t reciprocal(uint64_t x) + /*this_t reciprocal(uint64_t x) { - using ThisType = emulated_float64_t; - ThisType output = ThisType::bit_cast >((0xbfcdd6a18f6a6f52ULL - x) >> 1); + using ThisType = this_t; + ThisType output = ThisType::bit_cast((0xbfcdd6a18f6a6f52ULL - x) >> 1); output = output * output; return output; }*/ @@ -449,23 +284,23 @@ namespace hlsl { if (FlushDenormToZero) { - //return emulated_float64_t::bit_cast >(data) * reciprocal(rhs.data); + //return this_t::bit_cast(data) * reciprocal(rhs.data); if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) - return bit_cast >(ieee754::traits::quietNaN); + return bit_cast(ieee754::traits::quietNaN); if (!FastMath && ((rhs.data << 1) == 0)) - return bit_cast >(ieee754::traits::quietNaN); + return bit_cast(ieee754::traits::quietNaN); const uint64_t sign = (data ^ rhs.data) & ieee754::traits::signMask; if (!FastMath && impl::areBothInfinity(data, rhs.data)) - return bit_cast >(ieee754::traits::quietNaN | sign); + return bit_cast(ieee754::traits::quietNaN | sign); if (!FastMath && tgmath::isInf(data)) - return bit_cast >((data & ~ieee754::traits::signMask) | sign); + return bit_cast((data & ~ieee754::traits::signMask) | sign); if (!FastMath && tgmath::isInf(rhs.data)) - return bit_cast >(0ull | sign); + return bit_cast(0ull | sign); @@ -485,18 +320,18 @@ namespace hlsl mantissa &= ieee754::traits::mantissaMask; - return bit_cast >(impl::assembleFloat64(sign, uint64_t(exp) << ieee754::traits::mantissaBitCnt, mantissa)); + return bit_cast(impl::assembleFloat64(sign, uint64_t(exp) << ieee754::traits::mantissaBitCnt, mantissa)); } else { //static_assert(false, "not implemented yet"); - return bit_cast >(0xdeadbeefbadcaffeull); + return bit_cast(0xdeadbeefbadcaffeull); } } // relational operators // TODO: should `FlushDenormToZero` affect relational operators? 
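// Note on the comparison scheme used by operator< and operator> below: once NaNs and the +0/-0 pair are
// rejected by the early-outs, the sign-dependent XOR maps each double's bit pattern onto a key whose unsigned
// integer order matches numeric order. A minimal sketch of that mapping (toOrderedKey is a hypothetical
// helper name, not part of this header):
//
//   uint64_t toOrderedKey(uint64_t bits)
//   {
//       const uint64_t sign = bits >> 63;
//       // negatives: invert all bits (reverses their order); positives: just set the top bit
//       return bits ^ ((0x7FFFFFFFFFFFFFFFull * sign) | 0x8000000000000000ull);
//   }
//
// so that a < b  <=>  toOrderedKey(a.data) < toOrderedKey(b.data) for finite, non-NaN values.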
- bool operator==(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC + bool operator==(this_t rhs) NBL_CONST_MEMBER_FUNC { if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) return false; @@ -504,7 +339,7 @@ namespace hlsl if (!FastMath && impl::areBothZero(data, rhs.data)) return true; - const emulated_float64_t xored = bit_cast >(data ^ rhs.data); + const emulated_float64_t xored = bit_cast(data ^ rhs.data); // TODO: check what fast math returns for -0 == 0 if ((xored.data & 0x7FFFFFFFFFFFFFFFull) == 0ull) return true; @@ -513,7 +348,7 @@ namespace hlsl } bool operator!=(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { - return !(bit_cast >(data) == rhs); + return !(bit_cast(data) == rhs); } bool operator<(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { @@ -555,8 +390,8 @@ namespace hlsl return (lhsFlipped & diffBits) > (rhsFlipped & diffBits); } - bool operator<=(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { return !(bit_cast >(data) > bit_cast >(rhs.data)); } - bool operator>=(emulated_float64_t rhs) { return !(bit_cast >(data) < bit_cast >(rhs.data)); } + bool operator<=(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { return !(bit_cast(data) > bit_cast(rhs.data)); } + bool operator>=(emulated_float64_t rhs) { return !(bit_cast(data) < bit_cast(rhs.data)); } //logical operators bool operator&&(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { return bool(data) && bool(rhs.data); } @@ -565,7 +400,7 @@ namespace hlsl emulated_float64_t flipSign() { - return bit_cast >(data ^ ieee754::traits::signMask); + return bit_cast(data ^ ieee754::traits::signMask); } bool isNaN() @@ -615,6 +450,13 @@ NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size::type ex {\ return extractMantissa(x.data);\ }\ +\ +template <>\ +NBL_CONSTEXPR_INLINE_FUNC uint64_t extractNormalizeMantissa(__VA_ARGS__ x)\ +{\ + return extractNormalizeMantissa(x.data);\ +}\ +\ #define DEFINE_BIT_CAST_SPEC(...)\ template<>\ diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index e08831e2ce..d39526e04b 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -147,6 +147,14 @@ NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type ext return impl::castToUintType(x) & traits::type>::mantissaMask; } +template +NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractNormalizeMantissa(T x) +{ + using AsUint = typename unsigned_integer_of_size::type; + using AsFloat = typename float_of_size::type; + return extractMantissa(x) | (AsUint(1) << traits::mantissaBitCnt); +} + template NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSign(T x) { diff --git a/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl b/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl index 92df407530..d86d5c6469 100644 --- a/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl @@ -274,6 +274,9 @@ NBL_CONSTEXPR_INLINE_FUNC uint64_t roundAndPackFloat64(uint64_t zSign, int zExp, } } } + + // overflow handling? + // if biased exp is lesser then 2045 if (0x7FD <= zExp) { if ((0x7FD < zExp) || ((zExp == 0x7FD) && (0x001FFFFFu == mantissaExtended.x && 0xFFFFFFFFu == mantissaExtended.y) && increment)) @@ -319,6 +322,7 @@ NBL_CONSTEXPR_INLINE_FUNC uint64_t roundAndPackFloat64(uint64_t zSign, int zExp, } else { + // ?? 
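// Regarding the "// ??" above: glsl::mix(a, b, cond) selects `b` when `cond` is true, so the statement below
// appears to be the underflow fixup - when the jammed mantissa has collapsed to all zeroes, the biased exponent
// is forced to 0 and the value packs as a signed zero instead of a non-zero exponent with an empty mantissa.
// A plain-form sketch of the same selection (illustrative only, not part of the patch):
//
//   if ((mantissaExtended.x | mantissaExtended.y) == 0u)
//       zExp = 0;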
zExp = glsl::mix(zExp, 0, (mantissaExtended.x | mantissaExtended.y) == 0u); } From aff0ca44d8e22c46d0058170af2ebe5585d3dcb7 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 27 Aug 2024 23:15:07 +0100 Subject: [PATCH 040/432] Revert unintended changes --- include/nbl/builtin/hlsl/glsl_compat/core.hlsl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/glsl_compat/core.hlsl b/include/nbl/builtin/hlsl/glsl_compat/core.hlsl index bcc0c76bdb..88eed8202a 100644 --- a/include/nbl/builtin/hlsl/glsl_compat/core.hlsl +++ b/include/nbl/builtin/hlsl/glsl_compat/core.hlsl @@ -44,6 +44,7 @@ genIUType bitfieldInsert(genIUType const& Base, genIUType const& Insert, int Off // Fun fact: ideally atomics should detect the address space of `ptr` and narrow down the sync-scope properly // https://github.com/microsoft/DirectXShaderCompiler/issues/6508 // Would need own meta-type/tagged-type to implement, without & and fancy operator overloads... not posssible +// TODO: we can template on `StorageClass` instead of Ptr_T then resolve the memory scope and semantics properly template T atomicAdd(NBL_REF_ARG(T) ptr, T value) { @@ -120,9 +121,9 @@ T atomicCompSwap(NBL_REF_ARG(T) ptr, T comparator, T value) return spirv::atomicCompareExchange(ptr, spv::ScopeDevice, spv::MemorySemanticsMaskNone, spv::MemorySemanticsMaskNone, value, comparator); } template // DXC Workaround -enable_if_t, T> atomicCompSwap(Ptr_T ptr, T value) +enable_if_t, T> atomicCompSwap(Ptr_T ptr, T comparator, T value) { - return spirv::atomicCompareExchange(ptr, spv::ScopeDevice, spv::MemorySemanticsMaskNone, value); + return spirv::atomicCompareExchange(ptr, spv::ScopeDevice, spv::MemorySemanticsMaskNone, spv::MemorySemanticsMaskNone, value, comparator); } /** @@ -205,7 +206,7 @@ struct bitfieldExtract } }; -} +} //namespace impl template T bitfieldExtract( T val, uint32_t offsetBits, uint32_t numBits ) From ede8b0cd415ce72c9faca37b6de81a39913be305 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 28 Aug 2024 21:44:28 +0100 Subject: [PATCH 041/432] Fixes --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 221 ++++++++++++++++-- .../hlsl/impl/emulated_float64_t_impl.hlsl | 53 +++-- include/nbl/builtin/hlsl/tgmath.hlsl | 4 +- 4 files changed, 242 insertions(+), 38 deletions(-) diff --git a/examples_tests b/examples_tests index 912f012bfb..da25126a01 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 912f012bfbaa244309a57d1707c9438db5cff56a +Subproject commit da25126a01b92ff244bc143852685947c98c0721 diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index 8030e9d221..de737f8a1c 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -49,7 +49,7 @@ namespace hlsl NBL_CONSTEXPR_STATIC_INLINE this_t create(float32_t val) { this_t output; - output.data = impl::castToUint64WithFloat64BitPattern(val); + output.data = impl::castFloat32ToStorageType(val); return output; } @@ -96,30 +96,191 @@ namespace hlsl } + // TODO: remove + emulated_float64_t addOld(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC + { + if (FlushDenormToZero) + { + emulated_float64_t retval = emulated_float64_t::create(0ull); + + uint64_t mantissa; + uint32_t3 mantissaExtended; + int biasedExp; + + uint64_t lhsSign = data & ieee754::traits::signMask; + uint64_t rhsSign = rhs.data & ieee754::traits::signMask; + uint64_t lhsMantissa = 
ieee754::extractMantissa(data); + uint64_t rhsMantissa = ieee754::extractMantissa(rhs.data); + int lhsBiasedExp = ieee754::extractBiasedExponent(data); + int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); + + int expDiff = lhsBiasedExp - rhsBiasedExp; + + if (lhsSign == rhsSign) + { + if (expDiff == 0) + { + if (lhsBiasedExp == ieee754::traits::specialValueExp) + { + bool propagate = (lhsMantissa | rhsMantissa) != 0u; + return bit_cast >(glsl::mix(data, impl::propagateFloat64NaN(data, rhs.data), propagate)); + } + + mantissa = lhsMantissa + rhsMantissa; + if (lhsBiasedExp == 0) + return bit_cast >(impl::assembleFloat64(lhsSign, 0, mantissa)); + mantissaExtended.xy = impl::packUint64(mantissa); + mantissaExtended.x |= 0x00200000u; + mantissaExtended.z = 0u; + biasedExp = lhsBiasedExp; + + mantissaExtended = impl::shift64ExtraRightJamming(mantissaExtended, 1); + } + else + { + if (expDiff < 0) + { + swap(lhsMantissa, rhsMantissa); + swap(lhsBiasedExp, rhsBiasedExp); + } + + if (lhsBiasedExp == ieee754::traits::specialValueExp) + { + const bool propagate = (lhsMantissa) != 0u; + return bit_cast >(glsl::mix(ieee754::traits::exponentMask | lhsSign, impl::propagateFloat64NaN(data, rhs.data), propagate)); + } + + expDiff = glsl::mix(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); + rhsMantissa = glsl::mix(rhsMantissa | (1ull << 52), rhsMantissa, rhsBiasedExp == 0); + const uint32_t3 shifted = impl::shift64ExtraRightJamming(uint32_t3(impl::packUint64(rhsMantissa), 0u), expDiff); + rhsMantissa = impl::unpackUint64(shifted.xy); + mantissaExtended.z = shifted.z; + biasedExp = lhsBiasedExp; + + lhsMantissa |= (1ull << 52); + mantissaExtended.xy = impl::packUint64(lhsMantissa + rhsMantissa); + --biasedExp; + if (!(mantissaExtended.x < 0x00200000u)) + { + mantissaExtended = impl::shift64ExtraRightJamming(mantissaExtended, 1); + ++biasedExp; + } + + return bit_cast >(impl::roundAndPackFloat64(lhsSign, biasedExp, mantissaExtended.xyz)); + } + + // cannot happen but compiler cries about not every path returning value + return bit_cast >(impl::roundAndPackFloat64(lhsSign, biasedExp, mantissaExtended)); + } + else + { + lhsMantissa = impl::shortShift64Left(lhsMantissa, 10); + rhsMantissa = impl::shortShift64Left(rhsMantissa, 10); + + if (expDiff != 0) + { + uint32_t2 frac; + + if (expDiff < 0) + { + swap(lhsMantissa, rhsMantissa); + swap(lhsBiasedExp, rhsBiasedExp); + lhsSign ^= ieee754::traits::signMask; + } + + if (lhsBiasedExp == ieee754::traits::specialValueExp) + { + bool propagate = lhsMantissa != 0u; + return bit_cast >(glsl::mix(impl::assembleFloat64(lhsSign, ieee754::traits::exponentMask, 0ull), impl::propagateFloat64NaN(data, rhs.data), propagate)); + } + + expDiff = glsl::mix(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); + rhsMantissa = glsl::mix(rhsMantissa | 0x4000000000000000ull, rhsMantissa, rhsBiasedExp == 0); + rhsMantissa = impl::unpackUint64(impl::shift64RightJamming(impl::packUint64(rhsMantissa), expDiff)); + lhsMantissa |= 0x4000000000000000ull; + frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); + biasedExp = lhsBiasedExp; + --biasedExp; + return bit_cast >(impl::normalizeRoundAndPackFloat64(lhsSign, biasedExp - 10, frac.x, frac.y)); + } + if (lhsBiasedExp == ieee754::traits::specialValueExp) + { + bool propagate = ((lhsMantissa) | (rhsMantissa)) != 0u; + return bit_cast >(glsl::mix(ieee754::traits::quietNaN, impl::propagateFloat64NaN(data, rhs.data), propagate)); + } + rhsBiasedExp = glsl::mix(rhsBiasedExp, 1, lhsBiasedExp == 0); + lhsBiasedExp = 
glsl::mix(lhsBiasedExp, 1, lhsBiasedExp == 0); + + + const uint32_t2 lhsMantissaPacked = impl::packUint64(lhsMantissa); + const uint32_t2 rhsMantissaPacked = impl::packUint64(rhsMantissa); + + uint32_t2 frac; + uint64_t signOfDifference = 0; + if (rhsMantissaPacked.x < lhsMantissaPacked.x) + { + frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); + } + else if (lhsMantissaPacked.x < rhsMantissaPacked.x) + { + frac.xy = impl::packUint64(rhsMantissa - lhsMantissa); + signOfDifference = ieee754::traits::signMask; + } + else if (rhsMantissaPacked.y <= lhsMantissaPacked.y) + { + /* It is possible that frac.x and frac.y may be zero after this. */ + frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); + } + else + { + frac.xy = impl::packUint64(rhsMantissa - lhsMantissa); + signOfDifference = ieee754::traits::signMask; + } + + biasedExp = glsl::mix(rhsBiasedExp, lhsBiasedExp, signOfDifference == 0u); + lhsSign ^= signOfDifference; + uint64_t retval_0 = impl::packFloat64(uint32_t(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) << 31, 0, 0u, 0u); + uint64_t retval_1 = impl::normalizeRoundAndPackFloat64(lhsSign, biasedExp - 11, frac.x, frac.y); + return bit_cast >(glsl::mix(retval_0, retval_1, frac.x != 0u || frac.y != 0u)); + } + } + else + { + //static_assert(false, "not implemented yet"); + return bit_cast >(0xdeadbeefbadcaffeull); + } + } + // arithmetic operators - emulated_float64_t operator+(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC + this_t operator+(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { + return addOld(rhs); + if (FlushDenormToZero) { if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) return bit_cast(ieee754::traits::quietNaN); - this_t retval = this_t::create(0ull); + int lhsBiasedExp = ieee754::extractBiasedExponent(data); + int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); - uint64_t mantissa; - uint32_t3 mantissaExtended; - int biasedExp; + if (lhsBiasedExp == 0ull) + return bit_cast(rhs.data); + if (rhsBiasedExp == 0ull) + return bit_cast(data); uint64_t lhsSign = ieee754::extractSign(data); uint64_t rhsSign = ieee754::extractSign(rhs.data); - // TODO: delete 2 below if not needed - uint64_t lhsMantissa = ieee754::extractMantissa(data); - uint64_t rhsMantissa = ieee754::extractMantissa(rhs.data); + if (lhsSign != rhsSign) + return addOld(rhs); int64_t lhsNormMantissa = int64_t(ieee754::extractNormalizeMantissa(data)); int64_t rhsNormMantissa = int64_t(ieee754::extractNormalizeMantissa(rhs.data)); + lhsNormMantissa <<= 9; + rhsNormMantissa <<= 9; + // TODO: branchless? 
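// A possible branchless answer to the "TODO: branchless?" above, assuming lhsSign/rhsSign hold 0 or 1 as
// returned by ieee754::extractSign: conditionally negate through the two's-complement identity (x ^ m) - m,
// where m is 0 (keep) or -1 (negate), applied only when the signs differ so the behaviour matches the branchy
// code below. Sketch only:
//
//   const int64_t differ = int64_t(lhsSign != rhsSign);
//   const int64_t lhsNeg = -(int64_t(lhsSign) & differ);
//   const int64_t rhsNeg = -(int64_t(rhsSign) & differ);
//   lhsNormMantissa = (lhsNormMantissa ^ lhsNeg) - lhsNeg;
//   rhsNormMantissa = (rhsNormMantissa ^ rhsNeg) - rhsNeg;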
if (lhsSign != rhsSign) { @@ -129,9 +290,6 @@ namespace hlsl rhsNormMantissa *= -1; } - int lhsBiasedExp = ieee754::extractBiasedExponent(data); - int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); - int expDiff = lhsBiasedExp - rhsBiasedExp; int exp = max(lhsBiasedExp, rhsBiasedExp) - ieee754::traits::exponentBias; @@ -142,7 +300,11 @@ namespace hlsl swap(lhsNormMantissa, rhsNormMantissa); rhsNormMantissa >>= shiftAmount; + int64_t resultMantissa = lhsNormMantissa + rhsNormMantissa; + + resultMantissa >>= 9; + const uint64_t resultSign = uint64_t((lhsSign && rhsSign) || (bit_cast(resultMantissa) & (lhsSign << 63))) << 63; uint64_t resultBiasedExp = uint64_t(exp) + ieee754::traits::exponentBias; @@ -287,21 +449,21 @@ namespace hlsl //return this_t::bit_cast(data) * reciprocal(rhs.data); if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) - return bit_cast(ieee754::traits::quietNaN); - if (!FastMath && ((rhs.data << 1) == 0)) - return bit_cast(ieee754::traits::quietNaN); + return bit_cast(ieee754::traits::quietNaN); const uint64_t sign = (data ^ rhs.data) & ieee754::traits::signMask; + if (!FastMath && impl::isZero(rhs.data)) + return bit_cast(ieee754::traits::inf | sign); + if (!FastMath && impl::areBothInfinity(data, rhs.data)) - return bit_cast(ieee754::traits::quietNaN | sign); + return bit_cast(ieee754::traits::quietNaN); if (!FastMath && tgmath::isInf(data)) - return bit_cast((data & ~ieee754::traits::signMask) | sign); + return bit_cast(ieee754::traits::inf | sign); if (!FastMath && tgmath::isInf(rhs.data)) - return bit_cast(0ull | sign); - + return bit_cast(0ull | sign); const uint64_t lhsRealMantissa = (ieee754::extractMantissa(data) | (1ull << ieee754::traits::mantissaBitCnt)); @@ -348,6 +510,9 @@ namespace hlsl } bool operator!=(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { + if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) + return false; + return !(bit_cast(data) == rhs); } bool operator<(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC @@ -373,7 +538,7 @@ namespace hlsl bool operator>(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) - return true; + return false; if (!FastMath && impl::areBothSameSignInfinity(data, rhs.data)) return false; if (!FastMath && impl::areBothZero(data, rhs.data)) @@ -390,8 +555,20 @@ namespace hlsl return (lhsFlipped & diffBits) > (rhsFlipped & diffBits); } - bool operator<=(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { return !(bit_cast(data) > bit_cast(rhs.data)); } - bool operator>=(emulated_float64_t rhs) { return !(bit_cast(data) < bit_cast(rhs.data)); } + bool operator<=(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC + { + if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) + return false; + + return !(bit_cast(data) > bit_cast(rhs.data)); + } + bool operator>=(emulated_float64_t rhs) + { + if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) + return false; + + return !(bit_cast(data) < bit_cast(rhs.data)); + } //logical operators bool operator&&(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { return bool(data) && bool(rhs.data); } diff --git a/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl b/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl index d86d5c6469..3045df49d4 100644 --- a/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl @@ -80,16 +80,28 @@ NBL_CONSTEXPR_INLINE_FUNC uint64_t unpackUint64(uint32_t2 val) 
return ((uint64_t(val.x) & 0x00000000FFFFFFFFull) << 32) | uint64_t(val.y); } -inline uint64_t castToUint64WithFloat64BitPattern(float32_t val) +template +inline uint64_t castFloat32ToStorageType(float32_t val) { - uint32_t asUint = ieee754::impl::castToUintType(val); - - const uint64_t sign = (uint64_t(ieee754::traits::signMask) & asUint) << (sizeof(float32_t) * 8); - - const uint64_t biasedExp = (uint64_t(ieee754::extractExponent(val)) + ieee754::traits::exponentBias) << (ieee754::traits::mantissaBitCnt); - const uint64_t mantissa = (uint64_t(ieee754::traits::mantissaMask) & asUint) << (ieee754::traits::mantissaBitCnt - ieee754::traits::mantissaBitCnt); - - return sign | biasedExp | mantissa; + if (FlushDenormToZero) + { + const uint64_t sign = uint64_t(ieee754::extractSign(val)) << 63; + if (tgmath::isInf(val)) + return ieee754::traits::inf | sign; + uint32_t asUint = ieee754::impl::castToUintType(val); + const int f32BiasedExp = ieee754::extractBiasedExponent(val); + if (f32BiasedExp == 0) + return sign; + const uint64_t biasedExp = uint64_t(f32BiasedExp - ieee754::traits::exponentBias + ieee754::traits::exponentBias) << (ieee754::traits::mantissaBitCnt); + const uint64_t mantissa = (uint64_t(ieee754::traits::mantissaMask) & asUint) << (ieee754::traits::mantissaBitCnt - ieee754::traits::mantissaBitCnt); + + return sign | biasedExp | mantissa; + } + else + { + // static_assert(false); + return 0xdeadbeefbadcaffeull; + } }; inline uint64_t castToUint64WithFloat64BitPattern(uint64_t val) @@ -100,8 +112,18 @@ inline uint64_t castToUint64WithFloat64BitPattern(uint64_t val) #ifndef __HLSL_VERSION int exp = findMSB(val); #else - uint32_t2 valPacked = packUint64(val); - int exp = valPacked.x ? firstbithigh(valPacked.x) + 32 : firstbithigh(valPacked.y); + int exp = 63; + uint64_t mask = ieee754::traits::signMask; + while (!(val & mask)) + { + --exp; + mask >>= 1; + } + + + //uint32_t2 valPacked = packUint64(val); + //int exp = valPacked.x ? 
firstbithigh(valPacked.x) + 32 : firstbithigh(valPacked.y); + //exp = 63 - exp; #endif uint64_t mantissa; @@ -128,8 +150,6 @@ inline uint64_t castToUint64WithFloat64BitPattern(uint64_t val) else if ((val & roundingBit) && (stickyBit || (mantissa & 1))) val += roundingBit; - - //val += (1ull << (shiftCnt)) - 1; //mantissa = val >> shiftCntAbs; @@ -148,7 +168,7 @@ inline uint64_t castToUint64WithFloat64BitPattern(uint64_t val) inline uint64_t castToUint64WithFloat64BitPattern(int64_t val) { const uint64_t sign = val & ieee754::traits::signMask; - const uint64_t absVal = abs(val); + const uint64_t absVal = uint64_t(abs(val)); return sign | castToUint64WithFloat64BitPattern(absVal); }; @@ -386,6 +406,11 @@ NBL_CONSTEXPR_INLINE_FUNC bool areBothSameSignInfinity(uint64_t lhs, uint64_t rh return lhs == rhs && (lhs & ~ieee754::traits::signMask) == ieee754::traits::inf; } +NBL_CONSTEXPR_INLINE_FUNC bool isZero(uint64_t val) +{ + return (val << 1) == 0; +} + NBL_CONSTEXPR_INLINE_FUNC bool areBothZero(uint64_t lhs, uint64_t rhs) { return ((lhs << 1) == 0ull) && ((rhs << 1) == 0ull); diff --git a/include/nbl/builtin/hlsl/tgmath.hlsl b/include/nbl/builtin/hlsl/tgmath.hlsl index 4a35e10a07..bd1e1f9e48 100644 --- a/include/nbl/builtin/hlsl/tgmath.hlsl +++ b/include/nbl/builtin/hlsl/tgmath.hlsl @@ -30,8 +30,10 @@ template NBL_CONSTEXPR_INLINE_FUNC bool isInf(T val) { using AsUint = typename unsigned_integer_of_size::type; + using AsFloat = typename float_of_size::type; + AsUint tmp = bit_cast(val); - return (tmp & ~ieee754::traits::signMask) == ieee754::traits::inf; + return (tmp & ~ieee754::traits::signMask) == ieee754::traits::inf; } } From 002581aba619a7a07b9bdbfd17bde230dc710b89 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 28 Aug 2024 23:24:37 +0100 Subject: [PATCH 042/432] Fixed shaders so they compile even if portable_float64_t is not emulated --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 8 +--- .../hlsl/emulated_float64_t_utils.hlsl | 42 +++++++++++++++---- include/nbl/builtin/hlsl/ieee754.hlsl | 7 +++- include/nbl/builtin/hlsl/tgmath.hlsl | 2 +- 5 files changed, 42 insertions(+), 19 deletions(-) diff --git a/examples_tests b/examples_tests index da25126a01..70ee941a76 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit da25126a01b92ff244bc143852685947c98c0721 +Subproject commit 70ee941a7658f590c7ea4d3a00ce207eeec12b9d diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index de737f8a1c..914e5df43b 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -55,7 +55,6 @@ namespace hlsl NBL_CONSTEXPR_STATIC_INLINE this_t create(float64_t val) { - return bit_cast(bit_cast(float64_t(val))); #ifdef __HLSL_VERSION emulated_float64_t retval; uint32_t lo, hi; @@ -272,9 +271,6 @@ namespace hlsl uint64_t lhsSign = ieee754::extractSign(data); uint64_t rhsSign = ieee754::extractSign(rhs.data); - if (lhsSign != rhsSign) - return addOld(rhs); - int64_t lhsNormMantissa = int64_t(ieee754::extractNormalizeMantissa(data)); int64_t rhsNormMantissa = int64_t(ieee754::extractNormalizeMantissa(rhs.data)); @@ -726,8 +722,8 @@ DEFINE_BIT_CAST_SPEC(emulated_float64_t); DEFINE_BIT_CAST_SPEC(emulated_float64_t); DEFINE_BIT_CAST_SPEC(emulated_float64_t); -template -struct is_floating_point > : bool_constant {}; +//template +//struct is_floating_point > : bool_constant {}; namespace ieee754 { diff --git 
a/include/nbl/builtin/hlsl/emulated_float64_t_utils.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t_utils.hlsl index 7e16081b31..133c9842bd 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t_utils.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t_utils.hlsl @@ -294,6 +294,11 @@ struct emulated_matrix // : emulated_matrix_base @@ -363,14 +368,12 @@ using portable_matrix_t3x3 = portable_matrix_t; using portable_matrix64_t2x2 = portable_matrix_t2x2 >; using portable_matrix64_t3x3 = portable_matrix_t3x3 >; + +// TODO: fix template NBL_CONSTEXPR_INLINE_FUNC portable_float64_t<> create_portable_float64_t(T val) { - //return impl::portable_float64_t_creator::create(val); - if (impl::is_emulated >::value) - return portable_float64_t<>::create(val); - else - return portable_float64_t<>(val); + return _static_cast >(val); } template @@ -436,13 +439,34 @@ NBL_CONSTEXPR_INLINE_FUNC portable_vector64_t3 create_portable_vector64_t2_from_ return output; } -template >::value> -inline float32_t convert_portable_float64_t_to_float(portable_float64_t<> val) +namespace impl { - - return float32_t(val); + template + struct PortableMul64Helper + { + static inline V multiply(M mat, V vec) + { + return mat * vec; + } + }; + + template + struct PortableMul64Helper + { + static inline V multiply(M mat, V vec) + { + return mul(mat, vec); + } + }; } +template +V portableMul64(M mat, V vec) +{ + return PortableMul64Helper >::multiply(mat, vec); +} + + } } #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index d39526e04b..704e8b025c 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -51,6 +51,7 @@ namespace impl template struct traits_base { + static_assert(is_same::value || is_same::value); NBL_CONSTEXPR_STATIC_INLINE int16_t exponentBitCnt = int16_t(0xbeef); NBL_CONSTEXPR_STATIC_INLINE int16_t mantissaBitCnt = int16_t(0xbeef); }; @@ -158,13 +159,15 @@ NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type ext template NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSign(T x) { - return (impl::castToUintType(x) & traits::signMask) >> ((sizeof(T) * 8) - 1); + using AsFloat = typename float_of_size::type; + return (impl::castToUintType(x) & traits::signMask) >> ((sizeof(T) * 8) - 1); } template NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) { - return impl::castToUintType(x) & traits::signMask; + using AsFloat = typename float_of_size::type; + return impl::castToUintType(x) & traits::signMask; } } diff --git a/include/nbl/builtin/hlsl/tgmath.hlsl b/include/nbl/builtin/hlsl/tgmath.hlsl index bd1e1f9e48..75cddf27c0 100644 --- a/include/nbl/builtin/hlsl/tgmath.hlsl +++ b/include/nbl/builtin/hlsl/tgmath.hlsl @@ -33,7 +33,7 @@ NBL_CONSTEXPR_INLINE_FUNC bool isInf(T val) using AsFloat = typename float_of_size::type; AsUint tmp = bit_cast(val); - return (tmp & ~ieee754::traits::signMask) == ieee754::traits::inf; + return (tmp & ~ieee754::traits::signMask) == ieee754::traits::inf; } } From 9b8e61e2c356978a31eb6f5f3a31ade47518b646 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 29 Aug 2024 00:57:21 +0100 Subject: [PATCH 043/432] Saving work --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 103 ++++++++++++------ .../hlsl/emulated_float64_t_utils.hlsl | 40 +++---- 3 files changed, 85 insertions(+), 60 deletions(-) diff --git a/examples_tests b/examples_tests index 
70ee941a76..c08cd35f85 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 70ee941a7658f590c7ea4d3a00ce207eeec12b9d +Subproject commit c08cd35f85ee2bf99110905e2072e239ca44470c diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index 914e5df43b..b1bbe88280 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -72,29 +72,6 @@ namespace hlsl return emulated_float64_t(bit_cast(float64_t(val))); }*/ - inline float getAsFloat32() - { - int exponent = ieee754::extractExponent(data); - if (!FastMath) - { - if (exponent > 127) - return bit_cast(ieee754::traits::inf); - if (exponent < -126) - return -bit_cast(ieee754::traits::inf); - if (tgmath::isnan(data)) - return bit_cast(ieee754::traits::quietNaN); - } - - //return float(bit_cast(data)); - // TODO: fix - uint32_t sign = uint32_t((data & ieee754::traits::signMask) >> 32); - uint32_t biasedExponent = uint32_t(exponent + ieee754::traits::exponentBias) << ieee754::traits::mantissaBitCnt; - uint32_t mantissa = uint32_t(data >> (ieee754::traits::mantissaBitCnt - ieee754::traits::mantissaBitCnt)) & ieee754::traits::mantissaMask; - - return bit_cast(sign | biasedExponent | mantissa); - - } - // TODO: remove emulated_float64_t addOld(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { @@ -656,30 +633,34 @@ struct static_cast_helper,void // TODO: test static inline To cast(From v) { + using ToAsFloat = typename float_of_size::type; + using ToAsUint = typename unsigned_integer_of_size::type; + + if (is_same_v) return To(bit_cast(v.data)); if (is_floating_point::value) { + const int exponent = ieee754::extractExponent(v.data); if (!From::supportsFastMath()) { - if (exponent > ieee754::traits::exponentMax) - return bit_cast(ieee754::traits::inf); - if (exponent < ieee754::traits::exponentMin) - return -bit_cast(ieee754::traits::inf); + if (exponent > ieee754::traits::exponentMax) + return bit_cast(ieee754::traits::inf); + if (exponent < ieee754::traits::exponentMin) + return -bit_cast(ieee754::traits::inf); if (tgmath::isnan(v.data)) - return bit_cast(ieee754::traits::quietNaN); + return bit_cast(ieee754::traits::quietNaN); } - using AsUint = typename unsigned_integer_of_size::type; const uint32_t toBitSize = sizeof(To) * 8; - const AsUint sign = AsUint(ieee754::extractSign(v.data) << (toBitSize - 1)); - const AsUint biasedExponent = AsUint(exponent + ieee754::traits::exponentBias) << ieee754::traits::mantissaBitCnt; - const AsUint mantissa = AsUint(v.data >> (ieee754::traits::mantissaBitCnt - ieee754::traits::mantissaBitCnt)) & ieee754::traits::mantissaMask; + const ToAsUint sign = ToAsUint(ieee754::extractSign(v.data) << (toBitSize - 1)); + const ToAsUint biasedExponent = ToAsUint(exponent + ieee754::traits::exponentBias) << ieee754::traits::mantissaBitCnt; + const ToAsUint mantissa = ToAsUint(v.data >> (ieee754::traits::mantissaBitCnt - ieee754::traits::mantissaBitCnt)) & ieee754::traits::mantissaMask; - return bit_cast(sign | biasedExponent | mantissa); + return bit_cast(sign | biasedExponent | mantissa); } // NOTE: casting from negative float to unsigned int is an UB, function will return abs value in this case @@ -711,7 +692,60 @@ struct static_cast_helper,void } // assert(false); - return 0xdeadbeefbadcaffeull; + return To(0xdeadbeefbadcaffeull); + } +}; + +template +struct static_cast_helper, float32_t, void> +{ + using To = emulated_float64_t; + + static inline To cast(float32_t v) + { + 
return To::create(v); + } +}; + +template +struct static_cast_helper, float64_t, void> +{ + using To = emulated_float64_t; + + static inline To cast(float64_t v) + { + return To::create(v); + } +}; + +template +struct static_cast_helper, uint32_t, void> +{ + using To = emulated_float64_t; + + static inline To cast(uint32_t v) + { + return To::create(v); + } +}; + +template +struct static_cast_helper, uint64_t, void> +{ + using To = emulated_float64_t; + + static inline To cast(uint64_t v) + { + return To::create(v); + } +}; + +template +struct static_cast_helper, emulated_float64_t, void> +{ + static inline emulated_float64_t cast(emulated_float64_t v) + { + return v; } }; @@ -744,6 +778,5 @@ IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t; namespace impl { -template -struct is_emulated -{ - NBL_CONSTEXPR_STATIC_INLINE bool value = is_same_v > || - is_same_v > || - is_same_v > || - is_same_v >; -}; template::value > struct portable_vector @@ -441,29 +433,29 @@ NBL_CONSTEXPR_INLINE_FUNC portable_vector64_t3 create_portable_vector64_t2_from_ namespace impl { - template - struct PortableMul64Helper +template +struct PortableMul64Helper +{ + static inline V multiply(M mat, V vec) { - static inline V multiply(M mat, V vec) - { - return mat * vec; - } - }; - - template - struct PortableMul64Helper + return mat * vec; + } +}; + +template +struct PortableMul64Helper +{ + static inline V multiply(M mat, V vec) { - static inline V multiply(M mat, V vec) - { - return mul(mat, vec); - } - }; + return mul(mat, vec); + } +}; } template V portableMul64(M mat, V vec) { - return PortableMul64Helper >::multiply(mat, vec); + return impl::PortableMul64Helper >::multiply(mat, vec); } From a2cd3956b20b9b12ea3456a3b2e8b765241a595b Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 30 Aug 2024 15:34:59 +0100 Subject: [PATCH 044/432] Found the bug --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated_float64_t.hlsl | 17 +++++++++-------- include/nbl/builtin/hlsl/ieee754.hlsl | 2 +- include/nbl/builtin/hlsl/shapes/beziers.hlsl | 6 ++++-- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/examples_tests b/examples_tests index c08cd35f85..a6b14dacdd 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit c08cd35f85ee2bf99110905e2072e239ca44470c +Subproject commit a6b14dacddc043e73191ed1a431a4341bc8b5e77 diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl index b1bbe88280..f96752ab63 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated_float64_t.hlsl @@ -404,7 +404,7 @@ namespace hlsl emulated_float64_t operator*(float rhs) { - return bit_cast(data) * create(rhs); + return _static_cast(data) * create(rhs); } /*this_t reciprocal(uint64_t x) @@ -626,7 +626,7 @@ template struct static_cast_helper,void> { // TODO: - // static_assert(is_arithmetic::value); + static_assert(is_scalar::value); using From = emulated_float64_t; @@ -646,12 +646,13 @@ struct static_cast_helper,void const int exponent = ieee754::extractExponent(v.data); if (!From::supportsFastMath()) { - if (exponent > ieee754::traits::exponentMax) - return bit_cast(ieee754::traits::inf); - if (exponent < ieee754::traits::exponentMin) - return -bit_cast(ieee754::traits::inf); - if (tgmath::isnan(v.data)) - return bit_cast(ieee754::traits::quietNaN); + // TODO: i have no idea why it doesn't work, fix + //if (exponent > ieee754::traits::exponentMax) + // return bit_cast(ieee754::traits::inf); + //if 
(exponent < ieee754::traits::exponentMin) + // return -bit_cast(ieee754::traits::inf); + //if (tgmath::isnan(v.data)) + // return bit_cast(ieee754::traits::quietNaN); } diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index 704e8b025c..7e36501f0a 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -122,7 +122,7 @@ template inline int extractExponent(T x) { using AsFloat = typename float_of_size::type; - return int(extractBiasedExponent(x)) - int(traits::exponentBias); + return int(extractBiasedExponent(x)) - traits::exponentBias; } template diff --git a/include/nbl/builtin/hlsl/shapes/beziers.hlsl b/include/nbl/builtin/hlsl/shapes/beziers.hlsl index 44d79800ef..d3178a29f7 100644 --- a/include/nbl/builtin/hlsl/shapes/beziers.hlsl +++ b/include/nbl/builtin/hlsl/shapes/beziers.hlsl @@ -563,7 +563,8 @@ static math::equations::Quartic getBezierBezierIntersectionEquation(NBL portable_float64_t<> d = (B.x * C.x * k0 * 2.0f) + (B.x * C.y * k1) + (B.x * k3) + (C.x * B.y * k1) + (B.y * C.y * k2 * 2.0f) + (B.y * k4); portable_float64_t<> e = ((C.x * C.x) * k0) + (C.x * C.y * k1) + (C.x * k3) + ((C.y * C.y) * k2) + (C.y * k4) + (k5); - return math::equations::Quartic >::construct(a, b, c, d, e); + return math::equations::Quartic::construct( + _static_cast(a), _static_cast(b), _static_cast(c), _static_cast(d), _static_cast(e)); } // This function returns the analytic quadratic equation to solve for bezier's t value for intersection with another bezier curve @@ -581,7 +582,8 @@ static math::equations::Quadratic getBezierLineIntersectionEquation(Qua bezier.P1 = mul(rotate, bezier.P1 - lineStart); bezier.P2 = mul(rotate, bezier.P2 - lineStart); Quadratic quadratic = Quadratic::constructFromBezier(bezier); - return math::equations::Quadratic >::construct(quadratic.A.y, quadratic.B.y, quadratic.C.y); + return math::equations::Quadratic::construct( + _static_cast(quadratic.A.y), _static_cast(quadratic.B.y), _static_cast(quadratic.C.y)); } } // namespace shapes From 6203ea66a037729118f7215d5e88cd26b2119bb4 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 3 Sep 2024 18:08:26 +0100 Subject: [PATCH 045/432] Fixed a bug --- examples_tests | 2 +- .../{ => emulated}/emulated_float64_t.hlsl | 1571 +++++++++-------- .../emulated_float64_t_impl.hlsl | 956 +++++----- .../emulated_float64_t_utils.hlsl | 4 +- .../builtin/hlsl/{ => ieee754}/ieee754.hlsl | 20 +- .../hlsl/math/equations/quadratic.hlsl | 2 +- include/nbl/builtin/hlsl/shapes/beziers.hlsl | 4 +- include/nbl/builtin/hlsl/tgmath.hlsl | 4 +- src/nbl/builtin/CMakeLists.txt | 10 +- src/nbl/video/CVulkanPhysicalDevice.cpp | 3 +- 10 files changed, 1260 insertions(+), 1316 deletions(-) rename include/nbl/builtin/hlsl/{ => emulated}/emulated_float64_t.hlsl (83%) rename include/nbl/builtin/hlsl/{impl => emulated}/emulated_float64_t_impl.hlsl (82%) rename include/nbl/builtin/hlsl/{ => emulated}/emulated_float64_t_utils.hlsl (98%) rename include/nbl/builtin/hlsl/{ => ieee754}/ieee754.hlsl (87%) diff --git a/examples_tests b/examples_tests index a6b14dacdd..f8f2c23aab 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit a6b14dacddc043e73191ed1a431a4341bc8b5e77 +Subproject commit f8f2c23aab0092015b3bf31cdab9875c1435f5dc diff --git a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/emulated_float64_t.hlsl similarity index 83% rename from include/nbl/builtin/hlsl/emulated_float64_t.hlsl rename to 
include/nbl/builtin/hlsl/emulated/emulated_float64_t.hlsl index f96752ab63..088e535c5b 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/emulated_float64_t.hlsl @@ -1,783 +1,788 @@ -#ifndef _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_INCLUDED_ -#define _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_INCLUDED_ - -#include - -namespace nbl -{ -namespace hlsl -{ - template - struct emulated_float64_t - { - using storage_t = uint64_t; - using this_t = emulated_float64_t; - - storage_t data; - - // constructors - /*static emulated_float64_t create(uint16_t val) - { - return emulated_float64_t(bit_cast(float64_t(val))); - }*/ - - NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t val) - { - return val; - } - - NBL_CONSTEXPR_STATIC_INLINE this_t create(int32_t val) - { - return bit_cast(impl::castToUint64WithFloat64BitPattern(int64_t(val))); - } - - NBL_CONSTEXPR_STATIC_INLINE this_t create(int64_t val) - { - return bit_cast(impl::castToUint64WithFloat64BitPattern(val)); - } - - NBL_CONSTEXPR_STATIC_INLINE this_t create(uint32_t val) - { - return bit_cast(impl::castToUint64WithFloat64BitPattern(uint64_t(val))); - } - - NBL_CONSTEXPR_STATIC_INLINE this_t create(uint64_t val) - { - return bit_cast(impl::castToUint64WithFloat64BitPattern(val)); - } - - NBL_CONSTEXPR_STATIC_INLINE this_t create(float32_t val) - { - this_t output; - output.data = impl::castFloat32ToStorageType(val); - return output; - } - - NBL_CONSTEXPR_STATIC_INLINE this_t create(float64_t val) - { -#ifdef __HLSL_VERSION - emulated_float64_t retval; - uint32_t lo, hi; - asuint(val, lo, hi); - retval.data = (uint64_t(hi) << 32) | lo; - return retval; -#else - return bit_cast(reinterpret_cast(val)); -#endif - } - - // TODO: unresolved external symbol imath_half_to_float_table - /*static emulated_float64_t create(float16_t val) - { - return emulated_float64_t(bit_cast(float64_t(val))); - }*/ - - // TODO: remove - emulated_float64_t addOld(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC - { - if (FlushDenormToZero) - { - emulated_float64_t retval = emulated_float64_t::create(0ull); - - uint64_t mantissa; - uint32_t3 mantissaExtended; - int biasedExp; - - uint64_t lhsSign = data & ieee754::traits::signMask; - uint64_t rhsSign = rhs.data & ieee754::traits::signMask; - uint64_t lhsMantissa = ieee754::extractMantissa(data); - uint64_t rhsMantissa = ieee754::extractMantissa(rhs.data); - int lhsBiasedExp = ieee754::extractBiasedExponent(data); - int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); - - int expDiff = lhsBiasedExp - rhsBiasedExp; - - if (lhsSign == rhsSign) - { - if (expDiff == 0) - { - if (lhsBiasedExp == ieee754::traits::specialValueExp) - { - bool propagate = (lhsMantissa | rhsMantissa) != 0u; - return bit_cast >(glsl::mix(data, impl::propagateFloat64NaN(data, rhs.data), propagate)); - } - - mantissa = lhsMantissa + rhsMantissa; - if (lhsBiasedExp == 0) - return bit_cast >(impl::assembleFloat64(lhsSign, 0, mantissa)); - mantissaExtended.xy = impl::packUint64(mantissa); - mantissaExtended.x |= 0x00200000u; - mantissaExtended.z = 0u; - biasedExp = lhsBiasedExp; - - mantissaExtended = impl::shift64ExtraRightJamming(mantissaExtended, 1); - } - else - { - if (expDiff < 0) - { - swap(lhsMantissa, rhsMantissa); - swap(lhsBiasedExp, rhsBiasedExp); - } - - if (lhsBiasedExp == ieee754::traits::specialValueExp) - { - const bool propagate = (lhsMantissa) != 0u; - return bit_cast >(glsl::mix(ieee754::traits::exponentMask | lhsSign, impl::propagateFloat64NaN(data, rhs.data), propagate)); - } - - 
expDiff = glsl::mix(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); - rhsMantissa = glsl::mix(rhsMantissa | (1ull << 52), rhsMantissa, rhsBiasedExp == 0); - const uint32_t3 shifted = impl::shift64ExtraRightJamming(uint32_t3(impl::packUint64(rhsMantissa), 0u), expDiff); - rhsMantissa = impl::unpackUint64(shifted.xy); - mantissaExtended.z = shifted.z; - biasedExp = lhsBiasedExp; - - lhsMantissa |= (1ull << 52); - mantissaExtended.xy = impl::packUint64(lhsMantissa + rhsMantissa); - --biasedExp; - if (!(mantissaExtended.x < 0x00200000u)) - { - mantissaExtended = impl::shift64ExtraRightJamming(mantissaExtended, 1); - ++biasedExp; - } - - return bit_cast >(impl::roundAndPackFloat64(lhsSign, biasedExp, mantissaExtended.xyz)); - } - - // cannot happen but compiler cries about not every path returning value - return bit_cast >(impl::roundAndPackFloat64(lhsSign, biasedExp, mantissaExtended)); - } - else - { - lhsMantissa = impl::shortShift64Left(lhsMantissa, 10); - rhsMantissa = impl::shortShift64Left(rhsMantissa, 10); - - if (expDiff != 0) - { - uint32_t2 frac; - - if (expDiff < 0) - { - swap(lhsMantissa, rhsMantissa); - swap(lhsBiasedExp, rhsBiasedExp); - lhsSign ^= ieee754::traits::signMask; - } - - if (lhsBiasedExp == ieee754::traits::specialValueExp) - { - bool propagate = lhsMantissa != 0u; - return bit_cast >(glsl::mix(impl::assembleFloat64(lhsSign, ieee754::traits::exponentMask, 0ull), impl::propagateFloat64NaN(data, rhs.data), propagate)); - } - - expDiff = glsl::mix(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); - rhsMantissa = glsl::mix(rhsMantissa | 0x4000000000000000ull, rhsMantissa, rhsBiasedExp == 0); - rhsMantissa = impl::unpackUint64(impl::shift64RightJamming(impl::packUint64(rhsMantissa), expDiff)); - lhsMantissa |= 0x4000000000000000ull; - frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); - biasedExp = lhsBiasedExp; - --biasedExp; - return bit_cast >(impl::normalizeRoundAndPackFloat64(lhsSign, biasedExp - 10, frac.x, frac.y)); - } - if (lhsBiasedExp == ieee754::traits::specialValueExp) - { - bool propagate = ((lhsMantissa) | (rhsMantissa)) != 0u; - return bit_cast >(glsl::mix(ieee754::traits::quietNaN, impl::propagateFloat64NaN(data, rhs.data), propagate)); - } - rhsBiasedExp = glsl::mix(rhsBiasedExp, 1, lhsBiasedExp == 0); - lhsBiasedExp = glsl::mix(lhsBiasedExp, 1, lhsBiasedExp == 0); - - - const uint32_t2 lhsMantissaPacked = impl::packUint64(lhsMantissa); - const uint32_t2 rhsMantissaPacked = impl::packUint64(rhsMantissa); - - uint32_t2 frac; - uint64_t signOfDifference = 0; - if (rhsMantissaPacked.x < lhsMantissaPacked.x) - { - frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); - } - else if (lhsMantissaPacked.x < rhsMantissaPacked.x) - { - frac.xy = impl::packUint64(rhsMantissa - lhsMantissa); - signOfDifference = ieee754::traits::signMask; - } - else if (rhsMantissaPacked.y <= lhsMantissaPacked.y) - { - /* It is possible that frac.x and frac.y may be zero after this. 
*/ - frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); - } - else - { - frac.xy = impl::packUint64(rhsMantissa - lhsMantissa); - signOfDifference = ieee754::traits::signMask; - } - - biasedExp = glsl::mix(rhsBiasedExp, lhsBiasedExp, signOfDifference == 0u); - lhsSign ^= signOfDifference; - uint64_t retval_0 = impl::packFloat64(uint32_t(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) << 31, 0, 0u, 0u); - uint64_t retval_1 = impl::normalizeRoundAndPackFloat64(lhsSign, biasedExp - 11, frac.x, frac.y); - return bit_cast >(glsl::mix(retval_0, retval_1, frac.x != 0u || frac.y != 0u)); - } - } - else - { - //static_assert(false, "not implemented yet"); - return bit_cast >(0xdeadbeefbadcaffeull); - } - } - - // arithmetic operators - this_t operator+(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC - { - return addOld(rhs); - - if (FlushDenormToZero) - { - if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) - return bit_cast(ieee754::traits::quietNaN); - - int lhsBiasedExp = ieee754::extractBiasedExponent(data); - int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); - - if (lhsBiasedExp == 0ull) - return bit_cast(rhs.data); - if (rhsBiasedExp == 0ull) - return bit_cast(data); - - uint64_t lhsSign = ieee754::extractSign(data); - uint64_t rhsSign = ieee754::extractSign(rhs.data); - - int64_t lhsNormMantissa = int64_t(ieee754::extractNormalizeMantissa(data)); - int64_t rhsNormMantissa = int64_t(ieee754::extractNormalizeMantissa(rhs.data)); - - lhsNormMantissa <<= 9; - rhsNormMantissa <<= 9; - - // TODO: branchless? - if (lhsSign != rhsSign) - { - if (lhsSign) - lhsNormMantissa *= -1; - if (rhsSign) - rhsNormMantissa *= -1; - } - - int expDiff = lhsBiasedExp - rhsBiasedExp; - - int exp = max(lhsBiasedExp, rhsBiasedExp) - ieee754::traits::exponentBias; - uint32_t shiftAmount = abs(expDiff); - - // so lhsNormMantissa always holds mantissa of number with greater exponent - if (expDiff < 0) - swap(lhsNormMantissa, rhsNormMantissa); - - rhsNormMantissa >>= shiftAmount; - - int64_t resultMantissa = lhsNormMantissa + rhsNormMantissa; - - resultMantissa >>= 9; - - const uint64_t resultSign = uint64_t((lhsSign && rhsSign) || (bit_cast(resultMantissa) & (lhsSign << 63))) << 63; - uint64_t resultBiasedExp = uint64_t(exp) + ieee754::traits::exponentBias; - - resultMantissa = abs(resultMantissa); - - if (resultMantissa & 1ull << 53) - { - ++resultBiasedExp; - resultMantissa >>= 1; - } - - // TODO: better implementation with no loop - while (resultMantissa < (1ull << 52)) - { - --resultBiasedExp; - resultMantissa <<= 1; - } - - resultMantissa &= ieee754::traits::mantissaMask; - uint64_t output = impl::assembleFloat64(resultSign, uint64_t(resultBiasedExp) << ieee754::traits::mantissaBitCnt, abs(resultMantissa)); - return bit_cast(output); - } - - // not implemented - if (!FlushDenormToZero) - return bit_cast(0xdeadbeefbadcaffeull); - } - - emulated_float64_t operator+(float rhs) - { - return bit_cast(data) + create(rhs); - } - - emulated_float64_t operator-(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC - { - emulated_float64_t lhs = bit_cast(data); - emulated_float64_t rhsFlipped = rhs.flipSign(); - - return lhs + rhsFlipped; - } - - emulated_float64_t operator-(float rhs) NBL_CONST_MEMBER_FUNC - { - return bit_cast(data) - create(rhs); - } - - emulated_float64_t operator*(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC - { - if(FlushDenormToZero) - { - emulated_float64_t retval = this_t::create(0ull); - - uint64_t lhsSign = data & ieee754::traits::signMask; - uint64_t rhsSign = rhs.data & 
ieee754::traits::signMask; - uint64_t lhsMantissa = ieee754::extractMantissa(data); - uint64_t rhsMantissa = ieee754::extractMantissa(rhs.data); - int lhsBiasedExp = ieee754::extractBiasedExponent(data); - int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); - - int exp = int(lhsBiasedExp + rhsBiasedExp) - ieee754::traits::exponentBias; - uint64_t sign = (data ^ rhs.data) & ieee754::traits::signMask; - if (!FastMath) - { - if (lhsBiasedExp == ieee754::traits::specialValueExp) - { - if ((lhsMantissa != 0u) || ((rhsBiasedExp == ieee754::traits::specialValueExp) && (rhsMantissa != 0u))) - return bit_cast(impl::propagateFloat64NaN(data, rhs.data)); - if ((uint64_t(rhsBiasedExp) | rhsMantissa) == 0u) - return bit_cast(ieee754::traits::quietNaN); - - return bit_cast(impl::assembleFloat64(sign, ieee754::traits::exponentMask, 0ull)); - } - if (rhsBiasedExp == ieee754::traits::specialValueExp) - { - /* a cannot be NaN, but is b NaN? */ - if (rhsMantissa != 0u) -#ifdef RELAXED_NAN_PROPAGATION - return rhs.data; -#else - return bit_cast(impl::propagateFloat64NaN(data, rhs.data)); -#endif - if ((uint64_t(lhsBiasedExp) | lhsMantissa) == 0u) - return bit_cast(ieee754::traits::quietNaN); - - return bit_cast(sign | ieee754::traits::exponentMask); - } - if (lhsBiasedExp == 0) - { - if (lhsMantissa == 0u) - return bit_cast(sign); - impl::normalizeFloat64Subnormal(lhsMantissa, lhsBiasedExp, lhsMantissa); - } - if (rhsBiasedExp == 0) - { - if (rhsMantissa == 0u) - return bit_cast(sign); - impl::normalizeFloat64Subnormal(rhsMantissa, rhsBiasedExp, rhsMantissa); - } - } - - const uint64_t hi_l = (lhsMantissa >> 21) | (1ull << 31); - const uint64_t lo_l = lhsMantissa & ((1ull << 21) - 1); - const uint64_t hi_r = (rhsMantissa >> 21) | (1ull << 31); - const uint64_t lo_r = rhsMantissa & ((1ull << 21) - 1); - - //const uint64_t RoundToNearest = (1ull << 31) - 1; - uint64_t newPseudoMantissa = ((hi_l * hi_r) >> 10) + ((hi_l * lo_r + lo_l * hi_r/* + RoundToNearest*/) >> 31); - - if (newPseudoMantissa & (0x1ull << 53)) - { - newPseudoMantissa >>= 1; - ++exp; - } - newPseudoMantissa &= (ieee754::traits::mantissaMask); - - return bit_cast(impl::assembleFloat64(sign, uint64_t(exp) << ieee754::traits::mantissaBitCnt, newPseudoMantissa)); - } - else - { - //static_assert(false, "not implemented yet"); - return bit_cast(0xdeadbeefbadcaffeull); - } - } - - emulated_float64_t operator*(float rhs) - { - return _static_cast(data) * create(rhs); - } - - /*this_t reciprocal(uint64_t x) - { - using ThisType = this_t; - ThisType output = ThisType::bit_cast((0xbfcdd6a18f6a6f52ULL - x) >> 1); - output = output * output; - return output; - }*/ - - emulated_float64_t operator/(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC - { - if (FlushDenormToZero) - { - //return this_t::bit_cast(data) * reciprocal(rhs.data); - - if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) - return bit_cast(ieee754::traits::quietNaN); - - const uint64_t sign = (data ^ rhs.data) & ieee754::traits::signMask; - - if (!FastMath && impl::isZero(rhs.data)) - return bit_cast(ieee754::traits::inf | sign); - - if (!FastMath && impl::areBothInfinity(data, rhs.data)) - return bit_cast(ieee754::traits::quietNaN); - - if (!FastMath && tgmath::isInf(data)) - return bit_cast(ieee754::traits::inf | sign); - - if (!FastMath && tgmath::isInf(rhs.data)) - return bit_cast(0ull | sign); - - - const uint64_t lhsRealMantissa = (ieee754::extractMantissa(data) | (1ull << ieee754::traits::mantissaBitCnt)); - const uint64_t rhsRealMantissa = 
ieee754::extractMantissa(rhs.data) | (1ull << ieee754::traits::mantissaBitCnt); - - int exp = ieee754::extractExponent(data) - ieee754::extractExponent(rhs.data) + ieee754::traits::exponentBias; - - uint64_t2 lhsMantissaShifted = impl::shiftMantissaLeftBy53(lhsRealMantissa); - uint64_t mantissa = impl::divmod128by64(lhsMantissaShifted.x, lhsMantissaShifted.y, rhsRealMantissa); - - while (mantissa < (1ull << 52)) - { - mantissa <<= 1; - exp--; - } - - mantissa &= ieee754::traits::mantissaMask; - - return bit_cast(impl::assembleFloat64(sign, uint64_t(exp) << ieee754::traits::mantissaBitCnt, mantissa)); - } - else - { - //static_assert(false, "not implemented yet"); - return bit_cast(0xdeadbeefbadcaffeull); - } - } - - // relational operators - // TODO: should `FlushDenormToZero` affect relational operators? - bool operator==(this_t rhs) NBL_CONST_MEMBER_FUNC - { - if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) - return false; - // TODO: i'm not sure about this one - if (!FastMath && impl::areBothZero(data, rhs.data)) - return true; - - const emulated_float64_t xored = bit_cast(data ^ rhs.data); - // TODO: check what fast math returns for -0 == 0 - if ((xored.data & 0x7FFFFFFFFFFFFFFFull) == 0ull) - return true; - - return !(xored.data); - } - bool operator!=(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC - { - if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) - return false; - - return !(bit_cast(data) == rhs); - } - bool operator<(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC - { - if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) - return false; - if (!FastMath && impl::areBothSameSignInfinity(data, rhs.data)) - return false; - if (!FastMath && impl::areBothZero(data, rhs.data)) - return false; - - const uint64_t lhsSign = ieee754::extractSign(data); - const uint64_t rhsSign = ieee754::extractSign(rhs.data); - - // flip bits of negative numbers and flip signs of all numbers - uint64_t lhsFlipped = data ^ ((0x7FFFFFFFFFFFFFFFull * lhsSign) | ieee754::traits::signMask); - uint64_t rhsFlipped = rhs.data ^ ((0x7FFFFFFFFFFFFFFFull * rhsSign) | ieee754::traits::signMask); - - uint64_t diffBits = lhsFlipped ^ rhsFlipped; - - return (lhsFlipped & diffBits) < (rhsFlipped & diffBits); - } - bool operator>(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC - { - if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) - return false; - if (!FastMath && impl::areBothSameSignInfinity(data, rhs.data)) - return false; - if (!FastMath && impl::areBothZero(data, rhs.data)) - return false; - - const uint64_t lhsSign = ieee754::extractSign(data); - const uint64_t rhsSign = ieee754::extractSign(rhs.data); - - // flip bits of negative numbers and flip signs of all numbers - uint64_t lhsFlipped = data ^ ((0x7FFFFFFFFFFFFFFFull * lhsSign) | ieee754::traits::signMask); - uint64_t rhsFlipped = rhs.data ^ ((0x7FFFFFFFFFFFFFFFull * rhsSign) | ieee754::traits::signMask); - - uint64_t diffBits = lhsFlipped ^ rhsFlipped; - - return (lhsFlipped & diffBits) > (rhsFlipped & diffBits); - } - bool operator<=(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC - { - if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) - return false; - - return !(bit_cast(data) > bit_cast(rhs.data)); - } - bool operator>=(emulated_float64_t rhs) - { - if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) - return false; - - return !(bit_cast(data) < bit_cast(rhs.data)); - } - - //logical operators - bool operator&&(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { 
return bool(data) && bool(rhs.data); } - bool operator||(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { return bool(data) || bool(rhs.data); } - bool operator!() NBL_CONST_MEMBER_FUNC { return !bool(data); } - - emulated_float64_t flipSign() - { - return bit_cast(data ^ ieee754::traits::signMask); - } - - bool isNaN() - { - return tgmath::isnan(data); - } - - NBL_CONSTEXPR_STATIC_INLINE bool supportsFastMath() - { - return FastMath; - } - }; - -#define IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(...) \ -template<>\ -struct traits_base<__VA_ARGS__ >\ -{\ - NBL_CONSTEXPR_STATIC_INLINE int16_t exponentBitCnt = 11;\ - NBL_CONSTEXPR_STATIC_INLINE int16_t mantissaBitCnt = 52;\ -};\ -template<>\ -inline uint32_t extractBiasedExponent(__VA_ARGS__ x)\ -{\ - return extractBiasedExponent(x.data);\ -}\ -\ -template<>\ -inline int extractExponent(__VA_ARGS__ x)\ -{\ - return extractExponent(x.data);\ -}\ -\ -template<>\ -NBL_CONSTEXPR_INLINE_FUNC __VA_ARGS__ replaceBiasedExponent(__VA_ARGS__ x, typename unsigned_integer_of_size::type biasedExp)\ -{\ - return __VA_ARGS__(replaceBiasedExponent(x.data, biasedExp));\ -}\ -\ -template <>\ -NBL_CONSTEXPR_INLINE_FUNC __VA_ARGS__ fastMulExp2(__VA_ARGS__ x, int n)\ -{\ - return __VA_ARGS__(replaceBiasedExponent(x.data, extractBiasedExponent(x) + uint32_t(n)));\ -}\ -\ -template <>\ -NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size::type extractMantissa(__VA_ARGS__ x)\ -{\ - return extractMantissa(x.data);\ -}\ -\ -template <>\ -NBL_CONSTEXPR_INLINE_FUNC uint64_t extractNormalizeMantissa(__VA_ARGS__ x)\ -{\ - return extractNormalizeMantissa(x.data);\ -}\ -\ - -#define DEFINE_BIT_CAST_SPEC(...)\ -template<>\ -NBL_CONSTEXPR_FUNC __VA_ARGS__ bit_cast<__VA_ARGS__, uint64_t>(NBL_CONST_REF_ARG(uint64_t) val)\ -{\ -__VA_ARGS__ output; \ -output.data = val; \ -\ -return output; \ -}\ -\ - -namespace impl -{ - -template -struct static_cast_helper,void> -{ - // TODO: - static_assert(is_scalar::value); - - using From = emulated_float64_t; - - // TODO: test - static inline To cast(From v) - { - using ToAsFloat = typename float_of_size::type; - using ToAsUint = typename unsigned_integer_of_size::type; - - - if (is_same_v) - return To(bit_cast(v.data)); - - if (is_floating_point::value) - { - - const int exponent = ieee754::extractExponent(v.data); - if (!From::supportsFastMath()) - { - // TODO: i have no idea why it doesn't work, fix - //if (exponent > ieee754::traits::exponentMax) - // return bit_cast(ieee754::traits::inf); - //if (exponent < ieee754::traits::exponentMin) - // return -bit_cast(ieee754::traits::inf); - //if (tgmath::isnan(v.data)) - // return bit_cast(ieee754::traits::quietNaN); - } - - - const uint32_t toBitSize = sizeof(To) * 8; - const ToAsUint sign = ToAsUint(ieee754::extractSign(v.data) << (toBitSize - 1)); - const ToAsUint biasedExponent = ToAsUint(exponent + ieee754::traits::exponentBias) << ieee754::traits::mantissaBitCnt; - const ToAsUint mantissa = ToAsUint(v.data >> (ieee754::traits::mantissaBitCnt - ieee754::traits::mantissaBitCnt)) & ieee754::traits::mantissaMask; - - return bit_cast(sign | biasedExponent | mantissa); - } - - // NOTE: casting from negative float to unsigned int is an UB, function will return abs value in this case - if (is_integral::value) - { - const int exponent = ieee754::extractExponent(v.data); - if (exponent < 0) - return 0; - - uint64_t unsignedOutput = ieee754::extractMantissa(v.data) & 1ull << ieee754::traits::mantissaBitCnt; - const int shiftAmount = exponent - int(ieee754::traits::mantissaBitCnt); - - if 
(shiftAmount < 0) - unsignedOutput <<= -shiftAmount; - else - unsignedOutput >>= shiftAmount; - - if (is_signed::value) - { - int64_t signedOutput64 = unsignedOutput & ((1ull << 63) - 1); - To signedOutput = To(signedOutput64); - if (ieee754::extractSignPreserveBitPattern(v.data) != 0) - signedOutput = -signedOutput; - - return signedOutput; - } - - return To(unsignedOutput); - } - - // assert(false); - return To(0xdeadbeefbadcaffeull); - } -}; - -template -struct static_cast_helper, float32_t, void> -{ - using To = emulated_float64_t; - - static inline To cast(float32_t v) - { - return To::create(v); - } -}; - -template -struct static_cast_helper, float64_t, void> -{ - using To = emulated_float64_t; - - static inline To cast(float64_t v) - { - return To::create(v); - } -}; - -template -struct static_cast_helper, uint32_t, void> -{ - using To = emulated_float64_t; - - static inline To cast(uint32_t v) - { - return To::create(v); - } -}; - -template -struct static_cast_helper, uint64_t, void> -{ - using To = emulated_float64_t; - - static inline To cast(uint64_t v) - { - return To::create(v); - } -}; - -template -struct static_cast_helper, emulated_float64_t, void> -{ - static inline emulated_float64_t cast(emulated_float64_t v) - { - return v; - } -}; - -} - -DEFINE_BIT_CAST_SPEC(emulated_float64_t); -DEFINE_BIT_CAST_SPEC(emulated_float64_t); -DEFINE_BIT_CAST_SPEC(emulated_float64_t); -DEFINE_BIT_CAST_SPEC(emulated_float64_t); - -//template -//struct is_floating_point > : bool_constant {}; - -namespace ieee754 -{ -IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); -IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); -IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); -IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); -} - -} -} - -#undef FLOAT_ROUND_NEAREST_EVEN -#undef FLOAT_ROUND_TO_ZERO -#undef FLOAT_ROUND_DOWN -#undef FLOAT_ROUND_UP -#undef FLOAT_ROUNDING_MODE - -#undef IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE -#undef DEFINE_BIT_CAST_SPEC - -#endif +#ifndef _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_INCLUDED_ +#define _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ + template + struct emulated_float64_t + { + using storage_t = uint64_t; + using this_t = emulated_float64_t; + + storage_t data; + + // constructors + /*static emulated_float64_t create(uint16_t val) + { + return emulated_float64_t(bit_cast(float64_t(val))); + }*/ + + NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t val) + { + return val; + } + + NBL_CONSTEXPR_STATIC_INLINE this_t create(int32_t val) + { + return bit_cast(impl::castToUint64WithFloat64BitPattern(int64_t(val))); + } + + NBL_CONSTEXPR_STATIC_INLINE this_t create(int64_t val) + { + return bit_cast(impl::castToUint64WithFloat64BitPattern(val)); + } + + NBL_CONSTEXPR_STATIC_INLINE this_t create(uint32_t val) + { + return bit_cast(impl::castToUint64WithFloat64BitPattern(uint64_t(val))); + } + + NBL_CONSTEXPR_STATIC_INLINE this_t create(uint64_t val) + { + return bit_cast(impl::castToUint64WithFloat64BitPattern(val)); + } + + NBL_CONSTEXPR_STATIC_INLINE this_t create(float32_t val) + { + this_t output; + output.data = impl::castFloat32ToStorageType(val); + return output; + } + + NBL_CONSTEXPR_STATIC_INLINE this_t create(float64_t val) + { +#ifdef __HLSL_VERSION + emulated_float64_t retval; + uint32_t lo, hi; + asuint(val, lo, hi); + retval.data = (uint64_t(hi) << 32) | lo; + return retval; +#else + return 
bit_cast(reinterpret_cast(val)); +#endif + } + + // TODO: unresolved external symbol imath_half_to_float_table + /*static emulated_float64_t create(float16_t val) + { + return emulated_float64_t(bit_cast(float64_t(val))); + }*/ + + // TODO: remove + emulated_float64_t addOld(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC + { + if (FlushDenormToZero) + { + emulated_float64_t retval = emulated_float64_t::create(0ull); + + uint64_t mantissa; + uint32_t3 mantissaExtended; + int biasedExp; + + uint64_t lhsSign = data & ieee754::traits::signMask; + uint64_t rhsSign = rhs.data & ieee754::traits::signMask; + uint64_t lhsMantissa = ieee754::extractMantissa(data); + uint64_t rhsMantissa = ieee754::extractMantissa(rhs.data); + int lhsBiasedExp = ieee754::extractBiasedExponent(data); + int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); + + int expDiff = lhsBiasedExp - rhsBiasedExp; + + if (lhsSign == rhsSign) + { + if (expDiff == 0) + { + if (lhsBiasedExp == ieee754::traits::specialValueExp) + { + bool propagate = (lhsMantissa | rhsMantissa) != 0u; + return bit_cast >(glsl::mix(data, impl::propagateFloat64NaN(data, rhs.data), propagate)); + } + + mantissa = lhsMantissa + rhsMantissa; + if (lhsBiasedExp == 0) + return bit_cast >(impl::assembleFloat64(lhsSign, 0, mantissa)); + mantissaExtended.xy = impl::packUint64(mantissa); + mantissaExtended.x |= 0x00200000u; + mantissaExtended.z = 0u; + biasedExp = lhsBiasedExp; + + mantissaExtended = impl::shift64ExtraRightJamming(mantissaExtended, 1); + } + else + { + if (expDiff < 0) + { + swap(lhsMantissa, rhsMantissa); + swap(lhsBiasedExp, rhsBiasedExp); + } + + if (lhsBiasedExp == ieee754::traits::specialValueExp) + { + const bool propagate = (lhsMantissa) != 0u; + return bit_cast >(glsl::mix(ieee754::traits::exponentMask | lhsSign, impl::propagateFloat64NaN(data, rhs.data), propagate)); + } + + expDiff = glsl::mix(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); + rhsMantissa = glsl::mix(rhsMantissa | (1ull << 52), rhsMantissa, rhsBiasedExp == 0); + const uint32_t3 shifted = impl::shift64ExtraRightJamming(uint32_t3(impl::packUint64(rhsMantissa), 0u), expDiff); + rhsMantissa = impl::unpackUint64(shifted.xy); + mantissaExtended.z = shifted.z; + biasedExp = lhsBiasedExp; + + lhsMantissa |= (1ull << 52); + mantissaExtended.xy = impl::packUint64(lhsMantissa + rhsMantissa); + --biasedExp; + if (!(mantissaExtended.x < 0x00200000u)) + { + mantissaExtended = impl::shift64ExtraRightJamming(mantissaExtended, 1); + ++biasedExp; + } + + return bit_cast >(impl::roundAndPackFloat64(lhsSign, biasedExp, mantissaExtended.xyz)); + } + + // cannot happen but compiler cries about not every path returning value + return bit_cast >(impl::roundAndPackFloat64(lhsSign, biasedExp, mantissaExtended)); + } + else + { + lhsMantissa = impl::shortShift64Left(lhsMantissa, 10); + rhsMantissa = impl::shortShift64Left(rhsMantissa, 10); + + if (expDiff != 0) + { + uint32_t2 frac; + + if (expDiff < 0) + { + swap(lhsMantissa, rhsMantissa); + swap(lhsBiasedExp, rhsBiasedExp); + lhsSign ^= ieee754::traits::signMask; + } + + if (lhsBiasedExp == ieee754::traits::specialValueExp) + { + bool propagate = lhsMantissa != 0u; + return bit_cast >(glsl::mix(impl::assembleFloat64(lhsSign, ieee754::traits::exponentMask, 0ull), impl::propagateFloat64NaN(data, rhs.data), propagate)); + } + + expDiff = glsl::mix(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); + rhsMantissa = glsl::mix(rhsMantissa | 0x4000000000000000ull, rhsMantissa, rhsBiasedExp == 0); + rhsMantissa = 
impl::unpackUint64(impl::shift64RightJamming(impl::packUint64(rhsMantissa), expDiff)); + lhsMantissa |= 0x4000000000000000ull; + frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); + biasedExp = lhsBiasedExp; + --biasedExp; + return bit_cast >(impl::normalizeRoundAndPackFloat64(lhsSign, biasedExp - 10, frac.x, frac.y)); + } + if (lhsBiasedExp == ieee754::traits::specialValueExp) + { + bool propagate = ((lhsMantissa) | (rhsMantissa)) != 0u; + return bit_cast >(glsl::mix(ieee754::traits::quietNaN, impl::propagateFloat64NaN(data, rhs.data), propagate)); + } + rhsBiasedExp = glsl::mix(rhsBiasedExp, 1, lhsBiasedExp == 0); + lhsBiasedExp = glsl::mix(lhsBiasedExp, 1, lhsBiasedExp == 0); + + + const uint32_t2 lhsMantissaPacked = impl::packUint64(lhsMantissa); + const uint32_t2 rhsMantissaPacked = impl::packUint64(rhsMantissa); + + uint32_t2 frac; + uint64_t signOfDifference = 0; + if (rhsMantissaPacked.x < lhsMantissaPacked.x) + { + frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); + } + else if (lhsMantissaPacked.x < rhsMantissaPacked.x) + { + frac.xy = impl::packUint64(rhsMantissa - lhsMantissa); + signOfDifference = ieee754::traits::signMask; + } + else if (rhsMantissaPacked.y <= lhsMantissaPacked.y) + { + /* It is possible that frac.x and frac.y may be zero after this. */ + frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); + } + else + { + frac.xy = impl::packUint64(rhsMantissa - lhsMantissa); + signOfDifference = ieee754::traits::signMask; + } + + biasedExp = glsl::mix(rhsBiasedExp, lhsBiasedExp, signOfDifference == 0u); + lhsSign ^= signOfDifference; + uint64_t retval_0 = impl::packFloat64(0, 0, 0u, 0u); + uint64_t retval_1 = impl::normalizeRoundAndPackFloat64(lhsSign, biasedExp - 11, frac.x, frac.y); + return bit_cast >(glsl::mix(retval_0, retval_1, frac.x != 0u || frac.y != 0u)); + } + } + else + { + //static_assert(false, "not implemented yet"); + return bit_cast >(0xdeadbeefbadcaffeull); + } + } + + // arithmetic operators + this_t operator+(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC + { + if (FlushDenormToZero) + { + if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) + return bit_cast(ieee754::traits::quietNaN); + + int lhsBiasedExp = ieee754::extractBiasedExponent(data); + int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); + + if (lhsBiasedExp == 0ull) + return bit_cast(rhs.data); + if (rhsBiasedExp == 0ull) + return bit_cast(data); + + const uint64_t lhsSign = ieee754::extractSign(data); + const uint64_t rhsSign = ieee754::extractSign(rhs.data); + + if (lhsSign != rhsSign) + return addOld(rhs); + + // assuming lhsSign == rhsSign + const uint64_t resultSign = lhsSign == 0 ? 0 : ieee754::traits::signMask; + + if (!FastMath && (tgmath::isinf(data) || tgmath::isinf(rhs.data))) + return bit_cast(ieee754::traits::inf | resultSign); + + uint64_t lhsNormMantissa = ieee754::extractNormalizeMantissa(data); + uint64_t rhsNormMantissa = ieee754::extractNormalizeMantissa(rhs.data); + + // TODO: branchless? 
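+ // signs are guaranteed equal at this point (mixed-sign operands were already routed to addOld above),
+ // so the commented-out mantissa negation below is retained only as a reference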
+ /*if (lhsSign != rhsSign) + { + if (lhsSign) + lhsNormMantissa *= -1; + if (rhsSign) + rhsNormMantissa *= -1; + }*/ + + int expDiff = lhsBiasedExp - rhsBiasedExp; + + int exp = max(lhsBiasedExp, rhsBiasedExp) - ieee754::traits::exponentBias; + uint32_t shiftAmount = abs(expDiff); + + // so lhsNormMantissa always holds mantissa of number with greater exponent + if (expDiff < 0) + swap(lhsNormMantissa, rhsNormMantissa); + + rhsNormMantissa >>= shiftAmount; + + uint64_t resultMantissa = lhsNormMantissa + rhsNormMantissa; + + //const uint64_t resultSign = ((lhsSign && rhsSign) || (resultMantissa & (lhsSign << 63))) << 63; + uint64_t resultBiasedExp = uint64_t(exp) + ieee754::traits::exponentBias; + + resultMantissa = resultMantissa; + + if (resultMantissa & 1ull << 53) + { + ++resultBiasedExp; + resultMantissa >>= 1; + } + + // TODO: better implementation with no loop + while (resultMantissa < (1ull << 52)) + { + --resultBiasedExp; + resultMantissa <<= 1; + } + + resultMantissa &= ieee754::traits::mantissaMask; + uint64_t output = impl::assembleFloat64(resultSign, uint64_t(resultBiasedExp) << ieee754::traits::mantissaBitCnt, resultMantissa); + return bit_cast(output); + } + + // not implemented + if (!FlushDenormToZero) + return bit_cast(0xdeadbeefbadcaffeull); + } + + emulated_float64_t operator+(float rhs) + { + return bit_cast(data) + create(rhs); + } + + emulated_float64_t operator-(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC + { + emulated_float64_t lhs = bit_cast(data); + emulated_float64_t rhsFlipped = rhs.flipSign(); + + return lhs + rhsFlipped; + } + + emulated_float64_t operator-(float rhs) NBL_CONST_MEMBER_FUNC + { + return bit_cast(data) - create(rhs); + } + + emulated_float64_t operator*(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC + { + if(FlushDenormToZero) + { + emulated_float64_t retval = this_t::create(0ull); + + uint64_t lhsSign = data & ieee754::traits::signMask; + uint64_t rhsSign = rhs.data & ieee754::traits::signMask; + uint64_t lhsMantissa = ieee754::extractMantissa(data); + uint64_t rhsMantissa = ieee754::extractMantissa(rhs.data); + int lhsBiasedExp = ieee754::extractBiasedExponent(data); + int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); + + int exp = int(lhsBiasedExp + rhsBiasedExp) - ieee754::traits::exponentBias; + uint64_t sign = (data ^ rhs.data) & ieee754::traits::signMask; + if (!FastMath) + { + if (lhsBiasedExp == ieee754::traits::specialValueExp) + { + if ((lhsMantissa != 0u) || ((rhsBiasedExp == ieee754::traits::specialValueExp) && (rhsMantissa != 0u))) + return bit_cast(impl::propagateFloat64NaN(data, rhs.data)); + if ((uint64_t(rhsBiasedExp) | rhsMantissa) == 0u) + return bit_cast(ieee754::traits::quietNaN); + + return bit_cast(impl::assembleFloat64(sign, ieee754::traits::exponentMask, 0ull)); + } + if (rhsBiasedExp == ieee754::traits::specialValueExp) + { + /* a cannot be NaN, but is b NaN? 
*/ + if (rhsMantissa != 0u) +#ifdef RELAXED_NAN_PROPAGATION + return rhs.data; +#else + return bit_cast(impl::propagateFloat64NaN(data, rhs.data)); +#endif + if ((uint64_t(lhsBiasedExp) | lhsMantissa) == 0u) + return bit_cast(ieee754::traits::quietNaN); + + return bit_cast(sign | ieee754::traits::exponentMask); + } + if (lhsBiasedExp == 0) + { + if (lhsMantissa == 0u) + return bit_cast(sign); + impl::normalizeFloat64Subnormal(lhsMantissa, lhsBiasedExp, lhsMantissa); + } + if (rhsBiasedExp == 0) + { + if (rhsMantissa == 0u) + return bit_cast(sign); + impl::normalizeFloat64Subnormal(rhsMantissa, rhsBiasedExp, rhsMantissa); + } + } + + const uint64_t hi_l = (lhsMantissa >> 21) | (1ull << 31); + const uint64_t lo_l = lhsMantissa & ((1ull << 21) - 1); + const uint64_t hi_r = (rhsMantissa >> 21) | (1ull << 31); + const uint64_t lo_r = rhsMantissa & ((1ull << 21) - 1); + + //const uint64_t RoundToNearest = (1ull << 31) - 1; + uint64_t newPseudoMantissa = ((hi_l * hi_r) >> 10) + ((hi_l * lo_r + lo_l * hi_r/* + RoundToNearest*/) >> 31); + + if (newPseudoMantissa & (0x1ull << 53)) + { + newPseudoMantissa >>= 1; + ++exp; + } + newPseudoMantissa &= (ieee754::traits::mantissaMask); + + return bit_cast(impl::assembleFloat64(sign, uint64_t(exp) << ieee754::traits::mantissaBitCnt, newPseudoMantissa)); + } + else + { + //static_assert(false, "not implemented yet"); + return bit_cast(0xdeadbeefbadcaffeull); + } + } + + emulated_float64_t operator*(float rhs) + { + return _static_cast(data) * create(rhs); + } + + /*this_t reciprocal(uint64_t x) + { + using ThisType = this_t; + ThisType output = ThisType::bit_cast((0xbfcdd6a18f6a6f52ULL - x) >> 1); + output = output * output; + return output; + }*/ + + emulated_float64_t operator/(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC + { + if (FlushDenormToZero) + { + //return this_t::bit_cast(data) * reciprocal(rhs.data); + + if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) + return bit_cast(ieee754::traits::quietNaN); + + const uint64_t sign = (data ^ rhs.data) & ieee754::traits::signMask; + + if (!FastMath && impl::isZero(rhs.data)) + return bit_cast(ieee754::traits::inf | sign); + + if (!FastMath && impl::areBothInfinity(data, rhs.data)) + return bit_cast(ieee754::traits::quietNaN); + + if (!FastMath && tgmath::isinf(data)) + return bit_cast(ieee754::traits::inf | sign); + + if (!FastMath && tgmath::isinf(rhs.data)) + return bit_cast(0ull | sign); + + + const uint64_t lhsRealMantissa = (ieee754::extractMantissa(data) | (1ull << ieee754::traits::mantissaBitCnt)); + const uint64_t rhsRealMantissa = ieee754::extractMantissa(rhs.data) | (1ull << ieee754::traits::mantissaBitCnt); + + int exp = ieee754::extractExponent(data) - ieee754::extractExponent(rhs.data) + ieee754::traits::exponentBias; + + uint64_t2 lhsMantissaShifted = impl::shiftMantissaLeftBy53(lhsRealMantissa); + uint64_t mantissa = impl::divmod128by64(lhsMantissaShifted.x, lhsMantissaShifted.y, rhsRealMantissa); + + while (mantissa < (1ull << 52)) + { + mantissa <<= 1; + exp--; + } + + mantissa &= ieee754::traits::mantissaMask; + + return bit_cast(impl::assembleFloat64(sign, uint64_t(exp) << ieee754::traits::mantissaBitCnt, mantissa)); + } + else + { + //static_assert(false, "not implemented yet"); + return bit_cast(0xdeadbeefbadcaffeull); + } + } + + // relational operators + // TODO: should `FlushDenormToZero` affect relational operators? 
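+ // note on operator< / operator>: the raw bit patterns are remapped to an unsigned total order
+ // (positive values get the sign bit set, negative values have every bit flipped), so a plain
+ // uint64_t comparison then agrees with the floating-point ordering; NaNs, same-sign infinities
+ // and pairs of zeros are handled by the early returns inside each operator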
+ bool operator==(this_t rhs) NBL_CONST_MEMBER_FUNC + { + if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) + return false; + // TODO: i'm not sure about this one + if (!FastMath && impl::areBothZero(data, rhs.data)) + return true; + + const emulated_float64_t xored = bit_cast(data ^ rhs.data); + // TODO: check what fast math returns for -0 == 0 + if ((xored.data & 0x7FFFFFFFFFFFFFFFull) == 0ull) + return true; + + return !(xored.data); + } + bool operator!=(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC + { + if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) + return false; + + return !(bit_cast(data) == rhs); + } + bool operator<(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC + { + if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) + return false; + if (!FastMath && impl::areBothSameSignInfinity(data, rhs.data)) + return false; + if (!FastMath && impl::areBothZero(data, rhs.data)) + return false; + + const uint64_t lhsSign = ieee754::extractSign(data); + const uint64_t rhsSign = ieee754::extractSign(rhs.data); + + // flip bits of negative numbers and flip signs of all numbers + uint64_t lhsFlipped = data ^ ((0x7FFFFFFFFFFFFFFFull * lhsSign) | ieee754::traits::signMask); + uint64_t rhsFlipped = rhs.data ^ ((0x7FFFFFFFFFFFFFFFull * rhsSign) | ieee754::traits::signMask); + + return lhsFlipped < rhsFlipped; + } + bool operator>(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC + { + if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) + return false; + if (!FastMath && impl::areBothSameSignInfinity(data, rhs.data)) + return false; + if (!FastMath && impl::areBothZero(data, rhs.data)) + return false; + + const uint64_t lhsSign = ieee754::extractSign(data); + const uint64_t rhsSign = ieee754::extractSign(rhs.data); + + // flip bits of negative numbers and flip signs of all numbers + uint64_t lhsFlipped = data ^ ((0x7FFFFFFFFFFFFFFFull * lhsSign) | ieee754::traits::signMask); + uint64_t rhsFlipped = rhs.data ^ ((0x7FFFFFFFFFFFFFFFull * rhsSign) | ieee754::traits::signMask); + + return lhsFlipped > rhsFlipped; + } + bool operator<=(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC + { + if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) + return false; + + return !(bit_cast(data) > bit_cast(rhs.data)); + } + bool operator>=(emulated_float64_t rhs) + { + if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) + return false; + + return !(bit_cast(data) < bit_cast(rhs.data)); + } + + //logical operators + bool operator&&(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { return bool(data) && bool(rhs.data); } + bool operator||(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { return bool(data) || bool(rhs.data); } + bool operator!() NBL_CONST_MEMBER_FUNC { return !bool(data); } + + emulated_float64_t flipSign() + { + return bit_cast(data ^ ieee754::traits::signMask); + } + + NBL_CONSTEXPR_STATIC_INLINE bool supportsFastMath() + { + return FastMath; + } + + enum E_ROUNDING_MODE + { + FLOAT_ROUND_NEAREST_EVEN, + FLOAT_ROUND_TO_ZERO, + FLOAT_ROUND_DOWN, + FLOAT_ROUND_UP + }; + + static const E_ROUNDING_MODE RoundingMode = E_ROUNDING_MODE::FLOAT_ROUND_TO_ZERO; + }; + +#define IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(...) 
\ +template<>\ +struct traits_base<__VA_ARGS__ >\ +{\ + NBL_CONSTEXPR_STATIC_INLINE int16_t exponentBitCnt = 11;\ + NBL_CONSTEXPR_STATIC_INLINE int16_t mantissaBitCnt = 52;\ +};\ +template<>\ +inline uint32_t extractBiasedExponent(__VA_ARGS__ x)\ +{\ + return extractBiasedExponent(x.data);\ +}\ +\ +template<>\ +inline int extractExponent(__VA_ARGS__ x)\ +{\ + return extractExponent(x.data);\ +}\ +\ +template<>\ +NBL_CONSTEXPR_INLINE_FUNC __VA_ARGS__ replaceBiasedExponent(__VA_ARGS__ x, typename unsigned_integer_of_size::type biasedExp)\ +{\ + return __VA_ARGS__(replaceBiasedExponent(x.data, biasedExp));\ +}\ +\ +template <>\ +NBL_CONSTEXPR_INLINE_FUNC __VA_ARGS__ fastMulExp2(__VA_ARGS__ x, int n)\ +{\ + return __VA_ARGS__(replaceBiasedExponent(x.data, extractBiasedExponent(x) + uint32_t(n)));\ +}\ +\ +template <>\ +NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size::type extractMantissa(__VA_ARGS__ x)\ +{\ + return extractMantissa(x.data);\ +}\ +\ +template <>\ +NBL_CONSTEXPR_INLINE_FUNC uint64_t extractNormalizeMantissa(__VA_ARGS__ x)\ +{\ + return extractNormalizeMantissa(x.data);\ +}\ +\ + +#define DEFINE_BIT_CAST_SPEC(...)\ +template<>\ +NBL_CONSTEXPR_FUNC __VA_ARGS__ bit_cast<__VA_ARGS__, uint64_t>(NBL_CONST_REF_ARG(uint64_t) val)\ +{\ +__VA_ARGS__ output;\ +output.data = val;\ +\ +return output;\ +}\ +\ +template<>\ +NBL_CONSTEXPR_FUNC __VA_ARGS__ bit_cast<__VA_ARGS__, float64_t>(NBL_CONST_REF_ARG(float64_t) val)\ +{\ +__VA_ARGS__ output;\ +output.data = bit_cast(val);\ +\ +return output;\ +}\ +\ + +namespace impl +{ + +template +struct static_cast_helper,void> +{ + static_assert(is_scalar::value); + + using From = emulated_float64_t; + + // TODO: test + static inline To cast(From v) + { + using ToAsFloat = typename float_of_size::type; + using ToAsUint = typename unsigned_integer_of_size::type; + + + if (is_same_v) + return To(bit_cast(v.data)); + + if (is_floating_point::value) + { + + const int exponent = ieee754::extractExponent(v.data); + if (!From::supportsFastMath()) + { + //TODO: i have no idea why it doesn't work, fix + /*if (exponent > ieee754::traits::exponentMax) + return bit_cast(ieee754::traits::inf); + if (exponent < ieee754::traits::exponentMin) + return -bit_cast(ieee754::traits::inf); + if (tgmath::isnan(v.data)) + return bit_cast(ieee754::traits::quietNaN);*/ + } + + + const uint32_t toBitSize = sizeof(To) * 8; + const ToAsUint sign = ToAsUint(ieee754::extractSign(v.data) << (toBitSize - 1)); + const ToAsUint biasedExponent = ToAsUint(exponent + ieee754::traits::exponentBias) << ieee754::traits::mantissaBitCnt; + const ToAsUint mantissa = ToAsUint(v.data >> (ieee754::traits::mantissaBitCnt - ieee754::traits::mantissaBitCnt)) & ieee754::traits::mantissaMask; + + return bit_cast(sign | biasedExponent | mantissa); + } + + // NOTE: casting from negative float to unsigned int is an UB, function will return abs value in this case + if (is_integral::value) + { + const int exponent = ieee754::extractExponent(v.data); + if (exponent < 0) + return 0; + + uint64_t unsignedOutput = ieee754::extractMantissa(v.data) & 1ull << ieee754::traits::mantissaBitCnt; + const int shiftAmount = exponent - int(ieee754::traits::mantissaBitCnt); + + if (shiftAmount < 0) + unsignedOutput <<= -shiftAmount; + else + unsignedOutput >>= shiftAmount; + + if (is_signed::value) + { + int64_t signedOutput64 = unsignedOutput & ((1ull << 63) - 1); + To signedOutput = To(signedOutput64); + if (ieee754::extractSignPreserveBitPattern(v.data) != 0) + signedOutput = -signedOutput; + + return signedOutput; + 
} + + return To(unsignedOutput); + } + + // assert(false); + return To(0xdeadbeefbadcaffeull); + } +}; + +template +struct static_cast_helper, float32_t, void> +{ + using To = emulated_float64_t; + + static inline To cast(float32_t v) + { + return To::create(v); + } +}; + +template +struct static_cast_helper, float64_t, void> +{ + using To = emulated_float64_t; + + static inline To cast(float64_t v) + { + return To::create(v); + } +}; + +template +struct static_cast_helper, uint32_t, void> +{ + using To = emulated_float64_t; + + static inline To cast(uint32_t v) + { + return To::create(v); + } +}; + +template +struct static_cast_helper, uint64_t, void> +{ + using To = emulated_float64_t; + + static inline To cast(uint64_t v) + { + return To::create(v); + } +}; + +template +struct static_cast_helper, emulated_float64_t, void> +{ + static inline emulated_float64_t cast(emulated_float64_t v) + { + return v; + } +}; + +} + +DEFINE_BIT_CAST_SPEC(emulated_float64_t); +DEFINE_BIT_CAST_SPEC(emulated_float64_t); +DEFINE_BIT_CAST_SPEC(emulated_float64_t); +DEFINE_BIT_CAST_SPEC(emulated_float64_t); + +//template +//struct is_floating_point > : bool_constant {}; + +namespace ieee754 +{ +IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); +IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); +IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); +IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); +} + +} +} + +#undef IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE +#undef DEFINE_BIT_CAST_SPEC + +#endif diff --git a/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl b/include/nbl/builtin/hlsl/emulated/emulated_float64_t_impl.hlsl similarity index 82% rename from include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl rename to include/nbl/builtin/hlsl/emulated/emulated_float64_t_impl.hlsl index 3045df49d4..d2b59e9607 100644 --- a/include/nbl/builtin/hlsl/impl/emulated_float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/emulated/emulated_float64_t_impl.hlsl @@ -1,509 +1,449 @@ -#ifndef _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_IMPL_INCLUDED_ -#define _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_IMPL_INCLUDED_ - -#include -#include -#include -#include -#include - -#define FLOAT_ROUND_NEAREST_EVEN 0 -#define FLOAT_ROUND_TO_ZERO 1 -#define FLOAT_ROUND_DOWN 2 -#define FLOAT_ROUND_UP 3 -#define FLOAT_ROUNDING_MODE FLOAT_ROUND_NEAREST_EVEN - -// TODO: when it will be possible, use this unions wherever they fit: -/* -* union Mantissa -* { -* struct -* { -* uint32_t highBits; -* uint64_t lowBits; -* }; -* -* uint32_t2 packed; -* }; -* -*/ - -/* -* union Mantissa -* { -* struct -* { -* uint64_t lhs; -* uint64_t rhs; -* }; -* -* uint32_t4 packed; -* }; -* -*/ - -namespace nbl -{ -namespace hlsl -{ -namespace impl -{ -NBL_CONSTEXPR_INLINE_FUNC uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) -{ - uint64_t2 output; - output.x = mantissa64 >> (64 - ieee754::traits::mantissaBitCnt); - output.y = mantissa64 << (ieee754::traits::mantissaBitCnt); - - return output; -} - -NBL_CONSTEXPR_INLINE_FUNC uint64_t packFloat64(uint32_t zSign, int zExp, uint32_t zFrac0, uint32_t zFrac1) -{ - uint32_t2 z; - - z.x = zSign + (uint32_t(zExp) << 20) + zFrac0; - z.y = zFrac1; - - uint64_t output = 0u; - output |= (uint64_t(z.x) << 32) & 0xFFFFFFFF00000000ull; - output |= uint64_t(z.y); - return output; -} - -NBL_CONSTEXPR_INLINE_FUNC uint32_t2 packUint64(uint64_t val) -{ - return uint32_t2((val & 0xFFFFFFFF00000000ull) >> 32, val & 0x00000000FFFFFFFFull); -} - 
-NBL_CONSTEXPR_INLINE_FUNC uint64_t unpackUint64(uint32_t2 val) -{ - return ((uint64_t(val.x) & 0x00000000FFFFFFFFull) << 32) | uint64_t(val.y); -} - -template -inline uint64_t castFloat32ToStorageType(float32_t val) -{ - if (FlushDenormToZero) - { - const uint64_t sign = uint64_t(ieee754::extractSign(val)) << 63; - if (tgmath::isInf(val)) - return ieee754::traits::inf | sign; - uint32_t asUint = ieee754::impl::castToUintType(val); - const int f32BiasedExp = ieee754::extractBiasedExponent(val); - if (f32BiasedExp == 0) - return sign; - const uint64_t biasedExp = uint64_t(f32BiasedExp - ieee754::traits::exponentBias + ieee754::traits::exponentBias) << (ieee754::traits::mantissaBitCnt); - const uint64_t mantissa = (uint64_t(ieee754::traits::mantissaMask) & asUint) << (ieee754::traits::mantissaBitCnt - ieee754::traits::mantissaBitCnt); - - return sign | biasedExp | mantissa; - } - else - { - // static_assert(false); - return 0xdeadbeefbadcaffeull; - } -}; - -inline uint64_t castToUint64WithFloat64BitPattern(uint64_t val) -{ - if (val == 0) - return val; - -#ifndef __HLSL_VERSION - int exp = findMSB(val); -#else - int exp = 63; - uint64_t mask = ieee754::traits::signMask; - while (!(val & mask)) - { - --exp; - mask >>= 1; - } - - - //uint32_t2 valPacked = packUint64(val); - //int exp = valPacked.x ? firstbithigh(valPacked.x) + 32 : firstbithigh(valPacked.y); - //exp = 63 - exp; -#endif - uint64_t mantissa; - - int shiftCnt = 52 - exp; - if (shiftCnt >= 0) - { - mantissa = val << shiftCnt; - } - else - { - const int shiftCntAbs = -shiftCnt; - uint64_t roundingBit = 1ull << (shiftCnt - 1); - uint64_t stickyBitMask = roundingBit - 1; - uint64_t stickyBit = val & stickyBitMask; - - mantissa = val >> shiftCntAbs; - - if ((val & roundingBit) && (!stickyBit)) - { - bool isEven = mantissa & 1; - if (!isEven) - mantissa++; - } - else if ((val & roundingBit) && (stickyBit || (mantissa & 1))) - val += roundingBit; - - //val += (1ull << (shiftCnt)) - 1; - //mantissa = val >> shiftCntAbs; - - if (mantissa & 1ull << 53) - { - mantissa >>= 1; - exp++; - } - } - mantissa &= ieee754::traits::mantissaMask; - const uint64_t biasedExp = uint64_t(ieee754::traits::exponentBias + exp) << ieee754::traits::mantissaBitCnt; - - return biasedExp | mantissa; -}; - -inline uint64_t castToUint64WithFloat64BitPattern(int64_t val) -{ - const uint64_t sign = val & ieee754::traits::signMask; - const uint64_t absVal = uint64_t(abs(val)); - return sign | castToUint64WithFloat64BitPattern(absVal); -}; - -NBL_CONSTEXPR_INLINE_FUNC uint32_t2 umulExtended(uint32_t lhs, uint32_t rhs) -{ - uint64_t product = uint64_t(lhs) * uint64_t(rhs); - uint32_t2 output; - output.x = uint32_t((product & 0xFFFFFFFF00000000) >> 32); - output.y = uint32_t(product & 0x00000000FFFFFFFFull); - return output; -} - -NBL_CONSTEXPR_INLINE_FUNC uint64_t propagateFloat64NaN(uint64_t lhs, uint64_t rhs) -{ -#if defined RELAXED_NAN_PROPAGATION - return lhs | rhs; -#else - - lhs |= 0x0008000000000000ull; - rhs |= 0x0008000000000000ull; - return glsl::mix(rhs, glsl::mix(lhs, rhs, tgmath::isnan(rhs)), tgmath::isnan(lhs)); - return 0; -#endif -} - -static inline int countLeadingZeros32(uint32_t val) -{ -#ifndef __HLSL_VERSION - return 31 - findMSB(val); -#else - return 31 - firstbithigh(val); -#endif -} - -NBL_CONSTEXPR_INLINE_FUNC uint32_t2 shift64RightJamming(uint32_t2 val, int count) -{ - uint32_t2 output; - const int negCount = (-count) & 31; - - output.x = glsl::mix(0u, val.x, count == 0); - output.x = glsl::mix(output.x, (val.x >> count), count < 32); - - 
output.y = uint32_t((val.x | val.y) != 0u); /* count >= 64 */ - uint32_t z1_lt64 = (val.x>>(count & 31)) | uint32_t(((val.x<>count) | uint32_t((val.y<> (count & 31)), count < 64); - output.y = glsl::mix(output.y, (val.x << negCount) | (val.y >> count), count < 32); - - val.z = glsl::mix(val.z | val.y, val.z, count < 32); - output.x = glsl::mix(output.x, val.x >> count, count < 32); - output.z |= uint32_t(val.z != 0u); - - output.x = glsl::mix(output.x, 0u, (count == 32)); - output.y = glsl::mix(output.y, val.x, (count == 32)); - output.z = glsl::mix(output.z, val.y, (count == 32)); - output.x = glsl::mix(output.x, val.x, (count == 0)); - output.y = glsl::mix(output.y, val.y, (count == 0)); - output.z = glsl::mix(output.z, val.z, (count == 0)); - - return output; -} - -NBL_CONSTEXPR_INLINE_FUNC uint64_t shortShift64Left(uint64_t val, int count) -{ - const uint32_t2 packed = packUint64(val); - - uint32_t2 output; - output.y = packed.y << count; - // TODO: fix - output.x = glsl::mix((packed.x << count | (packed.y >> ((-count) & 31))), packed.x, count == 0); - - return unpackUint64(output); -}; - -NBL_CONSTEXPR_INLINE_FUNC uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) -{ - return signShifted + expShifted + mantissa; -} - -NBL_CONSTEXPR_INLINE_FUNC uint64_t roundAndPackFloat64(uint64_t zSign, int zExp, uint32_t3 mantissaExtended) -{ - bool roundNearestEven; - bool increment; - - roundNearestEven = true; - increment = int(mantissaExtended.z) < 0; - if (!roundNearestEven) - { - if (false) //(FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) - { - increment = false; - } - else - { - if (false) //(zSign != 0u) - { - //increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && - // (zFrac2 != 0u); - } - else - { - //increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && - // (zFrac2 != 0u); - } - } - } - - // overflow handling? - // if biased exp is lesser then 2045 - if (0x7FD <= zExp) - { - if ((0x7FD < zExp) || ((zExp == 0x7FD) && (0x001FFFFFu == mantissaExtended.x && 0xFFFFFFFFu == mantissaExtended.y) && increment)) - { - if (false) // ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) || - // ((zSign != 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP)) || - // ((zSign == 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN))) - { - return assembleFloat64(zSign, 0x7FE << ieee754::traits::mantissaBitCnt, 0x000FFFFFFFFFFFFFull); - } - - return assembleFloat64(zSign, ieee754::traits::exponentMask, 0ull); - } - } - - if (zExp < 0) - { - mantissaExtended = shift64ExtraRightJamming(mantissaExtended, -zExp); - zExp = 0; - - if (roundNearestEven) - { - increment = mantissaExtended.z < 0u; - } - else - { - if (zSign != 0u) - { - increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && (mantissaExtended.z != 0u); - } - else - { - increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && (mantissaExtended.z != 0u); - } - } - } - - if (increment) - { - const uint64_t added = impl::unpackUint64(uint32_t2(mantissaExtended.xy)) + 1ull; - mantissaExtended.xy = packUint64(added); - mantissaExtended.y &= ~((mantissaExtended.z + uint32_t(mantissaExtended.z == 0u)) & uint32_t(roundNearestEven)); - } - else - { - // ?? 
- zExp = glsl::mix(zExp, 0, (mantissaExtended.x | mantissaExtended.y) == 0u); - } - - return assembleFloat64(zSign, uint64_t(zExp) << ieee754::traits::mantissaBitCnt, unpackUint64(mantissaExtended.xy)); -} - -static inline uint64_t normalizeRoundAndPackFloat64(uint64_t sign, int exp, uint32_t frac0, uint32_t frac1) -{ - int shiftCount; - uint32_t3 frac = uint32_t3(frac0, frac1, 0u); - - if (frac.x == 0u) - { - exp -= 32; - frac.x = frac.y; - frac.y = 0u; - } - - shiftCount = countLeadingZeros32(frac.x) - 11; - if (0 <= shiftCount) - { - // TODO: this is packing and unpacking madness, fix it - frac.xy = packUint64(shortShift64Left(unpackUint64(frac.xy), shiftCount)); - } - else - { - frac.xyz = shift64ExtraRightJamming(uint32_t3(frac.xy, 0), -shiftCount); - } - exp -= shiftCount; - return roundAndPackFloat64(sign, exp, frac); -} - -static inline void normalizeFloat64Subnormal(uint64_t mantissa, - NBL_REF_ARG(int) outExp, - NBL_REF_ARG(uint64_t) outMantissa) -{ - uint32_t2 mantissaPacked = packUint64(mantissa); - int shiftCount; - uint32_t2 temp; - shiftCount = countLeadingZeros32(glsl::mix(mantissaPacked.x, mantissaPacked.y, mantissaPacked.x == 0u)) - 11; - outExp = glsl::mix(1 - shiftCount, -shiftCount - 31, mantissaPacked.x == 0u); - - temp.x = glsl::mix(mantissaPacked.y << shiftCount, mantissaPacked.y >> (-shiftCount), shiftCount < 0); - temp.y = glsl::mix(0u, mantissaPacked.y << (shiftCount & 31), shiftCount < 0); - - shortShift64Left(impl::unpackUint64(mantissaPacked), shiftCount); - - outMantissa = glsl::mix(outMantissa, unpackUint64(temp), mantissaPacked.x == 0); -} - -NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) -{ - lhs &= ~ieee754::traits::signMask; - rhs &= ~ieee754::traits::signMask; - - return lhs == rhs && lhs == ieee754::traits::inf; -} - -NBL_CONSTEXPR_INLINE_FUNC bool areBothSameSignInfinity(uint64_t lhs, uint64_t rhs) -{ - return lhs == rhs && (lhs & ~ieee754::traits::signMask) == ieee754::traits::inf; -} - -NBL_CONSTEXPR_INLINE_FUNC bool isZero(uint64_t val) -{ - return (val << 1) == 0; -} - -NBL_CONSTEXPR_INLINE_FUNC bool areBothZero(uint64_t lhs, uint64_t rhs) -{ - return ((lhs << 1) == 0ull) && ((rhs << 1) == 0ull); -} - -NBL_CONSTEXPR_INLINE_FUNC bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) -{ - return ((lhs << 1) == 0ull) && (lhs == rhs); -} - -// TODO: find more efficient algorithm -static inline uint64_t nlz64(uint64_t x) -{ - static const uint64_t MASK = 1ull << 63; - - uint64_t counter = 0; - - while ((x & MASK) == 0) - { - x <<= 1; - ++counter; - } - return counter; -} - -// returns pair of quotient and remainder -static inline uint64_t divmod128by64(const uint64_t dividentHigh, const uint64_t dividentLow, uint64_t divisor) -{ - const uint64_t b = 1ull << 32; - uint64_t un1, un0, vn1, vn0, q1, q0, un32, un21, un10, rhat, left, right; - uint64_t s; - - //TODO: countl_zero - s = countl_zero(divisor); - //s = nlz64(divisor); - divisor <<= s; - vn1 = divisor >> 32; - vn0 = divisor & 0xFFFFFFFF; - - if (s > 0) - { - un32 = (dividentHigh << s) | (dividentLow >> (64 - s)); - un10 = dividentLow << s; - } - else - { - un32 = dividentHigh; - un10 = dividentLow; - } - - un1 = un10 >> 32; - un0 = un10 & 0xFFFFFFFF; - - q1 = un32 / vn1; - rhat = un32 % vn1; - - left = q1 * vn0; - right = (rhat << 32) + un1; - while ((q1 >= b) || (left > right)) - { - --q1; - rhat += vn1; - if (rhat < b) - { - left -= vn0; - right = (rhat << 32) | un1; - } - break; - } - - un21 = (un32 << 32) + (un1 - (q1 * divisor)); - - q0 = un21 / vn1; - rhat = un21 % 
vn1; - - left = q0 * vn0; - right = (rhat << 32) | un0; - while ((q0 >= b) || (left > right)) - { - --q0; - rhat += vn1; - if (rhat < b) - { - left -= vn0; - right = (rhat << 32) | un0; - continue; - } - break; - } - - return (q1 << 32) | q0; -} -} -} -} +#ifndef _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_IMPL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_IMPL_INCLUDED_ + +#include +#include +#include +#include +#include + +// TODO: when it will be possible, use this unions wherever they fit: +/* +* union Mantissa +* { +* struct +* { +* uint32_t highBits; +* uint64_t lowBits; +* }; +* +* uint32_t2 packed; +* }; +* +*/ +/* +* union Mantissa +* { +* struct +* { +* uint64_t lhs; +* uint64_t rhs; +* }; +* +* uint32_t4 packed; +* }; +* +*/ + +namespace nbl +{ +namespace hlsl +{ +namespace impl +{ +NBL_CONSTEXPR_INLINE_FUNC uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) +{ + uint64_t2 output; + output.x = mantissa64 >> (64 - ieee754::traits::mantissaBitCnt); + output.y = mantissa64 << (ieee754::traits::mantissaBitCnt); + + return output; +} + +NBL_CONSTEXPR_INLINE_FUNC uint64_t packFloat64(uint32_t zSign, int zExp, uint32_t zFrac0, uint32_t zFrac1) +{ + uint32_t2 z; + + z.x = zSign + (uint32_t(zExp) << 20) + zFrac0; + z.y = zFrac1; + + uint64_t output = 0u; + output |= (uint64_t(z.x) << 32) & 0xFFFFFFFF00000000ull; + output |= uint64_t(z.y); + return output; +} + +NBL_CONSTEXPR_INLINE_FUNC uint32_t2 packUint64(uint64_t val) +{ + return uint32_t2((val & 0xFFFFFFFF00000000ull) >> 32, val & 0x00000000FFFFFFFFull); +} + +NBL_CONSTEXPR_INLINE_FUNC uint64_t unpackUint64(uint32_t2 val) +{ + return ((uint64_t(val.x) & 0x00000000FFFFFFFFull) << 32) | uint64_t(val.y); +} + +template +inline uint64_t castFloat32ToStorageType(float32_t val) +{ + if (FlushDenormToZero) + { + const uint64_t sign = uint64_t(ieee754::extractSign(val)) << 63; + if (tgmath::isinf(val)) + return ieee754::traits::inf | sign; + uint32_t asUint = ieee754::impl::bitCastToUintType(val); + const int f32BiasedExp = ieee754::extractBiasedExponent(val); + if (f32BiasedExp == 0) + return sign; + const uint64_t biasedExp = uint64_t(f32BiasedExp - ieee754::traits::exponentBias + ieee754::traits::exponentBias) << (ieee754::traits::mantissaBitCnt); + const uint64_t mantissa = (uint64_t(ieee754::traits::mantissaMask) & asUint) << (ieee754::traits::mantissaBitCnt - ieee754::traits::mantissaBitCnt); + + return sign | biasedExp | mantissa; + } + else + { + // static_assert(false); + return 0xdeadbeefbadcaffeull; + } +}; + +inline uint64_t castToUint64WithFloat64BitPattern(uint64_t val) +{ + if (val == 0) + return val; + +#ifndef __HLSL_VERSION + int exp = findMSB(val); +#else + int exp = 63; + uint64_t mask = ieee754::traits::signMask; + while (!(val & mask)) + { + --exp; + mask >>= 1; + } + + + //uint32_t2 valPacked = packUint64(val); + //int exp = valPacked.x ? 
firstbithigh(valPacked.x) + 32 : firstbithigh(valPacked.y); + //exp = 63 - exp; +#endif + uint64_t mantissa; + + int shiftCnt = 52 - exp; + if (shiftCnt >= 0) + { + mantissa = val << shiftCnt; + } + else + { + const int shiftCntAbs = -shiftCnt; + uint64_t roundingBit = 1ull << (shiftCnt - 1); + uint64_t stickyBitMask = roundingBit - 1; + uint64_t stickyBit = val & stickyBitMask; + + mantissa = val >> shiftCntAbs; + + if ((val & roundingBit) && (!stickyBit)) + { + bool isEven = mantissa & 1; + if (!isEven) + mantissa++; + } + else if ((val & roundingBit) && (stickyBit || (mantissa & 1))) + val += roundingBit; + + //val += (1ull << (shiftCnt)) - 1; + //mantissa = val >> shiftCntAbs; + + if (mantissa & 1ull << 53) + { + mantissa >>= 1; + exp++; + } + } + mantissa &= ieee754::traits::mantissaMask; + const uint64_t biasedExp = uint64_t(ieee754::traits::exponentBias + exp) << ieee754::traits::mantissaBitCnt; + + return biasedExp | mantissa; +}; + +inline uint64_t castToUint64WithFloat64BitPattern(int64_t val) +{ + const uint64_t sign = val & ieee754::traits::signMask; + const uint64_t absVal = uint64_t(abs(val)); + return sign | castToUint64WithFloat64BitPattern(absVal); +}; + +NBL_CONSTEXPR_INLINE_FUNC uint32_t2 umulExtended(uint32_t lhs, uint32_t rhs) +{ + uint64_t product = uint64_t(lhs) * uint64_t(rhs); + uint32_t2 output; + output.x = uint32_t((product & 0xFFFFFFFF00000000) >> 32); + output.y = uint32_t(product & 0x00000000FFFFFFFFull); + return output; +} + +NBL_CONSTEXPR_INLINE_FUNC uint64_t propagateFloat64NaN(uint64_t lhs, uint64_t rhs) +{ +#if defined RELAXED_NAN_PROPAGATION + return lhs | rhs; +#else + + lhs |= 0x0008000000000000ull; + rhs |= 0x0008000000000000ull; + return glsl::mix(rhs, glsl::mix(lhs, rhs, tgmath::isnan(rhs)), tgmath::isnan(lhs)); + return 0; +#endif +} + +static inline int countLeadingZeros32(uint32_t val) +{ +#ifndef __HLSL_VERSION + return 31 - findMSB(val); +#else + return 31 - firstbithigh(val); +#endif +} + +NBL_CONSTEXPR_INLINE_FUNC uint32_t2 shift64RightJamming(uint32_t2 val, int count) +{ + uint32_t2 output; + const int negCount = (-count) & 31; + + output.x = glsl::mix(0u, val.x, count == 0); + output.x = glsl::mix(output.x, (val.x >> count), count < 32); + + output.y = uint32_t((val.x | val.y) != 0u); /* count >= 64 */ + uint32_t z1_lt64 = (val.x>>(count & 31)) | uint32_t(((val.x<>count) | uint32_t((val.y<> (count & 31)), count < 64); + output.y = glsl::mix(output.y, (val.x << negCount) | (val.y >> count), count < 32); + + val.z = glsl::mix(val.z | val.y, val.z, count < 32); + output.x = glsl::mix(output.x, val.x >> count, count < 32); + output.z |= uint32_t(val.z != 0u); + + output.x = glsl::mix(output.x, 0u, (count == 32)); + output.y = glsl::mix(output.y, val.x, (count == 32)); + output.z = glsl::mix(output.z, val.y, (count == 32)); + output.x = glsl::mix(output.x, val.x, (count == 0)); + output.y = glsl::mix(output.y, val.y, (count == 0)); + output.z = glsl::mix(output.z, val.z, (count == 0)); + + return output; +} + +NBL_CONSTEXPR_INLINE_FUNC uint64_t shortShift64Left(uint64_t val, int count) +{ + const uint32_t2 packed = packUint64(val); + + uint32_t2 output; + output.y = packed.y << count; + // TODO: fix + output.x = glsl::mix((packed.x << count | (packed.y >> ((-count) & 31))), packed.x, count == 0); + + return unpackUint64(output); +}; + +NBL_CONSTEXPR_INLINE_FUNC uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) +{ + return signShifted + expShifted + mantissa; +} + +NBL_CONSTEXPR_INLINE_FUNC uint64_t 
roundAndPackFloat64(uint64_t zSign, int zExp, uint32_t3 mantissaExtended) +{ + bool roundNearestEven; + bool increment; + + roundNearestEven = true; + increment = int(mantissaExtended.z) < 0; + + // overflow handling? + // if biased exp is lesser then 2045 + if (0x7FD <= zExp) + { + if ((0x7FD < zExp) || ((zExp == 0x7FD) && (0x001FFFFFu == mantissaExtended.x && 0xFFFFFFFFu == mantissaExtended.y) && increment)) + return assembleFloat64(zSign, 0x7FE << ieee754::traits::mantissaBitCnt, 0x000FFFFFFFFFFFFFull); + + return assembleFloat64(zSign, ieee754::traits::exponentMask, 0ull); + } + + if (zExp < 0) + { + mantissaExtended = shift64ExtraRightJamming(mantissaExtended, -zExp); + zExp = 0; + } + + zExp = glsl::mix(zExp, 0, (mantissaExtended.x | mantissaExtended.y) == 0u); + + return assembleFloat64(zSign, uint64_t(zExp) << ieee754::traits::mantissaBitCnt, unpackUint64(mantissaExtended.xy)); +} + +static inline uint64_t normalizeRoundAndPackFloat64(uint64_t sign, int exp, uint32_t frac0, uint32_t frac1) +{ + int shiftCount; + uint32_t3 frac = uint32_t3(frac0, frac1, 0u); + + if (frac.x == 0u) + { + exp -= 32; + frac.x = frac.y; + frac.y = 0u; + } + + shiftCount = countLeadingZeros32(frac.x) - 11; + if (0 <= shiftCount) + { + // TODO: this is packing and unpacking madness, fix it + frac.xy = packUint64(shortShift64Left(unpackUint64(frac.xy), shiftCount)); + } + else + { + frac.xyz = shift64ExtraRightJamming(uint32_t3(frac.xy, 0), -shiftCount); + } + exp -= shiftCount; + return roundAndPackFloat64(sign, exp, frac); +} + +static inline void normalizeFloat64Subnormal(uint64_t mantissa, + NBL_REF_ARG(int) outExp, + NBL_REF_ARG(uint64_t) outMantissa) +{ + uint32_t2 mantissaPacked = packUint64(mantissa); + int shiftCount; + uint32_t2 temp; + shiftCount = countLeadingZeros32(glsl::mix(mantissaPacked.x, mantissaPacked.y, mantissaPacked.x == 0u)) - 11; + outExp = glsl::mix(1 - shiftCount, -shiftCount - 31, mantissaPacked.x == 0u); + + temp.x = glsl::mix(mantissaPacked.y << shiftCount, mantissaPacked.y >> (-shiftCount), shiftCount < 0); + temp.y = glsl::mix(0u, mantissaPacked.y << (shiftCount & 31), shiftCount < 0); + + shortShift64Left(impl::unpackUint64(mantissaPacked), shiftCount); + + outMantissa = glsl::mix(outMantissa, unpackUint64(temp), mantissaPacked.x == 0); +} + +NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) +{ + lhs &= ~ieee754::traits::signMask; + rhs &= ~ieee754::traits::signMask; + + return lhs == rhs && lhs == ieee754::traits::inf; +} + +NBL_CONSTEXPR_INLINE_FUNC bool areBothSameSignInfinity(uint64_t lhs, uint64_t rhs) +{ + return lhs == rhs && (lhs & ~ieee754::traits::signMask) == ieee754::traits::inf; +} + +NBL_CONSTEXPR_INLINE_FUNC bool isZero(uint64_t val) +{ + return (val << 1) == 0; +} + +NBL_CONSTEXPR_INLINE_FUNC bool areBothZero(uint64_t lhs, uint64_t rhs) +{ + return ((lhs << 1) == 0ull) && ((rhs << 1) == 0ull); +} + +NBL_CONSTEXPR_INLINE_FUNC bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) +{ + return ((lhs << 1) == 0ull) && (lhs == rhs); +} + +// TODO: find more efficient algorithm +static inline uint64_t nlz64(uint64_t x) +{ + static const uint64_t MASK = 1ull << 63; + + uint64_t counter = 0; + + while ((x & MASK) == 0) + { + x <<= 1; + ++counter; + } + return counter; +} + +// returns pair of quotient and remainder +static inline uint64_t divmod128by64(const uint64_t dividentHigh, const uint64_t dividentLow, uint64_t divisor) +{ + const uint64_t b = 1ull << 32; + uint64_t un1, un0, vn1, vn0, q1, q0, un32, un21, un10, rhat, left, right; + 
uint64_t s; + + //TODO: countl_zero + s = countl_zero(divisor); + //s = nlz64(divisor); + divisor <<= s; + vn1 = divisor >> 32; + vn0 = divisor & 0xFFFFFFFF; + + if (s > 0) + { + un32 = (dividentHigh << s) | (dividentLow >> (64 - s)); + un10 = dividentLow << s; + } + else + { + un32 = dividentHigh; + un10 = dividentLow; + } + + un1 = un10 >> 32; + un0 = un10 & 0xFFFFFFFF; + + q1 = un32 / vn1; + rhat = un32 % vn1; + + left = q1 * vn0; + right = (rhat << 32) + un1; + while ((q1 >= b) || (left > right)) + { + --q1; + rhat += vn1; + if (rhat < b) + { + left -= vn0; + right = (rhat << 32) | un1; + } + break; + } + + un21 = (un32 << 32) + (un1 - (q1 * divisor)); + + q0 = un21 / vn1; + rhat = un21 % vn1; + + left = q0 * vn0; + right = (rhat << 32) | un0; + while ((q0 >= b) || (left > right)) + { + --q0; + rhat += vn1; + if (rhat < b) + { + left -= vn0; + right = (rhat << 32) | un0; + continue; + } + break; + } + + return (q1 << 32) | q0; +} +} +} +} #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/emulated_float64_t_utils.hlsl b/include/nbl/builtin/hlsl/emulated/emulated_float64_t_utils.hlsl similarity index 98% rename from include/nbl/builtin/hlsl/emulated_float64_t_utils.hlsl rename to include/nbl/builtin/hlsl/emulated/emulated_float64_t_utils.hlsl index bf58f4e005..ac7b79e74b 100644 --- a/include/nbl/builtin/hlsl/emulated_float64_t_utils.hlsl +++ b/include/nbl/builtin/hlsl/emulated/emulated_float64_t_utils.hlsl @@ -3,7 +3,7 @@ #include #include -#include +#include namespace nbl { @@ -18,7 +18,7 @@ template >::type; #else template -using portable_float64_t = typename conditional >::type; +using portable_float64_t = typename conditional >::type; #endif template diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754/ieee754.hlsl similarity index 87% rename from include/nbl/builtin/hlsl/ieee754.hlsl rename to include/nbl/builtin/hlsl/ieee754/ieee754.hlsl index 7e36501f0a..f869f4ceba 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754/ieee754.hlsl @@ -27,15 +27,15 @@ namespace impl } template - NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type castToUintType(T x) + NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type bitCastToUintType(T x) { using AsUint = typename unsigned_integer_of_size::type; return bit_cast(x); } // to avoid bit cast from uintN_t to uintN_t - template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size<2>::type castToUintType(uint16_t x) { return x; } - template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size<4>::type castToUintType(uint32_t x) { return x; } - template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size<8>::type castToUintType(uint64_t x) { return x; } + template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size<2>::type bitCastToUintType(uint16_t x) { return x; } + template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size<4>::type bitCastToUintType(uint32_t x) { return x; } + template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size<8>::type bitCastToUintType(uint64_t x) { return x; } template NBL_CONSTEXPR_INLINE_FUNC T castBackToFloatType(T x) @@ -102,7 +102,7 @@ template inline uint32_t extractBiasedExponent(T x) { using AsUint = typename unsigned_integer_of_size::type; - return glsl::bitfieldExtract(impl::castToUintType(x), traits::type>::mantissaBitCnt, traits::type>::exponentBitCnt); + return glsl::bitfieldExtract(impl::bitCastToUintType(x), traits::type>::mantissaBitCnt, traits::type>::exponentBitCnt); } 
template<> @@ -115,7 +115,7 @@ inline uint32_t extractBiasedExponent(uint64_t x) template<> inline uint32_t extractBiasedExponent(float64_t x) { - return extractBiasedExponent(impl::castToUintType(x)); + return extractBiasedExponent(impl::bitCastToUintType(x)); } template @@ -131,7 +131,7 @@ NBL_CONSTEXPR_INLINE_FUNC T replaceBiasedExponent(T x, typename unsigned_integer // TODO: //staticAssertTmp(impl::isTypeAllowed(), "Invalid type! Only floating point or unsigned integer types are allowed."); using AsFloat = typename float_of_size::type; - return impl::castBackToFloatType(glsl::bitfieldInsert(impl::castToUintType(x), biasedExp, traits::mantissaBitCnt, traits::exponentBitCnt)); + return impl::castBackToFloatType(glsl::bitfieldInsert(impl::bitCastToUintType(x), biasedExp, traits::mantissaBitCnt, traits::exponentBitCnt)); } // performs no overflow tests, returns x*exp2(n) @@ -145,7 +145,7 @@ template NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractMantissa(T x) { using AsUint = typename unsigned_integer_of_size::type; - return impl::castToUintType(x) & traits::type>::mantissaMask; + return impl::bitCastToUintType(x) & traits::type>::mantissaMask; } template @@ -160,14 +160,14 @@ template NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSign(T x) { using AsFloat = typename float_of_size::type; - return (impl::castToUintType(x) & traits::signMask) >> ((sizeof(T) * 8) - 1); + return (impl::bitCastToUintType(x) & traits::signMask) >> ((sizeof(T) * 8) - 1); } template NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) { using AsFloat = typename float_of_size::type; - return impl::castToUintType(x) & traits::signMask; + return impl::bitCastToUintType(x) & traits::signMask; } } diff --git a/include/nbl/builtin/hlsl/math/equations/quadratic.hlsl b/include/nbl/builtin/hlsl/math/equations/quadratic.hlsl index ba0f70ba67..4d40e6f327 100644 --- a/include/nbl/builtin/hlsl/math/equations/quadratic.hlsl +++ b/include/nbl/builtin/hlsl/math/equations/quadratic.hlsl @@ -5,7 +5,7 @@ #ifndef _NBL_BUILTIN_HLSL_MATH_EQUATIONS_QUADRATIC_INCLUDED_ #define _NBL_BUILTIN_HLSL_MATH_EQUATIONS_QUADRATIC_INCLUDED_ -#include +#include // TODO: Later include from correct hlsl header #ifndef nbl_hlsl_FLT_EPSILON diff --git a/include/nbl/builtin/hlsl/shapes/beziers.hlsl b/include/nbl/builtin/hlsl/shapes/beziers.hlsl index d3178a29f7..7fdab46942 100644 --- a/include/nbl/builtin/hlsl/shapes/beziers.hlsl +++ b/include/nbl/builtin/hlsl/shapes/beziers.hlsl @@ -10,8 +10,8 @@ #include #include #include -#include -#include +#include +#include // TODO: Later include from correct hlsl header (numeric_limits.hlsl) #ifndef nbl_hlsl_FLT_EPSILON diff --git a/include/nbl/builtin/hlsl/tgmath.hlsl b/include/nbl/builtin/hlsl/tgmath.hlsl index 75cddf27c0..9fac5dce96 100644 --- a/include/nbl/builtin/hlsl/tgmath.hlsl +++ b/include/nbl/builtin/hlsl/tgmath.hlsl @@ -5,7 +5,7 @@ #define _NBL_BUILTIN_HLSL_TGMATH_INCLUDED_ #include -#include +#include #include namespace nbl @@ -27,7 +27,7 @@ inline bool isnan(T val) } template -NBL_CONSTEXPR_INLINE_FUNC bool isInf(T val) +NBL_CONSTEXPR_INLINE_FUNC bool isinf(T val) { using AsUint = typename unsigned_integer_of_size::type; using AsFloat = typename float_of_size::type; diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index cad9995c2c..e06d9e2077 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -231,14 +231,14 @@ 
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/normalization/shared_nor # HLSL LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/macros.h") -#impl -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/impl/emulated_float64_t_impl.hlsl") #emulated -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated_float64_t.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated_float64_t_utils.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/emulated_float64_t.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/emulated_float64_t_utils.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/emulated_float64_t_impl.hlsl") +#ieee754 +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ieee754/ieee754.hlsl") #utility LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tgmath.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ieee754.hlsl") #spirv intrinsics LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/spirv_intrinsics/core.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/spirv_intrinsics/fragment_shader_pixel_interlock.hlsl") diff --git a/src/nbl/video/CVulkanPhysicalDevice.cpp b/src/nbl/video/CVulkanPhysicalDevice.cpp index f13429efc1..541a600b03 100644 --- a/src/nbl/video/CVulkanPhysicalDevice.cpp +++ b/src/nbl/video/CVulkanPhysicalDevice.cpp @@ -1615,8 +1615,7 @@ core::smart_refctd_ptr CVulkanPhysicalDevice::createLogicalDevic vk_deviceFeatures2.features.shaderStorageImageArrayDynamicIndexing = limits.shaderStorageImageArrayDynamicIndexing; vk_deviceFeatures2.features.shaderClipDistance = true; // good device support vk_deviceFeatures2.features.shaderCullDistance = enabledFeatures.shaderCullDistance; - //vk_deviceFeatures2.features.shaderFloat64 = limits.shaderFloat64; // TODO: enable back - vk_deviceFeatures2.features.shaderFloat64 = VK_FALSE; + vk_deviceFeatures2.features.shaderFloat64 = limits.shaderFloat64; vk_deviceFeatures2.features.shaderInt64 = true; // always enable vk_deviceFeatures2.features.shaderInt16 = true; // always enable vk_deviceFeatures2.features.shaderResourceResidency = enabledFeatures.shaderResourceResidency; From 1c029c10dc57aff734fccda728add61b2414c057 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 4 Sep 2024 00:13:39 +0100 Subject: [PATCH 046/432] Reduced branches in operator+ --- examples_tests | 2 +- .../hlsl/emulated/emulated_float64_t.hlsl | 212 +++--------------- include/nbl/builtin/hlsl/ieee754/ieee754.hlsl | 2 +- 3 files changed, 35 insertions(+), 181 deletions(-) diff --git a/examples_tests b/examples_tests index f8f2c23aab..a9e1007e59 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit f8f2c23aab0092015b3bf31cdab9875c1435f5dc +Subproject commit a9e1007e592786182c25ed1f6c25ad9e9306107c diff --git a/include/nbl/builtin/hlsl/emulated/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/emulated_float64_t.hlsl index 088e535c5b..a6eb9311d3 100644 --- a/include/nbl/builtin/hlsl/emulated/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/emulated_float64_t.hlsl @@ -72,161 +72,6 @@ namespace hlsl return emulated_float64_t(bit_cast(float64_t(val))); }*/ - // TODO: remove - emulated_float64_t addOld(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC - { - if (FlushDenormToZero) - { - emulated_float64_t retval = emulated_float64_t::create(0ull); - - uint64_t mantissa; - uint32_t3 mantissaExtended; - int biasedExp; - - uint64_t lhsSign = data & ieee754::traits::signMask; - uint64_t rhsSign = rhs.data & ieee754::traits::signMask; - 
uint64_t lhsMantissa = ieee754::extractMantissa(data); - uint64_t rhsMantissa = ieee754::extractMantissa(rhs.data); - int lhsBiasedExp = ieee754::extractBiasedExponent(data); - int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); - - int expDiff = lhsBiasedExp - rhsBiasedExp; - - if (lhsSign == rhsSign) - { - if (expDiff == 0) - { - if (lhsBiasedExp == ieee754::traits::specialValueExp) - { - bool propagate = (lhsMantissa | rhsMantissa) != 0u; - return bit_cast >(glsl::mix(data, impl::propagateFloat64NaN(data, rhs.data), propagate)); - } - - mantissa = lhsMantissa + rhsMantissa; - if (lhsBiasedExp == 0) - return bit_cast >(impl::assembleFloat64(lhsSign, 0, mantissa)); - mantissaExtended.xy = impl::packUint64(mantissa); - mantissaExtended.x |= 0x00200000u; - mantissaExtended.z = 0u; - biasedExp = lhsBiasedExp; - - mantissaExtended = impl::shift64ExtraRightJamming(mantissaExtended, 1); - } - else - { - if (expDiff < 0) - { - swap(lhsMantissa, rhsMantissa); - swap(lhsBiasedExp, rhsBiasedExp); - } - - if (lhsBiasedExp == ieee754::traits::specialValueExp) - { - const bool propagate = (lhsMantissa) != 0u; - return bit_cast >(glsl::mix(ieee754::traits::exponentMask | lhsSign, impl::propagateFloat64NaN(data, rhs.data), propagate)); - } - - expDiff = glsl::mix(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); - rhsMantissa = glsl::mix(rhsMantissa | (1ull << 52), rhsMantissa, rhsBiasedExp == 0); - const uint32_t3 shifted = impl::shift64ExtraRightJamming(uint32_t3(impl::packUint64(rhsMantissa), 0u), expDiff); - rhsMantissa = impl::unpackUint64(shifted.xy); - mantissaExtended.z = shifted.z; - biasedExp = lhsBiasedExp; - - lhsMantissa |= (1ull << 52); - mantissaExtended.xy = impl::packUint64(lhsMantissa + rhsMantissa); - --biasedExp; - if (!(mantissaExtended.x < 0x00200000u)) - { - mantissaExtended = impl::shift64ExtraRightJamming(mantissaExtended, 1); - ++biasedExp; - } - - return bit_cast >(impl::roundAndPackFloat64(lhsSign, biasedExp, mantissaExtended.xyz)); - } - - // cannot happen but compiler cries about not every path returning value - return bit_cast >(impl::roundAndPackFloat64(lhsSign, biasedExp, mantissaExtended)); - } - else - { - lhsMantissa = impl::shortShift64Left(lhsMantissa, 10); - rhsMantissa = impl::shortShift64Left(rhsMantissa, 10); - - if (expDiff != 0) - { - uint32_t2 frac; - - if (expDiff < 0) - { - swap(lhsMantissa, rhsMantissa); - swap(lhsBiasedExp, rhsBiasedExp); - lhsSign ^= ieee754::traits::signMask; - } - - if (lhsBiasedExp == ieee754::traits::specialValueExp) - { - bool propagate = lhsMantissa != 0u; - return bit_cast >(glsl::mix(impl::assembleFloat64(lhsSign, ieee754::traits::exponentMask, 0ull), impl::propagateFloat64NaN(data, rhs.data), propagate)); - } - - expDiff = glsl::mix(abs(expDiff), abs(expDiff) - 1, rhsBiasedExp == 0); - rhsMantissa = glsl::mix(rhsMantissa | 0x4000000000000000ull, rhsMantissa, rhsBiasedExp == 0); - rhsMantissa = impl::unpackUint64(impl::shift64RightJamming(impl::packUint64(rhsMantissa), expDiff)); - lhsMantissa |= 0x4000000000000000ull; - frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); - biasedExp = lhsBiasedExp; - --biasedExp; - return bit_cast >(impl::normalizeRoundAndPackFloat64(lhsSign, biasedExp - 10, frac.x, frac.y)); - } - if (lhsBiasedExp == ieee754::traits::specialValueExp) - { - bool propagate = ((lhsMantissa) | (rhsMantissa)) != 0u; - return bit_cast >(glsl::mix(ieee754::traits::quietNaN, impl::propagateFloat64NaN(data, rhs.data), propagate)); - } - rhsBiasedExp = glsl::mix(rhsBiasedExp, 1, lhsBiasedExp == 0); - 
lhsBiasedExp = glsl::mix(lhsBiasedExp, 1, lhsBiasedExp == 0); - - - const uint32_t2 lhsMantissaPacked = impl::packUint64(lhsMantissa); - const uint32_t2 rhsMantissaPacked = impl::packUint64(rhsMantissa); - - uint32_t2 frac; - uint64_t signOfDifference = 0; - if (rhsMantissaPacked.x < lhsMantissaPacked.x) - { - frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); - } - else if (lhsMantissaPacked.x < rhsMantissaPacked.x) - { - frac.xy = impl::packUint64(rhsMantissa - lhsMantissa); - signOfDifference = ieee754::traits::signMask; - } - else if (rhsMantissaPacked.y <= lhsMantissaPacked.y) - { - /* It is possible that frac.x and frac.y may be zero after this. */ - frac.xy = impl::packUint64(lhsMantissa - rhsMantissa); - } - else - { - frac.xy = impl::packUint64(rhsMantissa - lhsMantissa); - signOfDifference = ieee754::traits::signMask; - } - - biasedExp = glsl::mix(rhsBiasedExp, lhsBiasedExp, signOfDifference == 0u); - lhsSign ^= signOfDifference; - uint64_t retval_0 = impl::packFloat64(0, 0, 0u, 0u); - uint64_t retval_1 = impl::normalizeRoundAndPackFloat64(lhsSign, biasedExp - 11, frac.x, frac.y); - return bit_cast >(glsl::mix(retval_0, retval_1, frac.x != 0u || frac.y != 0u)); - } - } - else - { - //static_assert(false, "not implemented yet"); - return bit_cast >(0xdeadbeefbadcaffeull); - } - } - // arithmetic operators this_t operator+(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { @@ -235,8 +80,8 @@ namespace hlsl if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) return bit_cast(ieee754::traits::quietNaN); - int lhsBiasedExp = ieee754::extractBiasedExponent(data); - int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); + const int lhsBiasedExp = ieee754::extractBiasedExponent(data); + const int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); if (lhsBiasedExp == 0ull) return bit_cast(rhs.data); @@ -246,39 +91,48 @@ namespace hlsl const uint64_t lhsSign = ieee754::extractSign(data); const uint64_t rhsSign = ieee754::extractSign(rhs.data); - if (lhsSign != rhsSign) - return addOld(rhs); - - // assuming lhsSign == rhsSign - const uint64_t resultSign = lhsSign == 0 ? 0 : ieee754::traits::signMask; - - if (!FastMath && (tgmath::isinf(data) || tgmath::isinf(rhs.data))) - return bit_cast(ieee754::traits::inf | resultSign); + if (!FastMath && tgmath::isinf(data)) + return bit_cast(ieee754::traits::inf | ieee754::extractSignPreserveBitPattern(max(data, rhs.data))); uint64_t lhsNormMantissa = ieee754::extractNormalizeMantissa(data); uint64_t rhsNormMantissa = ieee754::extractNormalizeMantissa(rhs.data); - // TODO: branchless? - /*if (lhsSign != rhsSign) - { - if (lhsSign) - lhsNormMantissa *= -1; - if (rhsSign) - rhsNormMantissa *= -1; - }*/ - - int expDiff = lhsBiasedExp - rhsBiasedExp; + const int expDiff = lhsBiasedExp - rhsBiasedExp; - int exp = max(lhsBiasedExp, rhsBiasedExp) - ieee754::traits::exponentBias; - uint32_t shiftAmount = abs(expDiff); + const int exp = max(lhsBiasedExp, rhsBiasedExp) - ieee754::traits::exponentBias; + const uint32_t shiftAmount = abs(expDiff); - // so lhsNormMantissa always holds mantissa of number with greater exponent + uint64_t resultSign; if (expDiff < 0) + { + // so lhsNormMantissa always holds mantissa of number with greater exponent swap(lhsNormMantissa, rhsNormMantissa); + resultSign = rhsSign > 0 ? ieee754::traits::signMask : 0ull; + } + else + { + resultSign = lhsSign > 0 ? 
ieee754::traits::signMask : 0ull; + } rhsNormMantissa >>= shiftAmount; - uint64_t resultMantissa = lhsNormMantissa + rhsNormMantissa; + uint64_t resultMantissa; + if (lhsSign != rhsSign) + { + int64_t mantissaDiff = lhsNormMantissa - rhsNormMantissa; + if (mantissaDiff < 0) + swap(lhsNormMantissa, rhsNormMantissa); + + lhsNormMantissa <<= 10; + rhsNormMantissa <<= 10; + resultMantissa = uint64_t(int64_t(lhsNormMantissa) - int64_t(rhsNormMantissa)); + resultMantissa >>= 10; + } + else + { + resultMantissa = lhsNormMantissa + rhsNormMantissa; + } + //const uint64_t resultSign = ((lhsSign && rhsSign) || (resultMantissa & (lhsSign << 63))) << 63; uint64_t resultBiasedExp = uint64_t(exp) + ieee754::traits::exponentBias; diff --git a/include/nbl/builtin/hlsl/ieee754/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754/ieee754.hlsl index f869f4ceba..e6e6477954 100644 --- a/include/nbl/builtin/hlsl/ieee754/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754/ieee754.hlsl @@ -85,7 +85,7 @@ struct traits : traits_base using bit_rep_t = typename unsigned_integer_of_size::type; using base_t = traits_base; - NBL_CONSTEXPR_STATIC_INLINE bit_rep_t signMask = bit_rep_t(0x1u) << (sizeof(Float) * 8 - 1); + NBL_CONSTEXPR_STATIC_INLINE bit_rep_t signMask = bit_rep_t(0x1ull) << (sizeof(Float) * 8 - 1); NBL_CONSTEXPR_STATIC_INLINE bit_rep_t exponentMask = ((~bit_rep_t(0)) << base_t::mantissaBitCnt) ^ signMask; NBL_CONSTEXPR_STATIC_INLINE bit_rep_t mantissaMask = (bit_rep_t(0x1u) << base_t::mantissaBitCnt) - 1; NBL_CONSTEXPR_STATIC_INLINE int exponentBias = (int(0x1) << (base_t::exponentBitCnt - 1)) - 1; From c9f7e47e5e08cba58a270bb7e6ed4ddf0e61eb2c Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 6 Sep 2024 12:07:42 +0100 Subject: [PATCH 047/432] Enhanced more tests, more fixes --- examples_tests | 2 +- .../hlsl/emulated/emulated_float64_t.hlsl | 162 ++++++++---------- .../emulated/emulated_float64_t_impl.hlsl | 148 ++-------------- .../emulated/emulated_float64_t_utils.hlsl | 23 +-- include/nbl/builtin/hlsl/tgmath.hlsl | 2 +- 5 files changed, 95 insertions(+), 242 deletions(-) diff --git a/examples_tests b/examples_tests index a9e1007e59..1750aefb29 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit a9e1007e592786182c25ed1f6c25ad9e9306107c +Subproject commit 1750aefb2936657a4d5cd242399c7ba8895f43a0 diff --git a/include/nbl/builtin/hlsl/emulated/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/emulated_float64_t.hlsl index a6eb9311d3..ecf9aa0d22 100644 --- a/include/nbl/builtin/hlsl/emulated/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/emulated_float64_t.hlsl @@ -80,38 +80,56 @@ namespace hlsl if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) return bit_cast(ieee754::traits::quietNaN); + if (!FastMath && impl::areBothInfinity(data, rhs.data)) + { + uint64_t lhsSign = data & ieee754::traits::signMask; + uint64_t rhsSign = rhs.data & ieee754::traits::signMask; + + if(lhsSign == rhsSign) + return bit_cast(ieee754::traits::inf | lhsSign); + else if(lhsSign || rhsSign) + return bit_cast(ieee754::traits::quietNaN | ieee754::traits::signMask); + } + + if (!FastMath && tgmath::isinf(data)) + return bit_cast(data); + + if (!FastMath && tgmath::isinf(rhs.data)) + return bit_cast(rhs.data); + const int lhsBiasedExp = ieee754::extractBiasedExponent(data); const int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); - if (lhsBiasedExp == 0ull) - return bit_cast(rhs.data); - if (rhsBiasedExp == 0ull) - return bit_cast(data); + uint64_t lhsData = 
impl::flushDenormToZero(lhsBiasedExp, data); + uint64_t rhsData = impl::flushDenormToZero(rhsBiasedExp, rhs.data); - const uint64_t lhsSign = ieee754::extractSign(data); - const uint64_t rhsSign = ieee754::extractSign(rhs.data); + uint64_t lhsSign = ieee754::extractSignPreserveBitPattern(lhsData); + uint64_t rhsSign = ieee754::extractSignPreserveBitPattern(rhsData); - if (!FastMath && tgmath::isinf(data)) - return bit_cast(ieee754::traits::inf | ieee754::extractSignPreserveBitPattern(max(data, rhs.data))); + if (!FastMath && impl::areBothZero(lhsData, rhsData)) + { + if (lhsSign == rhsSign) + return bit_cast(lhsSign); + else + return bit_cast(0ull); + } - uint64_t lhsNormMantissa = ieee754::extractNormalizeMantissa(data); - uint64_t rhsNormMantissa = ieee754::extractNormalizeMantissa(rhs.data); + if (!FastMath && tgmath::isinf(lhsData)) + return bit_cast(ieee754::traits::inf | ieee754::extractSignPreserveBitPattern(max(lhsData, rhsData))); + + uint64_t lhsNormMantissa = ieee754::extractNormalizeMantissa(lhsData); + uint64_t rhsNormMantissa = ieee754::extractNormalizeMantissa(rhsData); const int expDiff = lhsBiasedExp - rhsBiasedExp; const int exp = max(lhsBiasedExp, rhsBiasedExp) - ieee754::traits::exponentBias; const uint32_t shiftAmount = abs(expDiff); - uint64_t resultSign; if (expDiff < 0) { // so lhsNormMantissa always holds mantissa of number with greater exponent swap(lhsNormMantissa, rhsNormMantissa); - resultSign = rhsSign > 0 ? ieee754::traits::signMask : 0ull; - } - else - { - resultSign = lhsSign > 0 ? ieee754::traits::signMask : 0ull; + swap(lhsSign, rhsSign); } rhsNormMantissa >>= shiftAmount; @@ -121,7 +139,10 @@ namespace hlsl { int64_t mantissaDiff = lhsNormMantissa - rhsNormMantissa; if (mantissaDiff < 0) + { swap(lhsNormMantissa, rhsNormMantissa); + swap(lhsSign, rhsSign); + } lhsNormMantissa <<= 10; rhsNormMantissa <<= 10; @@ -132,20 +153,15 @@ namespace hlsl { resultMantissa = lhsNormMantissa + rhsNormMantissa; } - - //const uint64_t resultSign = ((lhsSign && rhsSign) || (resultMantissa & (lhsSign << 63))) << 63; uint64_t resultBiasedExp = uint64_t(exp) + ieee754::traits::exponentBias; - resultMantissa = resultMantissa; - if (resultMantissa & 1ull << 53) { ++resultBiasedExp; resultMantissa >>= 1; } - // TODO: better implementation with no loop while (resultMantissa < (1ull << 52)) { --resultBiasedExp; @@ -153,7 +169,7 @@ namespace hlsl } resultMantissa &= ieee754::traits::mantissaMask; - uint64_t output = impl::assembleFloat64(resultSign, uint64_t(resultBiasedExp) << ieee754::traits::mantissaBitCnt, resultMantissa); + uint64_t output = impl::assembleFloat64(lhsSign, resultBiasedExp << ieee754::traits::mantissaBitCnt, resultMantissa); return bit_cast(output); } @@ -186,53 +202,27 @@ namespace hlsl { emulated_float64_t retval = this_t::create(0ull); - uint64_t lhsSign = data & ieee754::traits::signMask; - uint64_t rhsSign = rhs.data & ieee754::traits::signMask; - uint64_t lhsMantissa = ieee754::extractMantissa(data); - uint64_t rhsMantissa = ieee754::extractMantissa(rhs.data); int lhsBiasedExp = ieee754::extractBiasedExponent(data); int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); - int exp = int(lhsBiasedExp + rhsBiasedExp) - ieee754::traits::exponentBias; - uint64_t sign = (data ^ rhs.data) & ieee754::traits::signMask; - if (!FastMath) - { - if (lhsBiasedExp == ieee754::traits::specialValueExp) - { - if ((lhsMantissa != 0u) || ((rhsBiasedExp == ieee754::traits::specialValueExp) && (rhsMantissa != 0u))) - return bit_cast(impl::propagateFloat64NaN(data, 
rhs.data)); - if ((uint64_t(rhsBiasedExp) | rhsMantissa) == 0u) - return bit_cast(ieee754::traits::quietNaN); + uint64_t lhsData = impl::flushDenormToZero(lhsBiasedExp, data); + uint64_t rhsData = impl::flushDenormToZero(rhsBiasedExp, rhs.data); - return bit_cast(impl::assembleFloat64(sign, ieee754::traits::exponentMask, 0ull)); - } - if (rhsBiasedExp == ieee754::traits::specialValueExp) - { - /* a cannot be NaN, but is b NaN? */ - if (rhsMantissa != 0u) -#ifdef RELAXED_NAN_PROPAGATION - return rhs.data; -#else - return bit_cast(impl::propagateFloat64NaN(data, rhs.data)); -#endif - if ((uint64_t(lhsBiasedExp) | lhsMantissa) == 0u) - return bit_cast(ieee754::traits::quietNaN); + uint64_t lhsSign = lhsData & ieee754::traits::signMask; + uint64_t rhsSign = rhsData & ieee754::traits::signMask; - return bit_cast(sign | ieee754::traits::exponentMask); - } - if (lhsBiasedExp == 0) - { - if (lhsMantissa == 0u) - return bit_cast(sign); - impl::normalizeFloat64Subnormal(lhsMantissa, lhsBiasedExp, lhsMantissa); - } - if (rhsBiasedExp == 0) - { - if (rhsMantissa == 0u) - return bit_cast(sign); - impl::normalizeFloat64Subnormal(rhsMantissa, rhsBiasedExp, rhsMantissa); - } - } + uint64_t lhsMantissa = ieee754::extractMantissa(lhsData); + uint64_t rhsMantissa = ieee754::extractMantissa(rhsData); + + int exp = int(lhsBiasedExp + rhsBiasedExp) - ieee754::traits::exponentBias; + uint64_t sign = (lhsData ^ rhsData) & ieee754::traits::signMask; + + if (!FastMath && (tgmath::isnan(lhsData) || tgmath::isnan(rhsData))) + return bit_cast(ieee754::traits::quietNaN | sign); + if (!FastMath && (tgmath::isinf(lhsData) || tgmath::isinf(rhsData))) + return bit_cast(ieee754::traits::inf | sign); + if (!FastMath && impl::areBothZero(lhsData, rhsData)) + return bit_cast(sign); const uint64_t hi_l = (lhsMantissa >> 21) | (1ull << 31); const uint64_t lo_l = lhsMantissa & ((1ull << 21) - 1); @@ -263,42 +253,36 @@ namespace hlsl return _static_cast(data) * create(rhs); } - /*this_t reciprocal(uint64_t x) - { - using ThisType = this_t; - ThisType output = ThisType::bit_cast((0xbfcdd6a18f6a6f52ULL - x) >> 1); - output = output * output; - return output; - }*/ - emulated_float64_t operator/(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { if (FlushDenormToZero) { - //return this_t::bit_cast(data) * reciprocal(rhs.data); - if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) return bit_cast(ieee754::traits::quietNaN); const uint64_t sign = (data ^ rhs.data) & ieee754::traits::signMask; if (!FastMath && impl::isZero(rhs.data)) - return bit_cast(ieee754::traits::inf | sign); - + return bit_cast(ieee754::traits::quietNaN | sign); if (!FastMath && impl::areBothInfinity(data, rhs.data)) - return bit_cast(ieee754::traits::quietNaN); - + return bit_cast(ieee754::traits::quietNaN | ieee754::traits::signMask); if (!FastMath && tgmath::isinf(data)) return bit_cast(ieee754::traits::inf | sign); - if (!FastMath && tgmath::isinf(rhs.data)) return bit_cast(0ull | sign); + if (!FastMath && impl::isZero(rhs.data)) + return bit_cast(ieee754::traits::quietNaN | sign); + + int lhsBiasedExp = ieee754::extractBiasedExponent(data); + int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); + uint64_t lhsData = impl::flushDenormToZero(lhsBiasedExp, data); + uint64_t rhsData = impl::flushDenormToZero(rhsBiasedExp, rhs.data); - const uint64_t lhsRealMantissa = (ieee754::extractMantissa(data) | (1ull << ieee754::traits::mantissaBitCnt)); - const uint64_t rhsRealMantissa = ieee754::extractMantissa(rhs.data) | (1ull << 
ieee754::traits::mantissaBitCnt); + const uint64_t lhsRealMantissa = (ieee754::extractMantissa(lhsData) | (1ull << ieee754::traits::mantissaBitCnt)); + const uint64_t rhsRealMantissa = ieee754::extractMantissa(rhsData) | (1ull << ieee754::traits::mantissaBitCnt); - int exp = ieee754::extractExponent(data) - ieee754::extractExponent(rhs.data) + ieee754::traits::exponentBias; + int exp = lhsBiasedExp - rhsBiasedExp + int(ieee754::traits::exponentBias); uint64_t2 lhsMantissaShifted = impl::shiftMantissaLeftBy53(lhsRealMantissa); uint64_t mantissa = impl::divmod128by64(lhsMantissaShifted.x, lhsMantissaShifted.y, rhsRealMantissa); @@ -326,12 +310,10 @@ namespace hlsl { if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) return false; - // TODO: i'm not sure about this one if (!FastMath && impl::areBothZero(data, rhs.data)) return true; const emulated_float64_t xored = bit_cast(data ^ rhs.data); - // TODO: check what fast math returns for -0 == 0 if ((xored.data & 0x7FFFFFFFFFFFFFFFull) == 0ull) return true; @@ -348,7 +330,7 @@ namespace hlsl { if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) return false; - if (!FastMath && impl::areBothSameSignInfinity(data, rhs.data)) + if (!FastMath && impl::areBothInfinity(data, rhs.data)) return false; if (!FastMath && impl::areBothZero(data, rhs.data)) return false; @@ -366,7 +348,7 @@ namespace hlsl { if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) return false; - if (!FastMath && impl::areBothSameSignInfinity(data, rhs.data)) + if (!FastMath && impl::areBothInfinity(data, rhs.data)) return false; if (!FastMath && impl::areBothZero(data, rhs.data)) return false; @@ -495,7 +477,6 @@ struct static_cast_helper,void using From = emulated_float64_t; - // TODO: test static inline To cast(From v) { using ToAsFloat = typename float_of_size::type; @@ -511,13 +492,12 @@ struct static_cast_helper,void const int exponent = ieee754::extractExponent(v.data); if (!From::supportsFastMath()) { - //TODO: i have no idea why it doesn't work, fix - /*if (exponent > ieee754::traits::exponentMax) + if (exponent > ieee754::traits::exponentMax) return bit_cast(ieee754::traits::inf); if (exponent < ieee754::traits::exponentMin) - return -bit_cast(ieee754::traits::inf); + return bit_cast(-ieee754::traits::inf); if (tgmath::isnan(v.data)) - return bit_cast(ieee754::traits::quietNaN);*/ + return bit_cast(ieee754::traits::quietNaN); } diff --git a/include/nbl/builtin/hlsl/emulated/emulated_float64_t_impl.hlsl b/include/nbl/builtin/hlsl/emulated/emulated_float64_t_impl.hlsl index d2b59e9607..6778cd34b5 100644 --- a/include/nbl/builtin/hlsl/emulated/emulated_float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/emulated/emulated_float64_t_impl.hlsl @@ -97,9 +97,14 @@ inline uint64_t castFloat32ToStorageType(float32_t val) } }; +NBL_CONSTEXPR_INLINE_FUNC bool isZero(uint64_t val) +{ + return (val << 1) == 0; +} + inline uint64_t castToUint64WithFloat64BitPattern(uint64_t val) { - if (val == 0) + if (isZero(val)) return val; #ifndef __HLSL_VERSION @@ -187,6 +192,11 @@ NBL_CONSTEXPR_INLINE_FUNC uint64_t propagateFloat64NaN(uint64_t lhs, uint64_t rh #endif } +NBL_CONSTEXPR_INLINE_FUNC uint64_t flushDenormToZero(uint64_t extractedBiasedExponent, uint64_t value) +{ + return extractedBiasedExponent ? 
value : ieee754::extractSignPreserveBitPattern(value); +} + static inline int countLeadingZeros32(uint32_t val) { #ifndef __HLSL_VERSION @@ -215,122 +225,17 @@ NBL_CONSTEXPR_INLINE_FUNC uint32_t2 shift64RightJamming(uint32_t2 val, int count return output; } -NBL_CONSTEXPR_INLINE_FUNC uint32_t3 shift64ExtraRightJamming(uint32_t3 val, int count) -{ - uint32_t3 output; - output.x = 0u; - - int negCount = (-count) & 31; - - output.z = glsl::mix(uint32_t(val.x != 0u), val.x, count == 64); - output.z = glsl::mix(output.z, val.x << negCount, count < 64); - output.z = glsl::mix(output.z, val.y << negCount, count < 32); - - output.y = glsl::mix(0u, (val.x >> (count & 31)), count < 64); - output.y = glsl::mix(output.y, (val.x << negCount) | (val.y >> count), count < 32); - - val.z = glsl::mix(val.z | val.y, val.z, count < 32); - output.x = glsl::mix(output.x, val.x >> count, count < 32); - output.z |= uint32_t(val.z != 0u); - - output.x = glsl::mix(output.x, 0u, (count == 32)); - output.y = glsl::mix(output.y, val.x, (count == 32)); - output.z = glsl::mix(output.z, val.y, (count == 32)); - output.x = glsl::mix(output.x, val.x, (count == 0)); - output.y = glsl::mix(output.y, val.y, (count == 0)); - output.z = glsl::mix(output.z, val.z, (count == 0)); - - return output; -} - -NBL_CONSTEXPR_INLINE_FUNC uint64_t shortShift64Left(uint64_t val, int count) -{ - const uint32_t2 packed = packUint64(val); - - uint32_t2 output; - output.y = packed.y << count; - // TODO: fix - output.x = glsl::mix((packed.x << count | (packed.y >> ((-count) & 31))), packed.x, count == 0); - - return unpackUint64(output); -}; - NBL_CONSTEXPR_INLINE_FUNC uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) { return signShifted + expShifted + mantissa; } -NBL_CONSTEXPR_INLINE_FUNC uint64_t roundAndPackFloat64(uint64_t zSign, int zExp, uint32_t3 mantissaExtended) -{ - bool roundNearestEven; - bool increment; - - roundNearestEven = true; - increment = int(mantissaExtended.z) < 0; - - // overflow handling? 
- // if biased exp is lesser then 2045 - if (0x7FD <= zExp) - { - if ((0x7FD < zExp) || ((zExp == 0x7FD) && (0x001FFFFFu == mantissaExtended.x && 0xFFFFFFFFu == mantissaExtended.y) && increment)) - return assembleFloat64(zSign, 0x7FE << ieee754::traits::mantissaBitCnt, 0x000FFFFFFFFFFFFFull); - - return assembleFloat64(zSign, ieee754::traits::exponentMask, 0ull); - } - - if (zExp < 0) - { - mantissaExtended = shift64ExtraRightJamming(mantissaExtended, -zExp); - zExp = 0; - } - - zExp = glsl::mix(zExp, 0, (mantissaExtended.x | mantissaExtended.y) == 0u); - - return assembleFloat64(zSign, uint64_t(zExp) << ieee754::traits::mantissaBitCnt, unpackUint64(mantissaExtended.xy)); -} - -static inline uint64_t normalizeRoundAndPackFloat64(uint64_t sign, int exp, uint32_t frac0, uint32_t frac1) -{ - int shiftCount; - uint32_t3 frac = uint32_t3(frac0, frac1, 0u); - - if (frac.x == 0u) - { - exp -= 32; - frac.x = frac.y; - frac.y = 0u; - } - - shiftCount = countLeadingZeros32(frac.x) - 11; - if (0 <= shiftCount) - { - // TODO: this is packing and unpacking madness, fix it - frac.xy = packUint64(shortShift64Left(unpackUint64(frac.xy), shiftCount)); - } - else - { - frac.xyz = shift64ExtraRightJamming(uint32_t3(frac.xy, 0), -shiftCount); - } - exp -= shiftCount; - return roundAndPackFloat64(sign, exp, frac); -} - +//TODO: remove static inline void normalizeFloat64Subnormal(uint64_t mantissa, NBL_REF_ARG(int) outExp, NBL_REF_ARG(uint64_t) outMantissa) { - uint32_t2 mantissaPacked = packUint64(mantissa); - int shiftCount; - uint32_t2 temp; - shiftCount = countLeadingZeros32(glsl::mix(mantissaPacked.x, mantissaPacked.y, mantissaPacked.x == 0u)) - 11; - outExp = glsl::mix(1 - shiftCount, -shiftCount - 31, mantissaPacked.x == 0u); - - temp.x = glsl::mix(mantissaPacked.y << shiftCount, mantissaPacked.y >> (-shiftCount), shiftCount < 0); - temp.y = glsl::mix(0u, mantissaPacked.y << (shiftCount & 31), shiftCount < 0); - - shortShift64Left(impl::unpackUint64(mantissaPacked), shiftCount); - - outMantissa = glsl::mix(outMantissa, unpackUint64(temp), mantissaPacked.x == 0); + return; } NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) @@ -341,16 +246,6 @@ NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) return lhs == rhs && lhs == ieee754::traits::inf; } -NBL_CONSTEXPR_INLINE_FUNC bool areBothSameSignInfinity(uint64_t lhs, uint64_t rhs) -{ - return lhs == rhs && (lhs & ~ieee754::traits::signMask) == ieee754::traits::inf; -} - -NBL_CONSTEXPR_INLINE_FUNC bool isZero(uint64_t val) -{ - return (val << 1) == 0; -} - NBL_CONSTEXPR_INLINE_FUNC bool areBothZero(uint64_t lhs, uint64_t rhs) { return ((lhs << 1) == 0ull) && ((rhs << 1) == 0ull); @@ -361,21 +256,6 @@ NBL_CONSTEXPR_INLINE_FUNC bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) return ((lhs << 1) == 0ull) && (lhs == rhs); } -// TODO: find more efficient algorithm -static inline uint64_t nlz64(uint64_t x) -{ - static const uint64_t MASK = 1ull << 63; - - uint64_t counter = 0; - - while ((x & MASK) == 0) - { - x <<= 1; - ++counter; - } - return counter; -} - // returns pair of quotient and remainder static inline uint64_t divmod128by64(const uint64_t dividentHigh, const uint64_t dividentLow, uint64_t divisor) { @@ -383,9 +263,7 @@ static inline uint64_t divmod128by64(const uint64_t dividentHigh, const uint64_t uint64_t un1, un0, vn1, vn0, q1, q0, un32, un21, un10, rhat, left, right; uint64_t s; - //TODO: countl_zero s = countl_zero(divisor); - //s = nlz64(divisor); divisor <<= s; vn1 = divisor >> 32; vn0 = 
divisor & 0xFFFFFFFF; diff --git a/include/nbl/builtin/hlsl/emulated/emulated_float64_t_utils.hlsl b/include/nbl/builtin/hlsl/emulated/emulated_float64_t_utils.hlsl index ac7b79e74b..6bc017fb73 100644 --- a/include/nbl/builtin/hlsl/emulated/emulated_float64_t_utils.hlsl +++ b/include/nbl/builtin/hlsl/emulated/emulated_float64_t_utils.hlsl @@ -163,7 +163,6 @@ using emulated_vector_t3 = emulated_vector; template using emulated_vector_t4 = emulated_vector; -// TODO: works only for float, fix namespace impl { @@ -265,21 +264,19 @@ struct emulated_matrix // : emulated_matrix_base; using portable_matrix64_t2x2 = portable_matrix_t2x2 >; using portable_matrix64_t3x3 = portable_matrix_t3x3 >; - -// TODO: fix template NBL_CONSTEXPR_INLINE_FUNC portable_float64_t<> create_portable_float64_t(T val) { diff --git a/include/nbl/builtin/hlsl/tgmath.hlsl b/include/nbl/builtin/hlsl/tgmath.hlsl index 9fac5dce96..d603dea2c0 100644 --- a/include/nbl/builtin/hlsl/tgmath.hlsl +++ b/include/nbl/builtin/hlsl/tgmath.hlsl @@ -33,7 +33,7 @@ NBL_CONSTEXPR_INLINE_FUNC bool isinf(T val) using AsFloat = typename float_of_size::type; AsUint tmp = bit_cast(val); - return (tmp & ~ieee754::traits::signMask) == ieee754::traits::inf; + return (tmp & (~ieee754::traits::signMask)) == ieee754::traits::inf; } } From fbfc7c289cf02a272a3733f88ed2ab7cba7e71d4 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 6 Sep 2024 16:41:29 +0100 Subject: [PATCH 048/432] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 1750aefb29..5e47cf34a4 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 1750aefb2936657a4d5cd242399c7ba8895f43a0 +Subproject commit 5e47cf34a46c1d32237a7f4aa5222a795aa81fde From 2743cb981828573e3686bd3a5aed182ddbfb252b Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 13 Sep 2024 11:24:36 +0100 Subject: [PATCH 049/432] 64 bit float type is now determined by device capabilities --- examples_tests | 2 +- .../hlsl/emulated/emulated_float64_t.hlsl | 195 +++++++------ .../emulated/emulated_float64_t_impl.hlsl | 61 ++-- .../hlsl/math/equations/quadratic.hlsl | 2 +- .../nbl/builtin/hlsl/portable_float64_t.hlsl | 22 ++ ...tils.hlsl => portable_float64_t_math.hlsl} | 267 +++++++++--------- include/nbl/builtin/hlsl/shapes/beziers.hlsl | 23 +- src/nbl/builtin/CMakeLists.txt | 3 +- 8 files changed, 308 insertions(+), 267 deletions(-) create mode 100644 include/nbl/builtin/hlsl/portable_float64_t.hlsl rename include/nbl/builtin/hlsl/{emulated/emulated_float64_t_utils.hlsl => portable_float64_t_math.hlsl} (64%) diff --git a/examples_tests b/examples_tests index 5e47cf34a4..904da40cc7 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 5e47cf34a46c1d32237a7f4aa5222a795aa81fde +Subproject commit 904da40cc731d5f25ff6319ec7f497b06c579b00 diff --git a/include/nbl/builtin/hlsl/emulated/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/emulated_float64_t.hlsl index ecf9aa0d22..aa600e7eca 100644 --- a/include/nbl/builtin/hlsl/emulated/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/emulated_float64_t.hlsl @@ -7,6 +7,16 @@ namespace nbl { namespace hlsl { + /*enum E_ROUNDING_MODE + { + FLOAT_ROUND_NEAREST_EVEN, + FLOAT_ROUND_TO_ZERO, + FLOAT_ROUND_DOWN, + FLOAT_ROUND_UP + };*/ + + // currently only FLOAT_ROUND_TO_ZERO is supported, cannot implement partial specialization in this case due to dxc bug https://github.com/microsoft/DirectXShaderCompiler/issues/5563 + // TODO: partial 
specializations with new template parameter `E_ROUNDING_MODE RoundingMode` template struct emulated_float64_t { @@ -77,25 +87,28 @@ namespace hlsl { if (FlushDenormToZero) { - if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) - return bit_cast(ieee754::traits::quietNaN); - - if (!FastMath && impl::areBothInfinity(data, rhs.data)) + if(FastMath) { - uint64_t lhsSign = data & ieee754::traits::signMask; - uint64_t rhsSign = rhs.data & ieee754::traits::signMask; + if (tgmath::isnan(data) || tgmath::isnan(rhs.data)) + return bit_cast(ieee754::traits::quietNaN); - if(lhsSign == rhsSign) - return bit_cast(ieee754::traits::inf | lhsSign); - else if(lhsSign || rhsSign) - return bit_cast(ieee754::traits::quietNaN | ieee754::traits::signMask); - } + if (impl::areBothInfinity(data, rhs.data)) + { + uint64_t lhsSign = data & ieee754::traits::signMask; + uint64_t rhsSign = rhs.data & ieee754::traits::signMask; - if (!FastMath && tgmath::isinf(data)) - return bit_cast(data); + if (lhsSign == rhsSign) + return bit_cast(ieee754::traits::inf | lhsSign); + else if (lhsSign || rhsSign) + return bit_cast(ieee754::traits::quietNaN | ieee754::traits::signMask); + } - if (!FastMath && tgmath::isinf(rhs.data)) - return bit_cast(rhs.data); + if (tgmath::isinf(data)) + return bit_cast(data); + + if (tgmath::isinf(rhs.data)) + return bit_cast(rhs.data); + } const int lhsBiasedExp = ieee754::extractBiasedExponent(data); const int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); @@ -105,17 +118,20 @@ namespace hlsl uint64_t lhsSign = ieee754::extractSignPreserveBitPattern(lhsData); uint64_t rhsSign = ieee754::extractSignPreserveBitPattern(rhsData); - - if (!FastMath && impl::areBothZero(lhsData, rhsData)) + + if(FastMath) { - if (lhsSign == rhsSign) - return bit_cast(lhsSign); - else - return bit_cast(0ull); - } + if (impl::areBothZero(lhsData, rhsData)) + { + if (lhsSign == rhsSign) + return bit_cast(lhsSign); + else + return bit_cast(0ull); + } - if (!FastMath && tgmath::isinf(lhsData)) - return bit_cast(ieee754::traits::inf | ieee754::extractSignPreserveBitPattern(max(lhsData, rhsData))); + if (tgmath::isinf(lhsData)) + return bit_cast(ieee754::traits::inf | ieee754::extractSignPreserveBitPattern(max(lhsData, rhsData))); + } uint64_t lhsNormMantissa = ieee754::extractNormalizeMantissa(lhsData); uint64_t rhsNormMantissa = ieee754::extractNormalizeMantissa(rhsData); @@ -156,6 +172,9 @@ namespace hlsl uint64_t resultBiasedExp = uint64_t(exp) + ieee754::traits::exponentBias; + if (resultMantissa == 0ull) + return _static_cast(0ull); + if (resultMantissa & 1ull << 53) { ++resultBiasedExp; @@ -216,13 +235,16 @@ namespace hlsl int exp = int(lhsBiasedExp + rhsBiasedExp) - ieee754::traits::exponentBias; uint64_t sign = (lhsData ^ rhsData) & ieee754::traits::signMask; - - if (!FastMath && (tgmath::isnan(lhsData) || tgmath::isnan(rhsData))) - return bit_cast(ieee754::traits::quietNaN | sign); - if (!FastMath && (tgmath::isinf(lhsData) || tgmath::isinf(rhsData))) - return bit_cast(ieee754::traits::inf | sign); - if (!FastMath && impl::areBothZero(lhsData, rhsData)) - return bit_cast(sign); + if (FastMath) + { + if (tgmath::isnan(lhsData) || tgmath::isnan(rhsData)) + return bit_cast(ieee754::traits::quietNaN | sign); + if (tgmath::isinf(lhsData) || tgmath::isinf(rhsData)) + return bit_cast(ieee754::traits::inf | sign); + if (impl::areBothZero(lhsData, rhsData)) + return bit_cast(sign); + } + const uint64_t hi_l = (lhsMantissa >> 21) | (1ull << 31); const uint64_t lo_l = lhsMantissa & ((1ull << 21) - 1); 
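For reference, the hi_l/lo_l split above (top 32 bits of the 53-bit significand with the implicit leading 1 included, plus its low 21 bits) lets operator* form the upper bits of the 53x53-bit significand product using only 64-bit arithmetic; the lo_l * lo_r partial product is dropped, consistent with the round-toward-zero note added at the top of this file. A minimal host-side sketch of the same scheme (not part of the patch; mulSignificandsTruncated is a made-up name and the non-standard unsigned __int128 of GCC/Clang is used only as a cross-check):

#include <cstdint>
#include <cassert>

// Truncating product of two double-precision significands given their 52-bit stored
// mantissas, returning roughly (sigL * sigR) >> 52 via the same 32/21-bit split.
static uint64_t mulSignificandsTruncated(uint64_t mantL, uint64_t mantR)
{
    const uint64_t hiL = (mantL >> 21) | (1ull << 31), loL = mantL & ((1ull << 21) - 1);
    const uint64_t hiR = (mantR >> 21) | (1ull << 31), loR = mantR & ((1ull << 21) - 1);
    return ((hiL * hiR) >> 10) + ((hiL * loR + loL * hiR) >> 31);
}

int main()
{
    const uint64_t mantA = 0x000FF123456789ABull, mantB = 0x0003FEDCBA987654ull;
    const unsigned __int128 sigA = (1ull << 52) | mantA, sigB = (1ull << 52) | mantB;
    const uint64_t exact = uint64_t((sigA * sigB) >> 52);
    const uint64_t approx = mulSignificandsTruncated(mantA, mantB);
    // Only low-order partial products are discarded, so the truncated result can only
    // undercount the exact shifted product, and by no more than a couple of units.
    assert(exact >= approx && exact - approx <= 2);
    return 0;
}

Since both significands lie in [2^52, 2^53), the truncated product lands in [2^52, 2^54), so at most one renormalizing right shift is needed, which is what the (0x1ull << 53) check in the hunk below handles.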
@@ -232,6 +254,10 @@ namespace hlsl //const uint64_t RoundToNearest = (1ull << 31) - 1; uint64_t newPseudoMantissa = ((hi_l * hi_r) >> 10) + ((hi_l * lo_r + lo_l * hi_r/* + RoundToNearest*/) >> 31); + if (newPseudoMantissa == 0ull) + return _static_cast(0ull); + + if (newPseudoMantissa & (0x1ull << 53)) { newPseudoMantissa >>= 1; @@ -257,21 +283,23 @@ namespace hlsl { if (FlushDenormToZero) { - if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) - return bit_cast(ieee754::traits::quietNaN); - const uint64_t sign = (data ^ rhs.data) & ieee754::traits::signMask; - if (!FastMath && impl::isZero(rhs.data)) - return bit_cast(ieee754::traits::quietNaN | sign); - if (!FastMath && impl::areBothInfinity(data, rhs.data)) - return bit_cast(ieee754::traits::quietNaN | ieee754::traits::signMask); - if (!FastMath && tgmath::isinf(data)) - return bit_cast(ieee754::traits::inf | sign); - if (!FastMath && tgmath::isinf(rhs.data)) - return bit_cast(0ull | sign); - if (!FastMath && impl::isZero(rhs.data)) - return bit_cast(ieee754::traits::quietNaN | sign); + if(FastMath) + { + if (tgmath::isnan(data) || tgmath::isnan(rhs.data)) + return bit_cast(ieee754::traits::quietNaN); + if (impl::isZero(rhs.data)) + return bit_cast(ieee754::traits::quietNaN | sign); + if (impl::areBothInfinity(data, rhs.data)) + return bit_cast(ieee754::traits::quietNaN | ieee754::traits::signMask); + if (tgmath::isinf(data)) + return bit_cast(ieee754::traits::inf | sign); + if (tgmath::isinf(rhs.data)) + return bit_cast(0ull | sign); + if (impl::isZero(rhs.data)) + return bit_cast(ieee754::traits::quietNaN | sign); + } int lhsBiasedExp = ieee754::extractBiasedExponent(data); int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); @@ -287,10 +315,13 @@ namespace hlsl uint64_t2 lhsMantissaShifted = impl::shiftMantissaLeftBy53(lhsRealMantissa); uint64_t mantissa = impl::divmod128by64(lhsMantissaShifted.x, lhsMantissaShifted.y, rhsRealMantissa); - while (mantissa < (1ull << 52)) + const int msb = impl::_findMSB(mantissa); + if(msb != -1) { - mantissa <<= 1; - exp--; + const int shiftAmount = 52 - msb; + assert(shiftAmount >= 0); + mantissa <<= shiftAmount; + exp -= shiftAmount; } mantissa &= ieee754::traits::mantissaMask; @@ -305,13 +336,15 @@ namespace hlsl } // relational operators - // TODO: should `FlushDenormToZero` affect relational operators? 
bool operator==(this_t rhs) NBL_CONST_MEMBER_FUNC { - if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) - return false; - if (!FastMath && impl::areBothZero(data, rhs.data)) - return true; + if (FastMath) + { + if (tgmath::isnan(data) || tgmath::isnan(rhs.data)) + return false; + if (impl::areBothZero(data, rhs.data)) + return true; + } const emulated_float64_t xored = bit_cast(data ^ rhs.data); if ((xored.data & 0x7FFFFFFFFFFFFFFFull) == 0ull) @@ -321,19 +354,22 @@ namespace hlsl } bool operator!=(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { - if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) + if (FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) return false; return !(bit_cast(data) == rhs); } bool operator<(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { - if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) - return false; - if (!FastMath && impl::areBothInfinity(data, rhs.data)) - return false; - if (!FastMath && impl::areBothZero(data, rhs.data)) - return false; + if (FastMath) + { + if (tgmath::isnan(data) || tgmath::isnan(rhs.data)) + return false; + if (impl::areBothInfinity(data, rhs.data)) + return false; + if (impl::areBothZero(data, rhs.data)) + return false; + } const uint64_t lhsSign = ieee754::extractSign(data); const uint64_t rhsSign = ieee754::extractSign(rhs.data); @@ -346,12 +382,15 @@ namespace hlsl } bool operator>(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { - if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) - return false; - if (!FastMath && impl::areBothInfinity(data, rhs.data)) - return false; - if (!FastMath && impl::areBothZero(data, rhs.data)) - return false; + if (FastMath) + { + if (tgmath::isnan(data) || tgmath::isnan(rhs.data)) + return false; + if (impl::areBothInfinity(data, rhs.data)) + return false; + if (impl::areBothZero(data, rhs.data)) + return false; + } const uint64_t lhsSign = ieee754::extractSign(data); const uint64_t rhsSign = ieee754::extractSign(rhs.data); @@ -364,43 +403,25 @@ namespace hlsl } bool operator<=(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { - if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) + if (FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) return false; return !(bit_cast(data) > bit_cast(rhs.data)); } bool operator>=(emulated_float64_t rhs) { - if (!FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) + if (FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) return false; return !(bit_cast(data) < bit_cast(rhs.data)); } - //logical operators - bool operator&&(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { return bool(data) && bool(rhs.data); } - bool operator||(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { return bool(data) || bool(rhs.data); } - bool operator!() NBL_CONST_MEMBER_FUNC { return !bool(data); } - emulated_float64_t flipSign() { return bit_cast(data ^ ieee754::traits::signMask); } - NBL_CONSTEXPR_STATIC_INLINE bool supportsFastMath() - { - return FastMath; - } - - enum E_ROUNDING_MODE - { - FLOAT_ROUND_NEAREST_EVEN, - FLOAT_ROUND_TO_ZERO, - FLOAT_ROUND_DOWN, - FLOAT_ROUND_UP - }; - - static const E_ROUNDING_MODE RoundingMode = E_ROUNDING_MODE::FLOAT_ROUND_TO_ZERO; + NBL_CONSTEXPR_STATIC bool isFastMathSupported = FastMath; }; #define IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(...) 
\ @@ -490,7 +511,7 @@ struct static_cast_helper,void { const int exponent = ieee754::extractExponent(v.data); - if (!From::supportsFastMath()) + if (!From::isFastMathSuppoerted) { if (exponent > ieee754::traits::exponentMax) return bit_cast(ieee754::traits::inf); diff --git a/include/nbl/builtin/hlsl/emulated/emulated_float64_t_impl.hlsl b/include/nbl/builtin/hlsl/emulated/emulated_float64_t_impl.hlsl index 6778cd34b5..acb5adf713 100644 --- a/include/nbl/builtin/hlsl/emulated/emulated_float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/emulated/emulated_float64_t_impl.hlsl @@ -63,16 +63,6 @@ NBL_CONSTEXPR_INLINE_FUNC uint64_t packFloat64(uint32_t zSign, int zExp, uint32_ return output; } -NBL_CONSTEXPR_INLINE_FUNC uint32_t2 packUint64(uint64_t val) -{ - return uint32_t2((val & 0xFFFFFFFF00000000ull) >> 32, val & 0x00000000FFFFFFFFull); -} - -NBL_CONSTEXPR_INLINE_FUNC uint64_t unpackUint64(uint32_t2 val) -{ - return ((uint64_t(val.x) & 0x00000000FFFFFFFFull) << 32) | uint64_t(val.y); -} - template inline uint64_t castFloat32ToStorageType(float32_t val) { @@ -102,27 +92,36 @@ NBL_CONSTEXPR_INLINE_FUNC bool isZero(uint64_t val) return (val << 1) == 0; } -inline uint64_t castToUint64WithFloat64BitPattern(uint64_t val) +// TODO: where do i move this function? also rename +template +static inline int _findMSB(Int val) { - if (isZero(val)) - return val; + //static_assert(is_integral::value); +#ifndef __HLSL_VERSION + return nbl::hlsl::findMSB(val); +#else + return firstbithigh(val); +#endif +} +template <> +static inline int _findMSB(uint64_t val) +{ #ifndef __HLSL_VERSION - int exp = findMSB(val); + return nbl::hlsl::findMSB(val); #else - int exp = 63; - uint64_t mask = ieee754::traits::signMask; - while (!(val & mask)) - { - --exp; - mask >>= 1; - } + int msbHigh = firstbithigh(uint32_t(val >> 32)); + int msbLow = firstbithigh(uint32_t(val)); + return msbHigh != -1 ? msbHigh + 32 : msbLow; +#endif +} +inline uint64_t castToUint64WithFloat64BitPattern(uint64_t val) +{ + if (isZero(val)) + return val; - //uint32_t2 valPacked = packUint64(val); - //int exp = valPacked.x ? firstbithigh(valPacked.x) + 32 : firstbithigh(valPacked.y); - //exp = 63 - exp; -#endif + int exp = _findMSB(val); uint64_t mantissa; int shiftCnt = 52 - exp; @@ -197,13 +196,13 @@ NBL_CONSTEXPR_INLINE_FUNC uint64_t flushDenormToZero(uint64_t extractedBiasedExp return extractedBiasedExponent ? 
value : ieee754::extractSignPreserveBitPattern(value); } -static inline int countLeadingZeros32(uint32_t val) +template +static inline int countLeadingZeros(Int val) { -#ifndef __HLSL_VERSION - return 31 - findMSB(val); -#else - return 31 - firstbithigh(val); -#endif + static_assert(is_integral::value); + + NBL_CONSTEXPR_STATIC int BitCntSubOne = sizeof(Int) * 8 - 1; + return BitCntSubOne - _findMSB(val); } NBL_CONSTEXPR_INLINE_FUNC uint32_t2 shift64RightJamming(uint32_t2 val, int count) diff --git a/include/nbl/builtin/hlsl/math/equations/quadratic.hlsl b/include/nbl/builtin/hlsl/math/equations/quadratic.hlsl index 4d40e6f327..5c7d60a870 100644 --- a/include/nbl/builtin/hlsl/math/equations/quadratic.hlsl +++ b/include/nbl/builtin/hlsl/math/equations/quadratic.hlsl @@ -5,7 +5,7 @@ #ifndef _NBL_BUILTIN_HLSL_MATH_EQUATIONS_QUADRATIC_INCLUDED_ #define _NBL_BUILTIN_HLSL_MATH_EQUATIONS_QUADRATIC_INCLUDED_ -#include +#include // TODO: Later include from correct hlsl header #ifndef nbl_hlsl_FLT_EPSILON diff --git a/include/nbl/builtin/hlsl/portable_float64_t.hlsl b/include/nbl/builtin/hlsl/portable_float64_t.hlsl new file mode 100644 index 0000000000..e82c0092ad --- /dev/null +++ b/include/nbl/builtin/hlsl/portable_float64_t.hlsl @@ -0,0 +1,22 @@ +#ifndef _NBL_BUILTIN_HLSL_PORTABLE_FLOAT64_T_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PORTABLE_FLOAT64_T_INCLUDED_ + +#include +#include +namespace nbl +{ +namespace hlsl +{ +template +#ifdef __HLSL_VERSION +using portable_float64_t = typename conditional::shaderFloat64, float64_t, emulated_float64_t >::type; +#else +using portable_float64_t = float64_t; +#endif + +//static_assert(sizeof(portable_float64_t) == sizeof(float64_t)); + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/emulated/emulated_float64_t_utils.hlsl b/include/nbl/builtin/hlsl/portable_float64_t_math.hlsl similarity index 64% rename from include/nbl/builtin/hlsl/emulated/emulated_float64_t_utils.hlsl rename to include/nbl/builtin/hlsl/portable_float64_t_math.hlsl index 6bc017fb73..1b21785833 100644 --- a/include/nbl/builtin/hlsl/emulated/emulated_float64_t_utils.hlsl +++ b/include/nbl/builtin/hlsl/portable_float64_t_math.hlsl @@ -1,25 +1,15 @@ -#ifndef _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_UTILS_INCLUDED_ -#define _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_UTILS_INCLUDED_ +#ifndef _NBL_BUILTIN_HLSL_PORTABLE_FLOAT64_T_MATH_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PORTABLE_FLOAT64_T_MATH_INCLUDED_ #include #include #include +#include namespace nbl { namespace hlsl { -// TODO: enable -//template -//using portable_float64_t = conditional_t::shaderFloat64, float64_t, typename emulated_float64_t >; - -#ifndef __HLSL_VERSION -template -using portable_float64_t = typename conditional >::type; -#else -template -using portable_float64_t = typename conditional >::type; -#endif template struct emulated_vector {}; @@ -177,18 +167,11 @@ struct static_cast_helper,emulated_vector,void> } -//template -//struct emulated_matrix_base -//{ -// using vec_t = emulated_vector; -// vec_t columns[M]; -//}; - template -struct emulated_matrix {}; // : emulated_matrix_base {}; +struct emulated_matrix {}; template -struct emulated_matrix// : emulated_matrix_base +struct emulated_matrix { using vec_t = emulated_vector_t2; using type = emulated_matrix; @@ -208,7 +191,7 @@ struct emulated_matrix// : emulated_matrix_base// : emulated_matrix_base -struct emulated_matrix // : emulated_matrix_base +struct emulated_matrix { using vec_t = emulated_vector_t3; using type = emulated_matrix; @@ -261,7 +244,7 
@@ struct emulated_matrix // : emulated_matrix_base // : emulated_matrix_base; template using emulated_matrix_t3x3 = emulated_matrix; -namespace impl -{ - -template::value > +template::value> struct portable_vector -{ - using type = emulated_vector; -}; -// specialization for builtins -template -struct portable_vector { using type = vector; }; - -template::value > -struct portable_matrix -{ - using type = emulated_matrix; -}; - -template -struct portable_matrix +#ifdef __HLSL_VERSION +template +struct portable_vector { - using type = matrix; + using type = portable_vector; }; - -} +#endif template -using portable_vector_t = typename impl::portable_vector::type; +using portable_vector_t = typename portable_vector::type; template using portable_vector_t2 = portable_vector_t; @@ -342,88 +297,114 @@ using portable_vector_t3 = portable_vector_t; template using portable_vector_t4 = portable_vector_t; -using portable_vector64_t2 = portable_vector_t2 >; -using portable_vector64_t3 = portable_vector_t3 >; -using portable_vector64_t4 = portable_vector_t4 >; +#ifdef __HLSL_VERSION +template +using portable_vector64_t2 = portable_vector_t2 >; +template +using portable_vector64_t3 = portable_vector_t3 >; +template +using portable_vector64_t4 = portable_vector_t4 >; +#else +template +using portable_vector64_t2 = portable_vector_t2; +template +using portable_vector64_t3 = portable_vector_t3; +template +using portable_vector64_t4 = portable_vector_t4; +#endif +template::value> +struct portable_matrix +{ + using type = matrix; +}; +#ifdef __HLSL_VERSION template -using portable_matrix_t = typename impl::portable_matrix::type; +struct portable_matrix +{ + using type = emulated_matrix; +}; +#endif + +template +using portable_matrix_t = typename portable_matrix::type; template using portable_matrix_t2x2 = portable_matrix_t; template using portable_matrix_t3x3 = portable_matrix_t; -using portable_matrix64_t2x2 = portable_matrix_t2x2 >; -using portable_matrix64_t3x3 = portable_matrix_t3x3 >; - -template -NBL_CONSTEXPR_INLINE_FUNC portable_float64_t<> create_portable_float64_t(T val) -{ - return _static_cast >(val); -} - -template -NBL_CONSTEXPR_INLINE_FUNC portable_vector64_t2 create_portable_vector64_t2(T val) -{ - portable_vector64_t2 output; - output.x = create_portable_float64_t(val); - output.y = create_portable_float64_t(val); - - return output; -} - -template -NBL_CONSTEXPR_INLINE_FUNC portable_vector64_t2 create_portable_vector64_t2(X x, Y y) -{ - portable_vector64_t2 output; - output.x = create_portable_float64_t(x); - output.y = create_portable_float64_t(y); - - return output; -} - -template -NBL_CONSTEXPR_INLINE_FUNC portable_vector64_t2 create_portable_vector64_t2_from_2d_vec(VecType vec) -{ - portable_vector64_t2 output; - output.x = create_portable_float64_t(vec.x); - output.y = create_portable_float64_t(vec.y); - - return output; -} - -template -NBL_CONSTEXPR_INLINE_FUNC portable_vector64_t3 create_portable_vector64_t3(T val) -{ - portable_vector64_t3 output; - output.x = create_portable_float64_t(val); - output.y = create_portable_float64_t(val); - output.z = create_portable_float64_t(val); - - return output; -} - -template -NBL_CONSTEXPR_INLINE_FUNC portable_vector64_t3 create_portable_vector64_t3(X x, Y y, Z z) -{ - portable_vector64_t3 output; - output.x = create_portable_float64_t(x); - output.y = create_portable_float64_t(y); - output.z = create_portable_float64_t(z); - return output; -} +#ifdef __HLSL_VERSION +template +using portable_matrix64_t2x2 = portable_matrix_t2x2 >; +template 
+using portable_matrix64_t3x3 = portable_matrix_t3x3 >; +#else +template +using portable_matrix64_t2x2 = portable_matrix_t2x2; +template +using portable_matrix64_t3x3 = portable_matrix_t3x3; +#endif -template -NBL_CONSTEXPR_INLINE_FUNC portable_vector64_t3 create_portable_vector64_t2_from_3d_vec(VecType vec) +namespace impl { - portable_vector64_t3 output; - output.x = create_portable_float64_t(vec.x); - output.y = create_portable_float64_t(vec.y); - output.z = create_portable_float64_t(vec.z); - - return output; + template + struct static_cast_helper, vector, void> + { + static inline emulated_vector cast(vector vec) + { + return portable_vector_t(_static_cast(vec.x), _static_cast(vec.y)); + } + }; + + template + struct static_cast_helper, vector, void> + { + static inline emulated_vector cast(vector vec) + { + return portable_vector_t(_static_cast(vec.x), _static_cast(vec.y), _static_cast(vec.z)); + } + }; + + template + struct static_cast_helper, vector, void> + { + static inline emulated_vector cast(vector vec) + { + return portable_vector_t(_static_cast(vec.x), _static_cast(vec.y), _static_cast(vec.z), _static_cast(vec.w)); + } + }; + + /*template + struct static_cast_helper, From, void> + { + static inline emulated_vector cast(From val) + { + To vecComponent = To::create(val); + return emulated_vector(vecComponent, vecComponent); + } + }; + + template + struct static_cast_helper, From, void> + { + static inline emulated_vector cast(From val) + { + To vecComponent = To::create(val); + return emulated_vector(vecComponent, vecComponent, vecComponent); + } + }; + + template + struct static_cast_helper, vector, void> + { + static inline emulated_vector cast(From val) + { + To vecComponent = To::create(val); + return emulated_vector(vecComponent, vecComponent, vecComponent, vecComponent); + } + };*/ } namespace impl @@ -433,7 +414,14 @@ struct PortableMul64Helper { static inline V multiply(M mat, V vec) { - return mat * vec; + V output; + M matTransposed = mat.getTransposed(); + + output.x = (matTransposed.columns[0] * vec).calcComponentSum(); + output.y = (matTransposed.columns[1] * vec).calcComponentSum(); + output.z = (matTransposed.columns[2] * vec).calcComponentSum(); + + return output; } }; @@ -447,12 +435,19 @@ struct PortableMul64Helper }; } +#ifdef __HLSL_VERSION +template +V portableMul64(M mat, V vec) +{ + return impl::PortableMul64Helper >::multiply(mat, vec); +} +#else template V portableMul64(M mat, V vec) { - return impl::PortableMul64Helper >::multiply(mat, vec); + return impl::PortableMul64Helper::multiply(mat, vec); } - +#endif } } diff --git a/include/nbl/builtin/hlsl/shapes/beziers.hlsl b/include/nbl/builtin/hlsl/shapes/beziers.hlsl index 7fdab46942..92f3839240 100644 --- a/include/nbl/builtin/hlsl/shapes/beziers.hlsl +++ b/include/nbl/builtin/hlsl/shapes/beziers.hlsl @@ -11,7 +11,7 @@ #include #include #include -#include +#include // TODO: Later include from correct hlsl header (numeric_limits.hlsl) #ifndef nbl_hlsl_FLT_EPSILON @@ -511,10 +511,13 @@ struct Quadratic }; // This function returns the analytic quartic equation to solve for lhs bezier's t value for intersection with another bezier curve -template +template static math::equations::Quartic getBezierBezierIntersectionEquation(NBL_CONST_REF_ARG(QuadraticBezier) lhs, NBL_CONST_REF_ARG(QuadraticBezier) rhs) { using float_t2 = portable_vector_t2; + using float64 = portable_float64_t; + using float64_vec2 = portable_vector64_t2; + // Algorithm based on Computer Aided Geometric Design: // 
https://scholarsarchive.byu.edu/cgi/viewcontent.cgi?article=1000&context=facpub#page99 @@ -550,18 +553,18 @@ static math::equations::Quartic getBezierBezierIntersectionEquation(NBL Quadratic quadratic = Quadratic::constructFromBezier(lhs); // for convenience - const portable_vector64_t2 A = quadratic.A; - const portable_vector64_t2 B = quadratic.B; - const portable_vector64_t2 C = quadratic.C; + const float64_vec2 A = quadratic.A; + const float64_vec2 B = quadratic.B; + const float64_vec2 C = quadratic.C; // substitute parametric into implicit equation: // Getting the quartic params - portable_float64_t<> a = ((A.x * A.x) * k0) + (A.x * A.y * k1) + (A.y * A.y * k2); - portable_float64_t<> b = (A.x * B.x * k0 * 2.0f) + (A.x * B.y * k1) + (B.x * A.y * k1) + (A.y * B.y * k2 * 2.0f); - portable_float64_t<> c = (A.x * C.x * k0 * 2.0f) + (A.x * C.y * k1) + (A.x * k3) + ((B.x * B.x) * k0) + (B.x * B.y * k1) + (C.x * A.y * k1) + (A.y * C.y * k2 * 2.0f) + (A.y * k4) + ((B.y * B.y) * k2); - portable_float64_t<> d = (B.x * C.x * k0 * 2.0f) + (B.x * C.y * k1) + (B.x * k3) + (C.x * B.y * k1) + (B.y * C.y * k2 * 2.0f) + (B.y * k4); - portable_float64_t<> e = ((C.x * C.x) * k0) + (C.x * C.y * k1) + (C.x * k3) + ((C.y * C.y) * k2) + (C.y * k4) + (k5); + float64 a = ((A.x * A.x) * k0) + (A.x * A.y * k1) + (A.y * A.y * k2); + float64 b = (A.x * B.x * k0 * 2.0f) + (A.x * B.y * k1) + (B.x * A.y * k1) + (A.y * B.y * k2 * 2.0f); + float64 c = (A.x * C.x * k0 * 2.0f) + (A.x * C.y * k1) + (A.x * k3) + ((B.x * B.x) * k0) + (B.x * B.y * k1) + (C.x * A.y * k1) + (A.y * C.y * k2 * 2.0f) + (A.y * k4) + ((B.y * B.y) * k2); + float64 d = (B.x * C.x * k0 * 2.0f) + (B.x * C.y * k1) + (B.x * k3) + (C.x * B.y * k1) + (B.y * C.y * k2 * 2.0f) + (B.y * k4); + float64 e = ((C.x * C.x) * k0) + (C.x * C.y * k1) + (C.x * k3) + ((C.y * C.y) * k2) + (C.y * k4) + (k5); return math::equations::Quartic::construct( _static_cast(a), _static_cast(b), _static_cast(c), _static_cast(d), _static_cast(e)); diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index e06d9e2077..8c58add15a 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -233,12 +233,13 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/normalization/shared_nor LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/macros.h") #emulated LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/emulated_float64_t.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/emulated_float64_t_utils.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/emulated_float64_t_impl.hlsl") #ieee754 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ieee754/ieee754.hlsl") #utility LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tgmath.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable_float64_t.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable_float64_t_math.hlsl") #spirv intrinsics LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/spirv_intrinsics/core.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/spirv_intrinsics/fragment_shader_pixel_interlock.hlsl") From 826d39b25bc7e13a4792e99b4f6850142fac2eab Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 13 Sep 2024 18:17:03 +0100 Subject: [PATCH 050/432] Refactor --- examples_tests | 2 +- ...emulated_float64_t.hlsl => float64_t.hlsl} | 6 +- ...loat64_t_impl.hlsl => float64_t_impl.hlsl} | 36 +- .../nbl/builtin/hlsl/emulated/matrix_t.hlsl | 76 +++ .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 185 +++++++ 
.../builtin/hlsl/{ieee754 => }/ieee754.hlsl | 58 +-- include/nbl/builtin/hlsl/ieee754/impl.hlsl | 54 +++ .../hlsl/math/equations/quadratic.hlsl | 2 +- .../float64_t.hlsl} | 2 +- .../nbl/builtin/hlsl/portable/matrix_t.hlsl | 92 ++++ .../nbl/builtin/hlsl/portable/vector_t.hlsl | 54 +++ .../builtin/hlsl/portable_float64_t_math.hlsl | 454 ------------------ include/nbl/builtin/hlsl/shapes/beziers.hlsl | 5 +- include/nbl/builtin/hlsl/tgmath.hlsl | 2 +- src/nbl/builtin/CMakeLists.txt | 15 +- 15 files changed, 495 insertions(+), 548 deletions(-) rename include/nbl/builtin/hlsl/emulated/{emulated_float64_t.hlsl => float64_t.hlsl} (99%) rename include/nbl/builtin/hlsl/emulated/{emulated_float64_t_impl.hlsl => float64_t_impl.hlsl} (85%) create mode 100644 include/nbl/builtin/hlsl/emulated/matrix_t.hlsl create mode 100644 include/nbl/builtin/hlsl/emulated/vector_t.hlsl rename include/nbl/builtin/hlsl/{ieee754 => }/ieee754.hlsl (62%) create mode 100644 include/nbl/builtin/hlsl/ieee754/impl.hlsl rename include/nbl/builtin/hlsl/{portable_float64_t.hlsl => portable/float64_t.hlsl} (90%) create mode 100644 include/nbl/builtin/hlsl/portable/matrix_t.hlsl create mode 100644 include/nbl/builtin/hlsl/portable/vector_t.hlsl delete mode 100644 include/nbl/builtin/hlsl/portable_float64_t_math.hlsl diff --git a/examples_tests b/examples_tests index 904da40cc7..d6f0c587af 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 904da40cc731d5f25ff6319ec7f497b06c579b00 +Subproject commit d6f0c587af12531deea906d8fd582d70ee06db6c diff --git a/include/nbl/builtin/hlsl/emulated/emulated_float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl similarity index 99% rename from include/nbl/builtin/hlsl/emulated/emulated_float64_t.hlsl rename to include/nbl/builtin/hlsl/emulated/float64_t.hlsl index aa600e7eca..73d41b0001 100644 --- a/include/nbl/builtin/hlsl/emulated/emulated_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -1,7 +1,7 @@ -#ifndef _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_INCLUDED_ -#define _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_INCLUDED_ +#ifndef _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_HLSL_INCLUDED_ -#include +#include namespace nbl { diff --git a/include/nbl/builtin/hlsl/emulated/emulated_float64_t_impl.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl similarity index 85% rename from include/nbl/builtin/hlsl/emulated/emulated_float64_t_impl.hlsl rename to include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl index acb5adf713..4e5ad6c7be 100644 --- a/include/nbl/builtin/hlsl/emulated/emulated_float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl @@ -1,8 +1,8 @@ -#ifndef _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_IMPL_INCLUDED_ -#define _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_IMPL_INCLUDED_ +#ifndef _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_IMPL_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_IMPL_HLSL_INCLUDED_ #include -#include +#include #include #include #include @@ -196,37 +196,9 @@ NBL_CONSTEXPR_INLINE_FUNC uint64_t flushDenormToZero(uint64_t extractedBiasedExp return extractedBiasedExponent ? 
value : ieee754::extractSignPreserveBitPattern(value); } -template -static inline int countLeadingZeros(Int val) -{ - static_assert(is_integral::value); - - NBL_CONSTEXPR_STATIC int BitCntSubOne = sizeof(Int) * 8 - 1; - return BitCntSubOne - _findMSB(val); -} - -NBL_CONSTEXPR_INLINE_FUNC uint32_t2 shift64RightJamming(uint32_t2 val, int count) -{ - uint32_t2 output; - const int negCount = (-count) & 31; - - output.x = glsl::mix(0u, val.x, count == 0); - output.x = glsl::mix(output.x, (val.x >> count), count < 32); - - output.y = uint32_t((val.x | val.y) != 0u); /* count >= 64 */ - uint32_t z1_lt64 = (val.x>>(count & 31)) | uint32_t(((val.x<>count) | uint32_t((val.y< + +namespace nbl +{ +namespace hlsl +{ + +template +struct emulated_matrix {}; + +template +struct emulated_matrix +{ + using vec_t = emulated_vector_t2; + using this_t = emulated_matrix; + + vec_t columns[2]; + + this_t getTransposed() NBL_CONST_MEMBER_FUNC + { + this_t output; + + output.columns[0].x = columns[0].x; + output.columns[1].x = columns[0].y; + + output.columns[0].y = columns[1].x; + output.columns[1].y = columns[1].y; + + return output; + } +}; + +template +struct emulated_matrix +{ + using vec_t = emulated_vector_t3; + using this_t = emulated_matrix; + + vec_t columns[3]; + + this_t getTransposed() NBL_CONST_MEMBER_FUNC + { + this_t output; + + output.columns[0].x = columns[0].x; + output.columns[1].x = columns[0].y; + output.columns[2].x = columns[0].z; + + output.columns[0].y = columns[1].x; + output.columns[1].y = columns[1].y; + output.columns[2].y = columns[1].z; + + output.columns[0].z = columns[2].x; + output.columns[1].z = columns[2].y; + output.columns[2].z = columns[2].z; + + return output; + } + + vec_t operator[](uint32_t columnIdx) + { + return columns[columnIdx]; + } +}; + +template +using emulated_matrix_t2x2 = emulated_matrix; +template +using emulated_matrix_t3x3 = emulated_matrix; + +} +} +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl new file mode 100644 index 0000000000..d4809647e4 --- /dev/null +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -0,0 +1,185 @@ +#ifndef _NBL_BUILTIN_HLSL_EMULATED_VECTOR_T_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_EMULATED_VECTOR_T_HLSL_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ + +template +struct emulated_vector {}; + +template +struct emulated_vector +{ + using this_t = emulated_vector; + + EmulatedType x; + EmulatedType y; + + EmulatedType calcComponentSum() NBL_CONST_MEMBER_FUNC + { + return x + y; + } + + NBL_CONSTEXPR_STATIC_INLINE this_t create(EmulatedType x, EmulatedType y) + { + this_t output; + output.x = x; + output.y = y; + + return output; + } + + this_t operator+(float rhs) + { + this_t output; + EmulatedType rhsAsEF64 = EmulatedType::create(rhs); + output.x = x + rhsAsEF64; + output.y = y + rhsAsEF64; + + return output; + } + + this_t operator+(EmulatedType rhs) + { + this_t output; + output.x = x + rhs; + output.y = y + rhs; + + return output; + } + + this_t operator+(this_t rhs) + { + this_t output; + output.x = x + rhs.x; + output.y = y + rhs.y; + + return output; + } + + this_t operator-(float rhs) + { + return create(x, y) + (-rhs); + } + + this_t operator-(EmulatedType rhs) + { + return create(x, y) + (rhs.flipSign()); + } + + this_t operator-(this_t rhs) + { + rhs.x = rhs.x.flipSign(); + rhs.y = rhs.y.flipSign(); + return create(x, y) + rhs; + } + + this_t operator*(float rhs) + { + this_t output; + EmulatedType 
rhsAsEF64 = EmulatedType::create(rhs); + output.x = x * rhsAsEF64; + output.y = y * rhsAsEF64; + + return output; + } + + this_t operator*(EmulatedType rhs) + { + this_t output; + output.x = x * rhs; + output.y = y * rhs; + + return output; + } + + this_t operator*(this_t rhs) + { + this_t output; + output.x = x * rhs.x; + output.y = y * rhs.y; + + return output; + } +}; + +template +struct emulated_vector +{ + using this_t = emulated_vector; + + EmulatedType x; + EmulatedType y; + EmulatedType z; + + EmulatedType calcComponentSum() NBL_CONST_MEMBER_FUNC + { + return x + y + z; + } + + this_t operator*(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t output; + output.x = x * rhs.x; + output.y = y * rhs.y; + output.z = z * rhs.z; + + return output; + } +}; + +template +struct emulated_vector +{ + using type = emulated_vector; + + EmulatedType x; + EmulatedType y; + EmulatedType z; + EmulatedType w; +}; + +template +using emulated_vector_t2 = emulated_vector; +template +using emulated_vector_t3 = emulated_vector; +template +using emulated_vector_t4 = emulated_vector; + +namespace impl +{ +template +struct static_cast_helper, vector, void> +{ + static inline emulated_vector_t2 cast(vector vec) + { + return emulated_vector_t2(_static_cast(vec.x), _static_cast(vec.y)); + } +}; + +template +struct static_cast_helper, vector, void> +{ + static inline emulated_vector_t3 cast(vector vec) + { + return emulated_vector_t3(_static_cast(vec.x), _static_cast(vec.y), _static_cast(vec.z)); + } +}; + +template +struct static_cast_helper, vector, void> +{ + static inline emulated_vector_t4 cast(vector vec) + { + return emulated_vector_t4(_static_cast(vec.x), _static_cast(vec.y), _static_cast(vec.z), _static_cast(vec.w)); + } +}; +} + +} +} +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/ieee754/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl similarity index 62% rename from include/nbl/builtin/hlsl/ieee754/ieee754.hlsl rename to include/nbl/builtin/hlsl/ieee754.hlsl index e6e6477954..26593a2f62 100644 --- a/include/nbl/builtin/hlsl/ieee754/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -1,9 +1,7 @@ -#ifndef _NBL_BUILTIN_HLSL_IEE754_H_INCLUDED_ -#define _NBL_BUILTIN_HLSL_IEE754_H_INCLUDED_ +#ifndef _NBL_BUILTIN_HLSL_IEE754_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_IEE754_HLSL_INCLUDED_ -#include -#include -#include +#include namespace nbl { @@ -12,46 +10,10 @@ namespace hlsl namespace ieee754 { -// TODO: move to builtin/hlsl/impl/ieee754_impl.hlsl? 
-namespace impl -{ - template - NBL_CONSTEXPR_INLINE_FUNC bool isTypeAllowed() - { - return is_same::value || - is_same::value || - is_same::value || - is_same::value || - is_same::value || - is_same::value; - } - - template - NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type bitCastToUintType(T x) - { - using AsUint = typename unsigned_integer_of_size::type; - return bit_cast(x); - } - // to avoid bit cast from uintN_t to uintN_t - template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size<2>::type bitCastToUintType(uint16_t x) { return x; } - template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size<4>::type bitCastToUintType(uint32_t x) { return x; } - template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size<8>::type bitCastToUintType(uint64_t x) { return x; } - - template - NBL_CONSTEXPR_INLINE_FUNC T castBackToFloatType(T x) - { - using AsFloat = typename float_of_size::type; - return bit_cast(x); - } - template<> NBL_CONSTEXPR_INLINE_FUNC uint16_t castBackToFloatType(uint16_t x) { return x; } - template<> NBL_CONSTEXPR_INLINE_FUNC uint32_t castBackToFloatType(uint32_t x) { return x; } - template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t castBackToFloatType(uint64_t x) { return x; } -} - template struct traits_base { - static_assert(is_same::value || is_same::value); + static_assert(is_same::value || is_same::value || is_same::value); NBL_CONSTEXPR_STATIC_INLINE int16_t exponentBitCnt = int16_t(0xbeef); NBL_CONSTEXPR_STATIC_INLINE int16_t mantissaBitCnt = int16_t(0xbeef); }; @@ -102,7 +64,7 @@ template inline uint32_t extractBiasedExponent(T x) { using AsUint = typename unsigned_integer_of_size::type; - return glsl::bitfieldExtract(impl::bitCastToUintType(x), traits::type>::mantissaBitCnt, traits::type>::exponentBitCnt); + return glsl::bitfieldExtract(ieee754::impl::bitCastToUintType(x), traits::type>::mantissaBitCnt, traits::type>::exponentBitCnt); } template<> @@ -115,7 +77,7 @@ inline uint32_t extractBiasedExponent(uint64_t x) template<> inline uint32_t extractBiasedExponent(float64_t x) { - return extractBiasedExponent(impl::bitCastToUintType(x)); + return extractBiasedExponent(ieee754::impl::bitCastToUintType(x)); } template @@ -131,7 +93,7 @@ NBL_CONSTEXPR_INLINE_FUNC T replaceBiasedExponent(T x, typename unsigned_integer // TODO: //staticAssertTmp(impl::isTypeAllowed(), "Invalid type! 
Only floating point or unsigned integer types are allowed."); using AsFloat = typename float_of_size::type; - return impl::castBackToFloatType(glsl::bitfieldInsert(impl::bitCastToUintType(x), biasedExp, traits::mantissaBitCnt, traits::exponentBitCnt)); + return impl::castBackToFloatType(glsl::bitfieldInsert(ieee754::impl::bitCastToUintType(x), biasedExp, traits::mantissaBitCnt, traits::exponentBitCnt)); } // performs no overflow tests, returns x*exp2(n) @@ -145,7 +107,7 @@ template NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractMantissa(T x) { using AsUint = typename unsigned_integer_of_size::type; - return impl::bitCastToUintType(x) & traits::type>::mantissaMask; + return ieee754::impl::bitCastToUintType(x) & traits::type>::mantissaMask; } template @@ -160,14 +122,14 @@ template NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSign(T x) { using AsFloat = typename float_of_size::type; - return (impl::bitCastToUintType(x) & traits::signMask) >> ((sizeof(T) * 8) - 1); + return (ieee754::impl::bitCastToUintType(x) & traits::signMask) >> ((sizeof(T) * 8) - 1); } template NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) { using AsFloat = typename float_of_size::type; - return impl::bitCastToUintType(x) & traits::signMask; + return ieee754::impl::bitCastToUintType(x) & traits::signMask; } } diff --git a/include/nbl/builtin/hlsl/ieee754/impl.hlsl b/include/nbl/builtin/hlsl/ieee754/impl.hlsl new file mode 100644 index 0000000000..e17eb9a8c7 --- /dev/null +++ b/include/nbl/builtin/hlsl/ieee754/impl.hlsl @@ -0,0 +1,54 @@ +#ifndef _NBL_BUILTIN_HLSL_IEE754_IMPL_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_IEE754_IMPL_HLSL_INCLUDED_ + +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace ieee754 +{ + +namespace impl +{ +template +NBL_CONSTEXPR_INLINE_FUNC bool isTypeAllowed() +{ + return is_same::value || + is_same::value || + is_same::value || + is_same::value || + is_same::value || + is_same::value; +} + +template +NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type bitCastToUintType(T x) +{ + using AsUint = typename unsigned_integer_of_size::type; + return bit_cast(x); +} +// to avoid bit cast from uintN_t to uintN_t +template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size<2>::type bitCastToUintType(uint16_t x) { return x; } +template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size<4>::type bitCastToUintType(uint32_t x) { return x; } +template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size<8>::type bitCastToUintType(uint64_t x) { return x; } + +template +NBL_CONSTEXPR_INLINE_FUNC T castBackToFloatType(T x) +{ + using AsFloat = typename float_of_size::type; + return bit_cast(x); +} +template<> NBL_CONSTEXPR_INLINE_FUNC uint16_t castBackToFloatType(uint16_t x) { return x; } +template<> NBL_CONSTEXPR_INLINE_FUNC uint32_t castBackToFloatType(uint32_t x) { return x; } +template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t castBackToFloatType(uint64_t x) { return x; } +} + +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/math/equations/quadratic.hlsl b/include/nbl/builtin/hlsl/math/equations/quadratic.hlsl index 5c7d60a870..1f93e0c5ff 100644 --- a/include/nbl/builtin/hlsl/math/equations/quadratic.hlsl +++ b/include/nbl/builtin/hlsl/math/equations/quadratic.hlsl @@ -5,7 +5,7 @@ #ifndef _NBL_BUILTIN_HLSL_MATH_EQUATIONS_QUADRATIC_INCLUDED_ #define _NBL_BUILTIN_HLSL_MATH_EQUATIONS_QUADRATIC_INCLUDED_ -#include +#include // 
TODO: Later include from correct hlsl header #ifndef nbl_hlsl_FLT_EPSILON diff --git a/include/nbl/builtin/hlsl/portable_float64_t.hlsl b/include/nbl/builtin/hlsl/portable/float64_t.hlsl similarity index 90% rename from include/nbl/builtin/hlsl/portable_float64_t.hlsl rename to include/nbl/builtin/hlsl/portable/float64_t.hlsl index e82c0092ad..92b6e53133 100644 --- a/include/nbl/builtin/hlsl/portable_float64_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/float64_t.hlsl @@ -1,7 +1,7 @@ #ifndef _NBL_BUILTIN_HLSL_PORTABLE_FLOAT64_T_INCLUDED_ #define _NBL_BUILTIN_HLSL_PORTABLE_FLOAT64_T_INCLUDED_ -#include +#include #include namespace nbl { diff --git a/include/nbl/builtin/hlsl/portable/matrix_t.hlsl b/include/nbl/builtin/hlsl/portable/matrix_t.hlsl new file mode 100644 index 0000000000..8cf3d8233f --- /dev/null +++ b/include/nbl/builtin/hlsl/portable/matrix_t.hlsl @@ -0,0 +1,92 @@ +#ifndef _NBL_BUILTIN_HLSL_PORTABLE_MATRIX_T_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PORTABLE_MATRIX_T_HLSL_INCLUDED_ + +#include +#include + +namespace nbl +{ +namespace hlsl +{ + +template::value> +struct portable_matrix +{ + using type = matrix; +}; +#ifdef __HLSL_VERSION +template +struct portable_matrix +{ + using type = emulated_matrix; +}; +#endif + +template +using portable_matrix_t = typename portable_matrix::type; + +template +using portable_matrix_t2x2 = portable_matrix_t; +template +using portable_matrix_t3x3 = portable_matrix_t; + + +#ifdef __HLSL_VERSION +template +using portable_matrix64_t2x2 = portable_matrix_t2x2 >; +template +using portable_matrix64_t3x3 = portable_matrix_t3x3 >; +#else +template +using portable_matrix64_t2x2 = portable_matrix_t2x2; +template +using portable_matrix64_t3x3 = portable_matrix_t3x3; +#endif + +namespace impl +{ +// TODO: move to emulated/matrix.hlsl +template +struct PortableMul64Helper +{ + static inline V multiply(M mat, V vec) + { + V output; + M matTransposed = mat.getTransposed(); + + output.x = (matTransposed.columns[0] * vec).calcComponentSum(); + output.y = (matTransposed.columns[1] * vec).calcComponentSum(); + output.z = (matTransposed.columns[2] * vec).calcComponentSum(); + + return output; + } +}; + +template +struct PortableMul64Helper +{ + static inline V multiply(M mat, V vec) + { + return mul(mat, vec); + } +}; +} + +#ifdef __HLSL_VERSION +template +V portableMul64(M mat, V vec) +{ + return impl::PortableMul64Helper >::multiply(mat, vec); +} +#else +template +V portableMul64(M mat, V vec) +{ + return impl::PortableMul64Helper::multiply(mat, vec); +} +#endif + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/portable/vector_t.hlsl b/include/nbl/builtin/hlsl/portable/vector_t.hlsl new file mode 100644 index 0000000000..474975fb21 --- /dev/null +++ b/include/nbl/builtin/hlsl/portable/vector_t.hlsl @@ -0,0 +1,54 @@ +#ifndef _NBL_BUILTIN_HLSL_PORTABLE_VECTOR_T_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PORTABLE_VECTOR_T_HLSL_INCLUDED_ + +#include +#include + +namespace nbl +{ +namespace hlsl +{ + +template::value> +struct portable_vector +{ + using type = vector; +}; +#ifdef __HLSL_VERSION +template +struct portable_vector +{ + using type = portable_vector; +}; +#endif + +template +using portable_vector_t = typename portable_vector::type; + +template +using portable_vector_t2 = portable_vector_t; +template +using portable_vector_t3 = portable_vector_t; +template +using portable_vector_t4 = portable_vector_t; + +#ifdef __HLSL_VERSION +template +using portable_vector64_t2 = portable_vector_t2 >; +template +using 
portable_vector64_t3 = portable_vector_t3 >; +template +using portable_vector64_t4 = portable_vector_t4 >; +#else +template +using portable_vector64_t2 = portable_vector_t2; +template +using portable_vector64_t3 = portable_vector_t3; +template +using portable_vector64_t4 = portable_vector_t4; +#endif + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/portable_float64_t_math.hlsl b/include/nbl/builtin/hlsl/portable_float64_t_math.hlsl deleted file mode 100644 index 1b21785833..0000000000 --- a/include/nbl/builtin/hlsl/portable_float64_t_math.hlsl +++ /dev/null @@ -1,454 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_PORTABLE_FLOAT64_T_MATH_INCLUDED_ -#define _NBL_BUILTIN_HLSL_PORTABLE_FLOAT64_T_MATH_INCLUDED_ - -#include -#include -#include -#include - -namespace nbl -{ -namespace hlsl -{ - -template -struct emulated_vector {}; - -template -struct emulated_vector -{ - using type = emulated_vector; - - EmulatedType x; - EmulatedType y; - - EmulatedType calcComponentSum() NBL_CONST_MEMBER_FUNC - { - return x + y; - } - - NBL_CONSTEXPR_STATIC_INLINE type create(EmulatedType x, EmulatedType y) - { - type output; - output.x = x; - output.y = y; - - return output; - } - - type operator+(float rhs) - { - type output; - EmulatedType rhsAsEF64 = EmulatedType::create(rhs); - output.x = x + rhsAsEF64; - output.y = y + rhsAsEF64; - - return output; - } - - type operator+(EmulatedType rhs) - { - type output; - output.x = x + rhs; - output.y = y + rhs; - - return output; - } - - type operator+(type rhs) - { - type output; - output.x = x + rhs.x; - output.y = y + rhs.y; - - return output; - } - - type operator-(float rhs) - { - return create(x, y) + (-rhs); - } - - type operator-(EmulatedType rhs) - { - return create(x, y) + (rhs.flipSign()); - } - - type operator-(type rhs) - { - rhs.x = rhs.x.flipSign(); - rhs.y = rhs.y.flipSign(); - return create(x, y) + rhs; - } - - type operator*(float rhs) - { - type output; - EmulatedType rhsAsEF64 = EmulatedType::create(rhs); - output.x = x * rhsAsEF64; - output.y = y * rhsAsEF64; - - return output; - } - - type operator*(EmulatedType rhs) - { - type output; - output.x = x * rhs; - output.y = y * rhs; - - return output; - } - - type operator*(type rhs) - { - type output; - output.x = x * rhs.x; - output.y = y * rhs.y; - - return output; - } -}; - -template -struct emulated_vector -{ - using type = emulated_vector; - - EmulatedType x; - EmulatedType y; - EmulatedType z; - - EmulatedType calcComponentSum() NBL_CONST_MEMBER_FUNC - { - return x + y + z; - } - - type operator*(NBL_CONST_REF_ARG(type) rhs) NBL_CONST_MEMBER_FUNC - { - type output; - output.x = x * rhs.x; - output.y = y * rhs.y; - output.z = z * rhs.z; - - return output; - } -}; - -template -struct emulated_vector -{ - using type = emulated_vector; - - EmulatedType x; - EmulatedType y; - EmulatedType z; - EmulatedType w; -}; - -template -using emulated_vector_t2 = emulated_vector; -template -using emulated_vector_t3 = emulated_vector; -template -using emulated_vector_t4 = emulated_vector; - -namespace impl -{ - -template -struct static_cast_helper,emulated_vector,void> -{ - static inline vector cast(emulated_vector vec) - { - return vector(_static_cast(vec.x), _static_cast(vec.y)); - } -}; - -} - -template -struct emulated_matrix {}; - -template -struct emulated_matrix -{ - using vec_t = emulated_vector_t2; - using type = emulated_matrix; - - vec_t columns[2]; - - type getTransposed() NBL_CONST_MEMBER_FUNC - { - type output; - - output.columns[0].x = columns[0].x; - 
output.columns[1].x = columns[0].y; - - output.columns[0].y = columns[1].x; - output.columns[1].y = columns[1].y; - - return output; - } - - /*type operator*(NBL_CONST_REF_ARG(type) rhs) NBL_CONST_MEMBER_FUNC - { - type output; - type lhsTransposed = getTransposed(); - - output.columns[0].x = (lhsTransposed.columns[0] * rhs.columns[0]).calcComponentSum(); - output.columns[0].y = (lhsTransposed.columns[0] * rhs.columns[1]).calcComponentSum(); - - output.columns[1].x = (lhsTransposed.columns[1] * rhs.columns[0]).calcComponentSum(); - output.columns[1].y = (lhsTransposed.columns[1] * rhs.columns[1]).calcComponentSum(); - - return output.getTransposed(); - } - - vec_t operator*(NBL_CONST_REF_ARG(vec_t) rhs) - { - vec_t output; - type lhsTransposed = getTransposed(); - - output.x = (columns[0] * rhs).calcComponentSum(); - output.y = (columns[1] * rhs).calcComponentSum(); - - return output; - }*/ -}; - -template -struct emulated_matrix -{ - using vec_t = emulated_vector_t3; - using type = emulated_matrix; - - vec_t columns[3]; - - type getTransposed() NBL_CONST_MEMBER_FUNC - { - type output; - - output.columns[0].x = columns[0].x; - output.columns[1].x = columns[0].y; - output.columns[2].x = columns[0].z; - - output.columns[0].y = columns[1].x; - output.columns[1].y = columns[1].y; - output.columns[2].y = columns[1].z; - - output.columns[0].z = columns[2].x; - output.columns[1].z = columns[2].y; - output.columns[2].z = columns[2].z; - - return output; - } - - /*type operator*(NBL_CONST_REF_ARG(type) rhs) NBL_CONST_MEMBER_FUNC - { - type output; - - output.columns[0].x = (columns[0] * rhs.columns[0]).calcComponentSum(); - output.columns[0].y = (columns[0] * rhs.columns[1]).calcComponentSum(); - output.columns[0].z = (columns[0] * rhs.columns[2]).calcComponentSum(); - - output.columns[1].x = (columns[1] * rhs.columns[0]).calcComponentSum(); - output.columns[1].y = (columns[1] * rhs.columns[1]).calcComponentSum(); - output.columns[1].z = (columns[1] * rhs.columns[2]).calcComponentSum(); - - output.columns[2].x = (columns[2] * rhs.columns[0]).calcComponentSum(); - output.columns[2].y = (columns[2] * rhs.columns[1]).calcComponentSum(); - output.columns[2].z = (columns[2] * rhs.columns[2]).calcComponentSum(); - - return output.getTransposed(); - }*/ - - vec_t operator[](uint32_t columnIdx) - { - return columns[columnIdx]; - } -}; - -template -using emulated_matrix_t2x2 = emulated_matrix; -template -using emulated_matrix_t3x3 = emulated_matrix; - -template::value> -struct portable_vector -{ - using type = vector; -}; -#ifdef __HLSL_VERSION -template -struct portable_vector -{ - using type = portable_vector; -}; -#endif - -template -using portable_vector_t = typename portable_vector::type; - -template -using portable_vector_t2 = portable_vector_t; -template -using portable_vector_t3 = portable_vector_t; -template -using portable_vector_t4 = portable_vector_t; - -#ifdef __HLSL_VERSION -template -using portable_vector64_t2 = portable_vector_t2 >; -template -using portable_vector64_t3 = portable_vector_t3 >; -template -using portable_vector64_t4 = portable_vector_t4 >; -#else -template -using portable_vector64_t2 = portable_vector_t2; -template -using portable_vector64_t3 = portable_vector_t3; -template -using portable_vector64_t4 = portable_vector_t4; -#endif - -template::value> -struct portable_matrix -{ - using type = matrix; -}; -#ifdef __HLSL_VERSION -template -struct portable_matrix -{ - using type = emulated_matrix; -}; -#endif - -template -using portable_matrix_t = typename portable_matrix::type; - 
-template -using portable_matrix_t2x2 = portable_matrix_t; -template -using portable_matrix_t3x3 = portable_matrix_t; - - -#ifdef __HLSL_VERSION -template -using portable_matrix64_t2x2 = portable_matrix_t2x2 >; -template -using portable_matrix64_t3x3 = portable_matrix_t3x3 >; -#else -template -using portable_matrix64_t2x2 = portable_matrix_t2x2; -template -using portable_matrix64_t3x3 = portable_matrix_t3x3; -#endif - -namespace impl -{ - template - struct static_cast_helper, vector, void> - { - static inline emulated_vector cast(vector vec) - { - return portable_vector_t(_static_cast(vec.x), _static_cast(vec.y)); - } - }; - - template - struct static_cast_helper, vector, void> - { - static inline emulated_vector cast(vector vec) - { - return portable_vector_t(_static_cast(vec.x), _static_cast(vec.y), _static_cast(vec.z)); - } - }; - - template - struct static_cast_helper, vector, void> - { - static inline emulated_vector cast(vector vec) - { - return portable_vector_t(_static_cast(vec.x), _static_cast(vec.y), _static_cast(vec.z), _static_cast(vec.w)); - } - }; - - /*template - struct static_cast_helper, From, void> - { - static inline emulated_vector cast(From val) - { - To vecComponent = To::create(val); - return emulated_vector(vecComponent, vecComponent); - } - }; - - template - struct static_cast_helper, From, void> - { - static inline emulated_vector cast(From val) - { - To vecComponent = To::create(val); - return emulated_vector(vecComponent, vecComponent, vecComponent); - } - }; - - template - struct static_cast_helper, vector, void> - { - static inline emulated_vector cast(From val) - { - To vecComponent = To::create(val); - return emulated_vector(vecComponent, vecComponent, vecComponent, vecComponent); - } - };*/ -} - -namespace impl -{ -template -struct PortableMul64Helper -{ - static inline V multiply(M mat, V vec) - { - V output; - M matTransposed = mat.getTransposed(); - - output.x = (matTransposed.columns[0] * vec).calcComponentSum(); - output.y = (matTransposed.columns[1] * vec).calcComponentSum(); - output.z = (matTransposed.columns[2] * vec).calcComponentSum(); - - return output; - } -}; - -template -struct PortableMul64Helper -{ - static inline V multiply(M mat, V vec) - { - return mul(mat, vec); - } -}; -} - -#ifdef __HLSL_VERSION -template -V portableMul64(M mat, V vec) -{ - return impl::PortableMul64Helper >::multiply(mat, vec); -} -#else -template -V portableMul64(M mat, V vec) -{ - return impl::PortableMul64Helper::multiply(mat, vec); -} -#endif - -} -} -#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/shapes/beziers.hlsl b/include/nbl/builtin/hlsl/shapes/beziers.hlsl index 92f3839240..936a37fa7e 100644 --- a/include/nbl/builtin/hlsl/shapes/beziers.hlsl +++ b/include/nbl/builtin/hlsl/shapes/beziers.hlsl @@ -10,8 +10,9 @@ #include #include #include -#include -#include +#include +#include +#include // TODO: Later include from correct hlsl header (numeric_limits.hlsl) #ifndef nbl_hlsl_FLT_EPSILON diff --git a/include/nbl/builtin/hlsl/tgmath.hlsl b/include/nbl/builtin/hlsl/tgmath.hlsl index d603dea2c0..0edb47eb1f 100644 --- a/include/nbl/builtin/hlsl/tgmath.hlsl +++ b/include/nbl/builtin/hlsl/tgmath.hlsl @@ -5,7 +5,7 @@ #define _NBL_BUILTIN_HLSL_TGMATH_INCLUDED_ #include -#include +#include #include namespace nbl diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 8c58add15a..f06deb7c46 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -232,14 +232,19 @@ 
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/normalization/shared_nor # HLSL LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/macros.h") #emulated -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/emulated_float64_t.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/emulated_float64_t_impl.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t_impl.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/vector_t_impl.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/matrix_t_impl.hlsl") +#portable +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/float64_t.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/vector_t.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/matrix_t.hlsl") #ieee754 -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ieee754/ieee754.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ieee754.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ieee754/impl.hlsl") #utility LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tgmath.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable_float64_t.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable_float64_t_math.hlsl") #spirv intrinsics LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/spirv_intrinsics/core.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/spirv_intrinsics/fragment_shader_pixel_interlock.hlsl") From fb0ebd323c9cce1e5fc6502ceb4d13022b473f09 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 13 Sep 2024 21:51:03 +0100 Subject: [PATCH 051/432] More refactor --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated/float64_t.hlsl | 88 +++++++++---------- .../builtin/hlsl/emulated/float64_t_impl.hlsl | 38 +------- include/nbl/builtin/hlsl/tgmath.hlsl | 32 +++++-- 4 files changed, 71 insertions(+), 89 deletions(-) diff --git a/examples_tests b/examples_tests index d6f0c587af..7f7b48596a 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit d6f0c587af12531deea906d8fd582d70ee06db6c +Subproject commit 7f7b48596a61c43d5d72b1e032525cc07ece91a8 diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index 73d41b0001..6051cee7b5 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -38,28 +38,28 @@ namespace hlsl NBL_CONSTEXPR_STATIC_INLINE this_t create(int32_t val) { - return bit_cast(impl::castToUint64WithFloat64BitPattern(int64_t(val))); + return bit_cast(emulated_float64_t_impl::castToUint64WithFloat64BitPattern(int64_t(val))); } NBL_CONSTEXPR_STATIC_INLINE this_t create(int64_t val) { - return bit_cast(impl::castToUint64WithFloat64BitPattern(val)); + return bit_cast(emulated_float64_t_impl::castToUint64WithFloat64BitPattern(val)); } NBL_CONSTEXPR_STATIC_INLINE this_t create(uint32_t val) { - return bit_cast(impl::castToUint64WithFloat64BitPattern(uint64_t(val))); + return bit_cast(emulated_float64_t_impl::castToUint64WithFloat64BitPattern(uint64_t(val))); } NBL_CONSTEXPR_STATIC_INLINE this_t create(uint64_t val) { - return bit_cast(impl::castToUint64WithFloat64BitPattern(val)); + return bit_cast(emulated_float64_t_impl::castToUint64WithFloat64BitPattern(val)); } NBL_CONSTEXPR_STATIC_INLINE this_t create(float32_t val) { this_t output; - output.data = impl::castFloat32ToStorageType(val); + output.data = 
emulated_float64_t_impl::castFloat32ToStorageType(val); return output; } @@ -89,10 +89,10 @@ namespace hlsl { if(FastMath) { - if (tgmath::isnan(data) || tgmath::isnan(rhs.data)) + if (tgmath::isNaN(data) || tgmath::isNaN(rhs.data)) return bit_cast(ieee754::traits::quietNaN); - if (impl::areBothInfinity(data, rhs.data)) + if (emulated_float64_t_impl::areBothInfinity(data, rhs.data)) { uint64_t lhsSign = data & ieee754::traits::signMask; uint64_t rhsSign = rhs.data & ieee754::traits::signMask; @@ -103,25 +103,25 @@ namespace hlsl return bit_cast(ieee754::traits::quietNaN | ieee754::traits::signMask); } - if (tgmath::isinf(data)) + if (tgmath::isInf(data)) return bit_cast(data); - if (tgmath::isinf(rhs.data)) + if (tgmath::isInf(rhs.data)) return bit_cast(rhs.data); } const int lhsBiasedExp = ieee754::extractBiasedExponent(data); const int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); - uint64_t lhsData = impl::flushDenormToZero(lhsBiasedExp, data); - uint64_t rhsData = impl::flushDenormToZero(rhsBiasedExp, rhs.data); + uint64_t lhsData = emulated_float64_t_impl::flushDenormToZero(lhsBiasedExp, data); + uint64_t rhsData = emulated_float64_t_impl::flushDenormToZero(rhsBiasedExp, rhs.data); uint64_t lhsSign = ieee754::extractSignPreserveBitPattern(lhsData); uint64_t rhsSign = ieee754::extractSignPreserveBitPattern(rhsData); if(FastMath) { - if (impl::areBothZero(lhsData, rhsData)) + if (emulated_float64_t_impl::areBothZero(lhsData, rhsData)) { if (lhsSign == rhsSign) return bit_cast(lhsSign); @@ -129,7 +129,7 @@ namespace hlsl return bit_cast(0ull); } - if (tgmath::isinf(lhsData)) + if (tgmath::isInf(lhsData)) return bit_cast(ieee754::traits::inf | ieee754::extractSignPreserveBitPattern(max(lhsData, rhsData))); } @@ -188,7 +188,7 @@ namespace hlsl } resultMantissa &= ieee754::traits::mantissaMask; - uint64_t output = impl::assembleFloat64(lhsSign, resultBiasedExp << ieee754::traits::mantissaBitCnt, resultMantissa); + uint64_t output = emulated_float64_t_impl::assembleFloat64(lhsSign, resultBiasedExp << ieee754::traits::mantissaBitCnt, resultMantissa); return bit_cast(output); } @@ -224,8 +224,8 @@ namespace hlsl int lhsBiasedExp = ieee754::extractBiasedExponent(data); int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); - uint64_t lhsData = impl::flushDenormToZero(lhsBiasedExp, data); - uint64_t rhsData = impl::flushDenormToZero(rhsBiasedExp, rhs.data); + uint64_t lhsData = emulated_float64_t_impl::flushDenormToZero(lhsBiasedExp, data); + uint64_t rhsData = emulated_float64_t_impl::flushDenormToZero(rhsBiasedExp, rhs.data); uint64_t lhsSign = lhsData & ieee754::traits::signMask; uint64_t rhsSign = rhsData & ieee754::traits::signMask; @@ -237,11 +237,11 @@ namespace hlsl uint64_t sign = (lhsData ^ rhsData) & ieee754::traits::signMask; if (FastMath) { - if (tgmath::isnan(lhsData) || tgmath::isnan(rhsData)) + if (tgmath::isNaN(lhsData) || tgmath::isNaN(rhsData)) return bit_cast(ieee754::traits::quietNaN | sign); - if (tgmath::isinf(lhsData) || tgmath::isinf(rhsData)) + if (tgmath::isInf(lhsData) || tgmath::isInf(rhsData)) return bit_cast(ieee754::traits::inf | sign); - if (impl::areBothZero(lhsData, rhsData)) + if (emulated_float64_t_impl::areBothZero(lhsData, rhsData)) return bit_cast(sign); } @@ -265,7 +265,7 @@ namespace hlsl } newPseudoMantissa &= (ieee754::traits::mantissaMask); - return bit_cast(impl::assembleFloat64(sign, uint64_t(exp) << ieee754::traits::mantissaBitCnt, newPseudoMantissa)); + return bit_cast(emulated_float64_t_impl::assembleFloat64(sign, uint64_t(exp) 
<< ieee754::traits::mantissaBitCnt, newPseudoMantissa)); } else { @@ -287,35 +287,35 @@ namespace hlsl if(FastMath) { - if (tgmath::isnan(data) || tgmath::isnan(rhs.data)) + if (tgmath::isNaN(data) || tgmath::isNaN(rhs.data)) return bit_cast(ieee754::traits::quietNaN); - if (impl::isZero(rhs.data)) + if (emulated_float64_t_impl::isZero(rhs.data)) return bit_cast(ieee754::traits::quietNaN | sign); - if (impl::areBothInfinity(data, rhs.data)) + if (emulated_float64_t_impl::areBothInfinity(data, rhs.data)) return bit_cast(ieee754::traits::quietNaN | ieee754::traits::signMask); - if (tgmath::isinf(data)) + if (tgmath::isInf(data)) return bit_cast(ieee754::traits::inf | sign); - if (tgmath::isinf(rhs.data)) + if (tgmath::isInf(rhs.data)) return bit_cast(0ull | sign); - if (impl::isZero(rhs.data)) + if (emulated_float64_t_impl::isZero(rhs.data)) return bit_cast(ieee754::traits::quietNaN | sign); } int lhsBiasedExp = ieee754::extractBiasedExponent(data); int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); - uint64_t lhsData = impl::flushDenormToZero(lhsBiasedExp, data); - uint64_t rhsData = impl::flushDenormToZero(rhsBiasedExp, rhs.data); + uint64_t lhsData = emulated_float64_t_impl::flushDenormToZero(lhsBiasedExp, data); + uint64_t rhsData = emulated_float64_t_impl::flushDenormToZero(rhsBiasedExp, rhs.data); const uint64_t lhsRealMantissa = (ieee754::extractMantissa(lhsData) | (1ull << ieee754::traits::mantissaBitCnt)); const uint64_t rhsRealMantissa = ieee754::extractMantissa(rhsData) | (1ull << ieee754::traits::mantissaBitCnt); int exp = lhsBiasedExp - rhsBiasedExp + int(ieee754::traits::exponentBias); - uint64_t2 lhsMantissaShifted = impl::shiftMantissaLeftBy53(lhsRealMantissa); - uint64_t mantissa = impl::divmod128by64(lhsMantissaShifted.x, lhsMantissaShifted.y, rhsRealMantissa); + uint64_t2 lhsMantissaShifted = emulated_float64_t_impl::shiftMantissaLeftBy53(lhsRealMantissa); + uint64_t mantissa = emulated_float64_t_impl::divmod128by64(lhsMantissaShifted.x, lhsMantissaShifted.y, rhsRealMantissa); - const int msb = impl::_findMSB(mantissa); + const int msb = emulated_float64_t_impl::_findMSB(mantissa); if(msb != -1) { const int shiftAmount = 52 - msb; @@ -326,7 +326,7 @@ namespace hlsl mantissa &= ieee754::traits::mantissaMask; - return bit_cast(impl::assembleFloat64(sign, uint64_t(exp) << ieee754::traits::mantissaBitCnt, mantissa)); + return bit_cast(emulated_float64_t_impl::assembleFloat64(sign, uint64_t(exp) << ieee754::traits::mantissaBitCnt, mantissa)); } else { @@ -340,9 +340,9 @@ namespace hlsl { if (FastMath) { - if (tgmath::isnan(data) || tgmath::isnan(rhs.data)) + if (tgmath::isNaN(data) || tgmath::isNaN(rhs.data)) return false; - if (impl::areBothZero(data, rhs.data)) + if (emulated_float64_t_impl::areBothZero(data, rhs.data)) return true; } @@ -354,7 +354,7 @@ namespace hlsl } bool operator!=(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { - if (FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) + if (FastMath && (tgmath::isNaN(data) || tgmath::isNaN(rhs.data))) return false; return !(bit_cast(data) == rhs); @@ -363,11 +363,11 @@ namespace hlsl { if (FastMath) { - if (tgmath::isnan(data) || tgmath::isnan(rhs.data)) + if (tgmath::isNaN(data) || tgmath::isNaN(rhs.data)) return false; - if (impl::areBothInfinity(data, rhs.data)) + if (emulated_float64_t_impl::areBothInfinity(data, rhs.data)) return false; - if (impl::areBothZero(data, rhs.data)) + if (emulated_float64_t_impl::areBothZero(data, rhs.data)) return false; } @@ -384,11 +384,11 @@ namespace hlsl { if 
(FastMath) { - if (tgmath::isnan(data) || tgmath::isnan(rhs.data)) + if (tgmath::isNaN(data) || tgmath::isNaN(rhs.data)) return false; - if (impl::areBothInfinity(data, rhs.data)) + if (emulated_float64_t_impl::areBothInfinity(data, rhs.data)) return false; - if (impl::areBothZero(data, rhs.data)) + if (emulated_float64_t_impl::areBothZero(data, rhs.data)) return false; } @@ -403,14 +403,14 @@ namespace hlsl } bool operator<=(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { - if (FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) + if (FastMath && (tgmath::isNaN(data) || tgmath::isNaN(rhs.data))) return false; return !(bit_cast(data) > bit_cast(rhs.data)); } bool operator>=(emulated_float64_t rhs) { - if (FastMath && (tgmath::isnan(data) || tgmath::isnan(rhs.data))) + if (FastMath && (tgmath::isNaN(data) || tgmath::isNaN(rhs.data))) return false; return !(bit_cast(data) < bit_cast(rhs.data)); @@ -517,7 +517,7 @@ struct static_cast_helper,void return bit_cast(ieee754::traits::inf); if (exponent < ieee754::traits::exponentMin) return bit_cast(-ieee754::traits::inf); - if (tgmath::isnan(v.data)) + if (tgmath::isNaN(v.data)) return bit_cast(ieee754::traits::quietNaN); } diff --git a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl index 4e5ad6c7be..def46a36c9 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl @@ -39,7 +39,7 @@ namespace nbl { namespace hlsl { -namespace impl +namespace emulated_float64_t_impl { NBL_CONSTEXPR_INLINE_FUNC uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) { @@ -50,26 +50,13 @@ NBL_CONSTEXPR_INLINE_FUNC uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) return output; } -NBL_CONSTEXPR_INLINE_FUNC uint64_t packFloat64(uint32_t zSign, int zExp, uint32_t zFrac0, uint32_t zFrac1) -{ - uint32_t2 z; - - z.x = zSign + (uint32_t(zExp) << 20) + zFrac0; - z.y = zFrac1; - - uint64_t output = 0u; - output |= (uint64_t(z.x) << 32) & 0xFFFFFFFF00000000ull; - output |= uint64_t(z.y); - return output; -} - template inline uint64_t castFloat32ToStorageType(float32_t val) { if (FlushDenormToZero) { const uint64_t sign = uint64_t(ieee754::extractSign(val)) << 63; - if (tgmath::isinf(val)) + if (tgmath::isInf(val)) return ieee754::traits::inf | sign; uint32_t asUint = ieee754::impl::bitCastToUintType(val); const int f32BiasedExp = ieee754::extractBiasedExponent(val); @@ -178,19 +165,6 @@ NBL_CONSTEXPR_INLINE_FUNC uint32_t2 umulExtended(uint32_t lhs, uint32_t rhs) return output; } -NBL_CONSTEXPR_INLINE_FUNC uint64_t propagateFloat64NaN(uint64_t lhs, uint64_t rhs) -{ -#if defined RELAXED_NAN_PROPAGATION - return lhs | rhs; -#else - - lhs |= 0x0008000000000000ull; - rhs |= 0x0008000000000000ull; - return glsl::mix(rhs, glsl::mix(lhs, rhs, tgmath::isnan(rhs)), tgmath::isnan(lhs)); - return 0; -#endif -} - NBL_CONSTEXPR_INLINE_FUNC uint64_t flushDenormToZero(uint64_t extractedBiasedExponent, uint64_t value) { return extractedBiasedExponent ? 
value : ieee754::extractSignPreserveBitPattern(value); @@ -201,14 +175,6 @@ NBL_CONSTEXPR_INLINE_FUNC uint64_t assembleFloat64(uint64_t signShifted, uint64_ return signShifted | expShifted | mantissa; } -//TODO: remove -static inline void normalizeFloat64Subnormal(uint64_t mantissa, - NBL_REF_ARG(int) outExp, - NBL_REF_ARG(uint64_t) outMantissa) -{ - return; -} - NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) { lhs &= ~ieee754::traits::signMask; diff --git a/include/nbl/builtin/hlsl/tgmath.hlsl b/include/nbl/builtin/hlsl/tgmath.hlsl index 0edb47eb1f..4a78ba9aed 100644 --- a/include/nbl/builtin/hlsl/tgmath.hlsl +++ b/include/nbl/builtin/hlsl/tgmath.hlsl @@ -12,12 +12,32 @@ namespace nbl { namespace hlsl { - namespace tgmath { +namespace impl +{ + +template::value> +NBL_CONSTEXPR_INLINE_FUNC bool isInf(T val) +{ + using AsUint = typename unsigned_integer_of_size::type; + using AsFloat = typename float_of_size::type; + + if (IsTFundamental) + { + return isinf(bit_cast(val)); + } + else + { + AsUint tmp = bit_cast(val); + return (tmp & (~ieee754::traits::signMask)) == ieee754::traits::inf; + } +} + +} template -inline bool isnan(T val) +inline bool isNaN(T val) { using AsUint = typename unsigned_integer_of_size::type; using AsFloat = typename float_of_size::type; @@ -27,13 +47,9 @@ inline bool isnan(T val) } template -NBL_CONSTEXPR_INLINE_FUNC bool isinf(T val) +NBL_CONSTEXPR_INLINE_FUNC bool isInf(T val) { - using AsUint = typename unsigned_integer_of_size::type; - using AsFloat = typename float_of_size::type; - - AsUint tmp = bit_cast(val); - return (tmp & (~ieee754::traits::signMask)) == ieee754::traits::inf; + return impl::isInf(val); } } From 52993c4f21d6ff5e030b4901091fb4e3ed4e7aa6 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Sat, 14 Sep 2024 22:12:24 +0100 Subject: [PATCH 052/432] Saving work --- .../nbl/builtin/hlsl/emulated/matrix_t.hlsl | 32 +++++++++---------- .../nbl/builtin/hlsl/portable/matrix_t.hlsl | 6 ++-- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/matrix_t.hlsl b/include/nbl/builtin/hlsl/emulated/matrix_t.hlsl index 241975285e..ba5156c962 100644 --- a/include/nbl/builtin/hlsl/emulated/matrix_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/matrix_t.hlsl @@ -17,17 +17,17 @@ struct emulated_matrix using vec_t = emulated_vector_t2; using this_t = emulated_matrix; - vec_t columns[2]; + vec_t rows[2]; this_t getTransposed() NBL_CONST_MEMBER_FUNC { this_t output; - output.columns[0].x = columns[0].x; - output.columns[1].x = columns[0].y; + output.rows[0].x = rows[0].x; + output.rows[1].x = rows[0].y; - output.columns[0].y = columns[1].x; - output.columns[1].y = columns[1].y; + output.rows[0].y = rows[1].x; + output.rows[1].y = rows[1].y; return output; } @@ -39,30 +39,30 @@ struct emulated_matrix using vec_t = emulated_vector_t3; using this_t = emulated_matrix; - vec_t columns[3]; + vec_t rows[3]; this_t getTransposed() NBL_CONST_MEMBER_FUNC { this_t output; - output.columns[0].x = columns[0].x; - output.columns[1].x = columns[0].y; - output.columns[2].x = columns[0].z; + output.rows[0].x = rows[0].x; + output.rows[1].x = rows[0].y; + output.rows[2].x = rows[0].z; - output.columns[0].y = columns[1].x; - output.columns[1].y = columns[1].y; - output.columns[2].y = columns[1].z; + output.rows[0].y = rows[1].x; + output.rows[1].y = rows[1].y; + output.rows[2].y = rows[1].z; - output.columns[0].z = columns[2].x; - output.columns[1].z = columns[2].y; - output.columns[2].z = columns[2].z; + output.rows[0].z 
= rows[2].x; + output.rows[1].z = rows[2].y; + output.rows[2].z = rows[2].z; return output; } vec_t operator[](uint32_t columnIdx) { - return columns[columnIdx]; + return rows[columnIdx]; } }; diff --git a/include/nbl/builtin/hlsl/portable/matrix_t.hlsl b/include/nbl/builtin/hlsl/portable/matrix_t.hlsl index 8cf3d8233f..854777e1f8 100644 --- a/include/nbl/builtin/hlsl/portable/matrix_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/matrix_t.hlsl @@ -54,9 +54,9 @@ struct PortableMul64Helper V output; M matTransposed = mat.getTransposed(); - output.x = (matTransposed.columns[0] * vec).calcComponentSum(); - output.y = (matTransposed.columns[1] * vec).calcComponentSum(); - output.z = (matTransposed.columns[2] * vec).calcComponentSum(); + output.x = (matTransposed.rows[0] * vec).calcComponentSum(); + output.y = (matTransposed.rows[1] * vec).calcComponentSum(); + output.z = (matTransposed.rows[2] * vec).calcComponentSum(); return output; } From aa444c474603613ea299b1eb4d62db72e293569a Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz <34793522+AnastaZIuk@users.noreply.github.com> Date: Mon, 16 Sep 2024 10:08:05 +0200 Subject: [PATCH 053/432] Update CMakeLists.txt, change file names (vector_t.hlsl & matrix_t.hlsl), CMake had wrong ones --- src/nbl/builtin/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index f06deb7c46..5c3b3faf93 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -234,8 +234,8 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/macros.h") #emulated LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t_impl.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/vector_t_impl.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/matrix_t_impl.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/vector_t.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/matrix_t.hlsl") #portable LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/float64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/vector_t.hlsl") From 29d07fba8248a4bbd529943df8ce15ed582936a8 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Sat, 21 Sep 2024 16:30:56 -0700 Subject: [PATCH 054/432] Fixes --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated/float64_t.hlsl | 2 +- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 407 ++++++++++++++---- .../nbl/builtin/hlsl/portable/float64_t.hlsl | 3 +- .../nbl/builtin/hlsl/portable/matrix_t.hlsl | 8 +- .../nbl/builtin/hlsl/portable/vector_t.hlsl | 36 +- include/nbl/builtin/hlsl/shapes/beziers.hlsl | 2 +- include/nbl/builtin/hlsl/tgmath.hlsl | 23 +- 8 files changed, 377 insertions(+), 106 deletions(-) diff --git a/examples_tests b/examples_tests index 7f7b48596a..cdafbbcc3b 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 7f7b48596a61c43d5d72b1e032525cc07ece91a8 +Subproject commit cdafbbcc3b5367e1139a5911f6a798d307128f5a diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index 6051cee7b5..6da685654c 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -511,7 +511,7 @@ struct static_cast_helper,void { const int exponent = ieee754::extractExponent(v.data); - if (!From::isFastMathSuppoerted) + if 
(!From::isFastMathSupported) { if (exponent > ieee754::traits::exponentMax) return bit_cast(ieee754::traits::inf); diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index d4809647e4..8bbc6c4224 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -8,148 +8,396 @@ namespace nbl namespace hlsl { -template -struct emulated_vector {}; +namespace emulated_vector_impl +{ + + +template +struct _2_component_vec +{ + T x; + T y; + + static_assert(sizeof(T) <= 8); + + NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + { + if (componentIdx == 0) + x = val; + if (componentIdx == 1) + y = val; + } + + NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) + { + if (componentIdx == 0) + return x; + if (componentIdx == 1) + return y; + + // TODO: avoid code duplication, make it constexpr + using TAsUint = typename unsigned_integer_of_size::type; + uint64_t invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); + return nbl::hlsl::bit_cast(invalidComponentValue); + } + + NBL_CONSTEXPR_STATIC uint32_t Dimension = 2; +}; + +template +struct _3_component_vec +{ + T x; + T y; + T z; + + + NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + { + if (componentIdx == 0) + x = val; + if (componentIdx == 1) + y = val; + if (componentIdx == 2) + z = val; + } + + NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) + { + if (componentIdx == 0) + return x; + if (componentIdx == 1) + return y; + if (componentIdx == 1) + return z; + + // TODO: avoid code duplication, make it constexpr + using TAsUint = typename unsigned_integer_of_size::type; + uint64_t invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); + return nbl::hlsl::bit_cast(invalidComponentValue); + } + + NBL_CONSTEXPR_STATIC uint32_t Dimension = 3; +}; + +template +struct _4_component_vec +{ + T x; + T y; + T z; + T w; + + NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + { + if (componentIdx == 0) + x = val; + if (componentIdx == 1) + y = val; + if (componentIdx == 2) + z = val; + if (componentIdx == 3) + w = val; + } + + NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) + { + if (componentIdx == 0) + return x; + if (componentIdx == 1) + return y; + if (componentIdx == 1) + return z; + if (componentIdx == 3) + return w; + + // TODO: avoid code duplication, make it constexpr + using TAsUint = typename unsigned_integer_of_size::type; + uint64_t invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); + return nbl::hlsl::bit_cast(invalidComponentValue); + } + + NBL_CONSTEXPR_STATIC uint32_t Dimension = 4; +}; -template -struct emulated_vector +template ::value> +struct emulated_vector : CRTP { - using this_t = emulated_vector; + using this_t = emulated_vector; + using component_t = ComponentType; - EmulatedType x; - EmulatedType y; + NBL_CONSTEXPR_INLINE_FUNC this_t create(this_t other) + { + CRTP output; + + for (uint32_t i = 0u; i < CRTP::Dimension; ++i) + output.setComponent(i, other.getComponent(i)); + } - EmulatedType calcComponentSum() NBL_CONST_MEMBER_FUNC + template + NBL_CONSTEXPR_INLINE_FUNC this_t create(vector other) { - return x + y; + this_t output; + + for (uint32_t i = 0u; i < CRTP::Dimension; ++i) + output.setComponent(i, other[i]); + + return output; } - NBL_CONSTEXPR_STATIC_INLINE this_t 
create(EmulatedType x, EmulatedType y) + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(ComponentType val) { this_t output; - output.x = x; - output.y = y; + + for (uint32_t i = 0u; i < CRTP::Dimension; ++i) + output.setComponent(i, this_t::getComponent(i) + val); return output; } + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(this_t other) + { + this_t output; + + for (uint32_t i = 0u; i < CRTP::Dimension; ++i) + output.setComponent(i, this_t::getComponent(i) + other.getComponent(i)); - this_t operator+(float rhs) + return output; + } + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(vector other) { this_t output; - EmulatedType rhsAsEF64 = EmulatedType::create(rhs); - output.x = x + rhsAsEF64; - output.y = y + rhsAsEF64; + + for (uint32_t i = 0u; i < CRTP::Dimension; ++i) + output.setComponent(i, this_t::getComponent(i) + other[i]); + + return output; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator-(ComponentType val) + { + this_t output; + + for (uint32_t i = 0u; i < CRTP::Dimension; ++i) + output.setComponent(i, this_t::getComponent(i) - val); return output; } + NBL_CONSTEXPR_INLINE_FUNC this_t operator-(this_t other) + { + this_t output; - this_t operator+(EmulatedType rhs) + for (uint32_t i = 0u; i < CRTP::Dimension; ++i) + output.setComponent(i, this_t::getComponent(i) - other.getComponent(i)); + + return output; + } + NBL_CONSTEXPR_INLINE_FUNC this_t operator-(vector other) { this_t output; - output.x = x + rhs; - output.y = y + rhs; + + for (uint32_t i = 0u; i < CRTP::Dimension; ++i) + output.setComponent(i, this_t::getComponent(i) - other[i]); return output; } - this_t operator+(this_t rhs) + NBL_CONSTEXPR_INLINE_FUNC this_t operator*(ComponentType val) { this_t output; - output.x = x + rhs.x; - output.y = y + rhs.y; + + for (uint32_t i = 0u; i < CRTP::Dimension; ++i) + output.setComponent(i, this_t::getComponent(i) * val); return output; } + NBL_CONSTEXPR_INLINE_FUNC this_t operator*(this_t other) + { + this_t output; - this_t operator-(float rhs) + for (uint32_t i = 0u; i < CRTP::Dimension; ++i) + output.setComponent(i, this_t::getComponent(i) * other.getComponent(i)); + + return output; + } + NBL_CONSTEXPR_INLINE_FUNC this_t operator*(vector other) { - return create(x, y) + (-rhs); + this_t output; + + for (uint32_t i = 0u; i < CRTP::Dimension; ++i) + output.setComponent(i, this_t::getComponent(i) * other[i]); + + return output; } - this_t operator-(EmulatedType rhs) + NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() { - return create(x, y) + (rhs.flipSign()); + component_t sum = 0; + for (uint32_t i = 0u; i < CRTP::Dimension; ++i) + sum = sum + CRTP::getComponent(i); + + return sum; } +}; - this_t operator-(this_t rhs) +#define DEFINE_OPERATORS_FOR_TYPE(TYPE)\ +NBL_CONSTEXPR_INLINE_FUNC this_t operator+(TYPE val)\ +{\ + this_t output;\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, CRTP::getComponent(i) + val);\ +\ + return output;\ +}\ +\ +NBL_CONSTEXPR_INLINE_FUNC this_t operator-(TYPE val)\ +{\ + this_t output;\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, CRTP::getComponent(i) - val);\ +\ + return output;\ +}\ +\ +NBL_CONSTEXPR_INLINE_FUNC this_t operator*(TYPE val)\ +{\ + this_t output;\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, CRTP::getComponent(i) * val);\ +\ + return output;\ +}\ +\ + +template +struct emulated_vector : CRTP +{ + using component_t = ComponentType; + using this_t = emulated_vector; + + NBL_CONSTEXPR_INLINE_FUNC this_t create(this_t other) { - rhs.x = 
rhs.x.flipSign(); - rhs.y = rhs.y.flipSign(); - return create(x, y) + rhs; + CRTP output; + + for (uint32_t i = 0u; i < CRTP::Dimension; ++i) + output.setComponent(i, other.getComponent(i)); } - this_t operator*(float rhs) + template + NBL_CONSTEXPR_INLINE_FUNC this_t create(vector other) { this_t output; - EmulatedType rhsAsEF64 = EmulatedType::create(rhs); - output.x = x * rhsAsEF64; - output.y = y * rhsAsEF64; + + for (uint32_t i = 0u; i < CRTP::Dimension; ++i) + output.setComponent(i, ComponentType::create(other[i])); return output; } - this_t operator*(EmulatedType rhs) + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(this_t other) { this_t output; - output.x = x * rhs; - output.y = y * rhs; + + for (uint32_t i = 0u; i < CRTP::Dimension; ++i) + output.setComponent(i, CRTP::getComponent(i) + other.getComponent(i)); return output; } + NBL_CONSTEXPR_INLINE_FUNC this_t operator-(this_t other) + { + this_t output; + + for (uint32_t i = 0u; i < CRTP::Dimension; ++i) + output.setComponent(i, CRTP::getComponent(i) - other.getComponent(i)); - this_t operator*(this_t rhs) + return output; + } + NBL_CONSTEXPR_INLINE_FUNC this_t operator*(this_t other) { this_t output; - output.x = x * rhs.x; - output.y = y * rhs.y; + + for (uint32_t i = 0u; i < CRTP::Dimension; ++i) + output.setComponent(i, CRTP::getComponent(i) * other.getComponent(i)); return output; } + + DEFINE_OPERATORS_FOR_TYPE(float32_t) + DEFINE_OPERATORS_FOR_TYPE(float64_t) + DEFINE_OPERATORS_FOR_TYPE(uint16_t) + DEFINE_OPERATORS_FOR_TYPE(uint32_t) + DEFINE_OPERATORS_FOR_TYPE(uint64_t) + DEFINE_OPERATORS_FOR_TYPE(int16_t) + DEFINE_OPERATORS_FOR_TYPE(int32_t) + DEFINE_OPERATORS_FOR_TYPE(int64_t) + + NBL_CONSTEXPR_INLINE_FUNC ComponentType calcComponentSum() + { + ComponentType sum = ComponentType::create(0); + for (uint32_t i = 0u; i < CRTP::Dimension; ++i) + sum = sum + CRTP::getComponent(i); + + return sum; + } }; -template -struct emulated_vector -{ - using this_t = emulated_vector; +#undef DEFINE_OPERATORS_FOR_TYPE - EmulatedType x; - EmulatedType y; - EmulatedType z; +} - EmulatedType calcComponentSum() NBL_CONST_MEMBER_FUNC +template +struct emulated_vector_t {}; +template +struct emulated_vector_t : emulated_vector_impl::emulated_vector > {}; +template +struct emulated_vector_t : emulated_vector_impl::emulated_vector > {}; +template +struct emulated_vector_t : emulated_vector_impl::emulated_vector > {}; + +template +using emulated_vector_t2 = emulated_vector_impl::emulated_vector >; +template +using emulated_vector_t3 = emulated_vector_impl::emulated_vector >; +template +using emulated_vector_t4 = emulated_vector_impl::emulated_vector >; + +template +struct array_get +{ + T operator()(I index, NBL_CONST_REF_ARG(U) arr) { - return x + y + z; + return arr[index]; } +}; - this_t operator*(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +template +struct array_get::component_t, emulated_vector_t, uint32_t> +{ + T operator()(uint32_t index, NBL_CONST_REF_ARG(emulated_vector_t) vec) { - this_t output; - output.x = x * rhs.x; - output.y = y * rhs.y; - output.z = z * rhs.z; + return vec.getComponent(index); + } +}; - return output; +template +struct array_set +{ + void operator()(I index, NBL_REF_ARG(U) arr, T val) + { + arr[index] = val; } }; -template -struct emulated_vector +template +struct array_set, uint32_t> { - using type = emulated_vector; + using type_t = T; - EmulatedType x; - EmulatedType y; - EmulatedType z; - EmulatedType w; + T operator()(uint32_t index, NBL_CONST_REF_ARG(emulated_vector_t) vec, T value) + { + 
vec.setComponent(index, value); + } }; -template -using emulated_vector_t2 = emulated_vector; -template -using emulated_vector_t3 = emulated_vector; -template -using emulated_vector_t4 = emulated_vector; - namespace impl { template @@ -157,7 +405,11 @@ struct static_cast_helper, vector, void> { static inline emulated_vector_t2 cast(vector vec) { - return emulated_vector_t2(_static_cast(vec.x), _static_cast(vec.y)); + emulated_vector_t2 output; + output.x = _static_cast(vec.x); + output.y = _static_cast(vec.y); + + return output; } }; @@ -166,7 +418,12 @@ struct static_cast_helper, vector, void> { static inline emulated_vector_t3 cast(vector vec) { - return emulated_vector_t3(_static_cast(vec.x), _static_cast(vec.y), _static_cast(vec.z)); + emulated_vector_t2 output; + output.x = _static_cast(vec.x); + output.y = _static_cast(vec.y); + output.z = _static_cast(vec.z); + + return output; } }; @@ -175,7 +432,13 @@ struct static_cast_helper, vector, void> { static inline emulated_vector_t4 cast(vector vec) { - return emulated_vector_t4(_static_cast(vec.x), _static_cast(vec.y), _static_cast(vec.z), _static_cast(vec.w)); + emulated_vector_t2 output; + output.x = _static_cast(vec.x); + output.y = _static_cast(vec.y); + output.z = _static_cast(vec.z); + output.w = _static_cast(vec.w); + + return output; } }; } diff --git a/include/nbl/builtin/hlsl/portable/float64_t.hlsl b/include/nbl/builtin/hlsl/portable/float64_t.hlsl index 92b6e53133..131d37a87b 100644 --- a/include/nbl/builtin/hlsl/portable/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/float64_t.hlsl @@ -9,7 +9,8 @@ namespace hlsl { template #ifdef __HLSL_VERSION -using portable_float64_t = typename conditional::shaderFloat64, float64_t, emulated_float64_t >::type; +//using portable_float64_t = typename conditional::shaderFloat64, float64_t, emulated_float64_t >::type; +using portable_float64_t = typename conditional::shaderFloat64, emulated_float64_t, emulated_float64_t >::type; #else using portable_float64_t = float64_t; #endif diff --git a/include/nbl/builtin/hlsl/portable/matrix_t.hlsl b/include/nbl/builtin/hlsl/portable/matrix_t.hlsl index 854777e1f8..a01c16748d 100644 --- a/include/nbl/builtin/hlsl/portable/matrix_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/matrix_t.hlsl @@ -33,14 +33,14 @@ using portable_matrix_t3x3 = portable_matrix_t; #ifdef __HLSL_VERSION template -using portable_matrix64_t2x2 = portable_matrix_t2x2 >; +using portable_float64_t2x2 = portable_matrix_t2x2 >; template -using portable_matrix64_t3x3 = portable_matrix_t3x3 >; +using portable_float64_t3x3 = portable_matrix_t3x3 >; #else template -using portable_matrix64_t2x2 = portable_matrix_t2x2; +using portable_float64_t2x2 = portable_matrix_t2x2; template -using portable_matrix64_t3x3 = portable_matrix_t3x3; +using portable_float64_t3x3 = portable_matrix_t3x3; #endif namespace impl diff --git a/include/nbl/builtin/hlsl/portable/vector_t.hlsl b/include/nbl/builtin/hlsl/portable/vector_t.hlsl index 474975fb21..74df16579b 100644 --- a/include/nbl/builtin/hlsl/portable/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/vector_t.hlsl @@ -9,6 +9,8 @@ namespace nbl namespace hlsl { +namespace portable_vector_impl +{ template::value> struct portable_vector { @@ -18,12 +20,30 @@ struct portable_vector template struct portable_vector { - using type = portable_vector; + using type = emulated_vector_t; }; + +template +struct portable_vector +{ + using type = emulated_vector_t2; +}; +template +struct portable_vector +{ + using type = emulated_vector_t3; +}; +template 
+struct portable_vector +{ + using type = emulated_vector_t4; +}; + #endif +} template -using portable_vector_t = typename portable_vector::type; +using portable_vector_t = typename portable_vector_impl::portable_vector::type; template using portable_vector_t2 = portable_vector_t; @@ -34,18 +54,18 @@ using portable_vector_t4 = portable_vector_t; #ifdef __HLSL_VERSION template -using portable_vector64_t2 = portable_vector_t2 >; +using portable_float64_t2 = portable_vector_t2 >; template -using portable_vector64_t3 = portable_vector_t3 >; +using portable_float64_t3 = portable_vector_t3 >; template -using portable_vector64_t4 = portable_vector_t4 >; +using portable_float64_t4 = portable_vector_t4 >; #else template -using portable_vector64_t2 = portable_vector_t2; +using portable_float64_t2 = portable_vector_t2; template -using portable_vector64_t3 = portable_vector_t3; +using portable_float64_t3 = portable_vector_t3; template -using portable_vector64_t4 = portable_vector_t4; +using portable_float64_t4 = portable_vector_t4; #endif } diff --git a/include/nbl/builtin/hlsl/shapes/beziers.hlsl b/include/nbl/builtin/hlsl/shapes/beziers.hlsl index 936a37fa7e..fa009074e0 100644 --- a/include/nbl/builtin/hlsl/shapes/beziers.hlsl +++ b/include/nbl/builtin/hlsl/shapes/beziers.hlsl @@ -517,7 +517,7 @@ static math::equations::Quartic getBezierBezierIntersectionEquation(NBL { using float_t2 = portable_vector_t2; using float64 = portable_float64_t; - using float64_vec2 = portable_vector64_t2; + using float64_vec2 = portable_float64_t2; // Algorithm based on Computer Aided Geometric Design: diff --git a/include/nbl/builtin/hlsl/tgmath.hlsl b/include/nbl/builtin/hlsl/tgmath.hlsl index 4a78ba9aed..68db6e932f 100644 --- a/include/nbl/builtin/hlsl/tgmath.hlsl +++ b/include/nbl/builtin/hlsl/tgmath.hlsl @@ -17,23 +17,6 @@ namespace tgmath namespace impl { -template::value> -NBL_CONSTEXPR_INLINE_FUNC bool isInf(T val) -{ - using AsUint = typename unsigned_integer_of_size::type; - using AsFloat = typename float_of_size::type; - - if (IsTFundamental) - { - return isinf(bit_cast(val)); - } - else - { - AsUint tmp = bit_cast(val); - return (tmp & (~ieee754::traits::signMask)) == ieee754::traits::inf; - } -} - } template @@ -49,7 +32,11 @@ inline bool isNaN(T val) template NBL_CONSTEXPR_INLINE_FUNC bool isInf(T val) { - return impl::isInf(val); + using AsUint = typename unsigned_integer_of_size::type; + using AsFloat = typename float_of_size::type; + + AsUint tmp = bit_cast(val); + return (tmp & (~ieee754::traits::signMask)) == ieee754::traits::inf; } } From 734b8eb60638fd32f49cd545c965a5779d0dc034 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 23 Sep 2024 15:43:54 -0700 Subject: [PATCH 055/432] More fixes --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated/float64_t.hlsl | 40 +----- .../builtin/hlsl/emulated/float64_t_impl.hlsl | 6 +- .../nbl/builtin/hlsl/emulated/matrix_t.hlsl | 4 +- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 136 +++++++++++------- .../nbl/builtin/hlsl/portable/matrix_t.hlsl | 4 +- .../nbl/builtin/hlsl/portable/vector_t.hlsl | 1 - 7 files changed, 96 insertions(+), 97 deletions(-) diff --git a/examples_tests b/examples_tests index cdafbbcc3b..aebab4a34f 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit cdafbbcc3b5367e1139a5911f6a798d307128f5a +Subproject commit aebab4a34f486f2ab7556a1fa47b33ddca5c7e83 diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index 6da685654c..8302b5405e 100644 --- 
a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -503,7 +503,6 @@ struct static_cast_helper,void using ToAsFloat = typename float_of_size::type; using ToAsUint = typename unsigned_integer_of_size::type; - if (is_same_v) return To(bit_cast(v.data)); @@ -563,45 +562,12 @@ struct static_cast_helper,void } }; -template -struct static_cast_helper, float32_t, void> -{ - using To = emulated_float64_t; - - static inline To cast(float32_t v) - { - return To::create(v); - } -}; - -template -struct static_cast_helper, float64_t, void> -{ - using To = emulated_float64_t; - - static inline To cast(float64_t v) - { - return To::create(v); - } -}; - -template -struct static_cast_helper, uint32_t, void> +template +struct static_cast_helper, From, void> { using To = emulated_float64_t; - static inline To cast(uint32_t v) - { - return To::create(v); - } -}; - -template -struct static_cast_helper, uint64_t, void> -{ - using To = emulated_float64_t; - - static inline To cast(uint64_t v) + static inline To cast(From v) { return To::create(v); } diff --git a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl index def46a36c9..8f6e37f26a 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl @@ -59,10 +59,10 @@ inline uint64_t castFloat32ToStorageType(float32_t val) if (tgmath::isInf(val)) return ieee754::traits::inf | sign; uint32_t asUint = ieee754::impl::bitCastToUintType(val); - const int f32BiasedExp = ieee754::extractBiasedExponent(val); - if (f32BiasedExp == 0) + const int f32Exp = ieee754::extractExponent(val); + if (f32Exp == 0) return sign; - const uint64_t biasedExp = uint64_t(f32BiasedExp - ieee754::traits::exponentBias + ieee754::traits::exponentBias) << (ieee754::traits::mantissaBitCnt); + const uint64_t biasedExp = uint64_t(f32Exp + ieee754::traits::exponentBias) << (ieee754::traits::mantissaBitCnt); const uint64_t mantissa = (uint64_t(ieee754::traits::mantissaMask) & asUint) << (ieee754::traits::mantissaBitCnt - ieee754::traits::mantissaBitCnt); return sign | biasedExp | mantissa; diff --git a/include/nbl/builtin/hlsl/emulated/matrix_t.hlsl b/include/nbl/builtin/hlsl/emulated/matrix_t.hlsl index ba5156c962..2dde2bd90c 100644 --- a/include/nbl/builtin/hlsl/emulated/matrix_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/matrix_t.hlsl @@ -60,9 +60,9 @@ struct emulated_matrix return output; } - vec_t operator[](uint32_t columnIdx) + vec_t operator[](uint32_t rowIdx) { - return rows[columnIdx]; + return rows[rowIdx]; } }; diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 8bbc6c4224..fe7ccaec42 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -68,7 +68,7 @@ struct _3_component_vec return x; if (componentIdx == 1) return y; - if (componentIdx == 1) + if (componentIdx == 2) return z; // TODO: avoid code duplication, make it constexpr @@ -106,7 +106,7 @@ struct _4_component_vec return x; if (componentIdx == 1) return y; - if (componentIdx == 1) + if (componentIdx == 2) return z; if (componentIdx == 3) return w; @@ -126,16 +126,14 @@ struct emulated_vector : CRTP using this_t = emulated_vector; using component_t = ComponentType; - NBL_CONSTEXPR_INLINE_FUNC this_t create(this_t other) + NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) { CRTP output; for (uint32_t i = 0u; i < 
CRTP::Dimension; ++i) output.setComponent(i, other.getComponent(i)); } - - template - NBL_CONSTEXPR_INLINE_FUNC this_t create(vector other) + NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) { this_t output; @@ -145,7 +143,7 @@ struct emulated_vector : CRTP return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(ComponentType val) + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(component_t val) { this_t output; @@ -163,7 +161,7 @@ struct emulated_vector : CRTP return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(vector other) + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(vector other) { this_t output; @@ -173,12 +171,12 @@ struct emulated_vector : CRTP return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(ComponentType val) + NBL_CONSTEXPR_INLINE_FUNC this_t operator-(component_t val) { this_t output; for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) - val); + output.setComponent(i, CRTP::getComponent(i) - val); return output; } @@ -187,26 +185,26 @@ struct emulated_vector : CRTP this_t output; for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) - other.getComponent(i)); + output.setComponent(i, CRTP::getComponent(i) - other.getComponent(i)); return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(vector other) + NBL_CONSTEXPR_INLINE_FUNC this_t operator-(vector other) { this_t output; for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) - other[i]); + output.setComponent(i, CRTP::getComponent(i) - other[i]); return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(ComponentType val) + NBL_CONSTEXPR_INLINE_FUNC this_t operator*(component_t val) { this_t output; for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) * val); + output.setComponent(i, CRTP::getComponent(i) * val); return output; } @@ -215,16 +213,16 @@ struct emulated_vector : CRTP this_t output; for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) * other.getComponent(i)); + output.setComponent(i, CRTP::getComponent(i) * other.getComponent(i)); return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(vector other) + NBL_CONSTEXPR_INLINE_FUNC this_t operator*(vector other) { this_t output; for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) * other[i]); + output.setComponent(i, CRTP::getComponent(i) * other[i]); return output; } @@ -244,7 +242,7 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator+(TYPE val)\ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) + val);\ + output.setComponent(i, CRTP::getComponent(i) + component_t::create(val));\ \ return output;\ }\ @@ -253,7 +251,7 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator-(TYPE val)\ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) - val);\ + output.setComponent(i, CRTP::getComponent(i) - component_t::create(val));\ \ return output;\ }\ @@ -262,19 +260,20 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator*(TYPE val)\ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) * val);\ + output.setComponent(i, CRTP::getComponent(i) * component_t::create(val));\ \ return output;\ }\ \ +// TODO: some of code duplication could be avoided template struct emulated_vector : CRTP { using component_t = 
ComponentType; using this_t = emulated_vector; - NBL_CONSTEXPR_INLINE_FUNC this_t create(this_t other) + NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) { CRTP output; @@ -283,7 +282,7 @@ struct emulated_vector : CRTP } template - NBL_CONSTEXPR_INLINE_FUNC this_t create(vector other) + NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) { this_t output; @@ -340,25 +339,39 @@ struct emulated_vector : CRTP } }; -#undef DEFINE_OPERATORS_FOR_TYPE - -} - template -struct emulated_vector_t {}; +struct CRTPParentStructSelector +{ + using type = void; +}; template -struct emulated_vector_t : emulated_vector_impl::emulated_vector > {}; +struct CRTPParentStructSelector +{ + using type = _2_component_vec; +}; template -struct emulated_vector_t : emulated_vector_impl::emulated_vector > {}; +struct CRTPParentStructSelector +{ + using type = _3_component_vec; +}; template -struct emulated_vector_t : emulated_vector_impl::emulated_vector > {}; +struct CRTPParentStructSelector +{ + using type = _4_component_vec; +}; +#undef DEFINE_OPERATORS_FOR_TYPE + +} + +template +using emulated_vector_t = emulated_vector_impl::emulated_vector::type>; template -using emulated_vector_t2 = emulated_vector_impl::emulated_vector >; +using emulated_vector_t2 = emulated_vector_impl::emulated_vector::type>; template -using emulated_vector_t3 = emulated_vector_impl::emulated_vector >; +using emulated_vector_t3 = emulated_vector_impl::emulated_vector::type>; template -using emulated_vector_t4 = emulated_vector_impl::emulated_vector >; +using emulated_vector_t4 = emulated_vector_impl::emulated_vector::type>; template struct array_get @@ -369,14 +382,15 @@ struct array_get } }; -template -struct array_get::component_t, emulated_vector_t, uint32_t> -{ - T operator()(uint32_t index, NBL_CONST_REF_ARG(emulated_vector_t) vec) - { - return vec.getComponent(index); - } -}; +// TODO: fix +//template +//struct array_get::component_t, emulated_vector_t, uint32_t> +//{ +// T operator()(uint32_t index, NBL_CONST_REF_ARG(emulated_vector_t) vec) +// { +// return vec.getComponent(index); +// } +//}; template struct array_set @@ -387,16 +401,17 @@ struct array_set } }; -template -struct array_set, uint32_t> -{ - using type_t = T; - - T operator()(uint32_t index, NBL_CONST_REF_ARG(emulated_vector_t) vec, T value) - { - vec.setComponent(index, value); - } -}; +// TODO: fix +//template +//struct array_set, uint32_t> +//{ +// using type_t = T; +// +// T operator()(uint32_t index, NBL_CONST_REF_ARG(emulated_vector_t) vec, T value) +// { +// vec.setComponent(index, value); +// } +//}; namespace impl { @@ -441,6 +456,23 @@ struct static_cast_helper, vector, void> return output; } }; + +template +struct static_cast_helper, emulated_vector_t, void> +{ + using OutputVecType = vector; + using InputVecType = emulated_vector_t; + + static inline OutputVecType cast(InputVecType vec) + { + OutputVecType output; + output.x = _static_cast(vec.x); + output.y = _static_cast(vec.y); + + return output; + } +}; + } } diff --git a/include/nbl/builtin/hlsl/portable/matrix_t.hlsl b/include/nbl/builtin/hlsl/portable/matrix_t.hlsl index a01c16748d..e3c805d367 100644 --- a/include/nbl/builtin/hlsl/portable/matrix_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/matrix_t.hlsl @@ -46,13 +46,15 @@ using portable_float64_t3x3 = portable_matrix_t3x3; namespace impl { // TODO: move to emulated/matrix.hlsl +// TODO: make one template for all dimensions template struct PortableMul64Helper { static inline V multiply(M mat, V vec) { V output; - M matTransposed = 
mat.getTransposed(); + //M matTransposed = mat.getTransposed(); + M matTransposed = mat; output.x = (matTransposed.rows[0] * vec).calcComponentSum(); output.y = (matTransposed.rows[1] * vec).calcComponentSum(); diff --git a/include/nbl/builtin/hlsl/portable/vector_t.hlsl b/include/nbl/builtin/hlsl/portable/vector_t.hlsl index 74df16579b..8a558be581 100644 --- a/include/nbl/builtin/hlsl/portable/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/vector_t.hlsl @@ -8,7 +8,6 @@ namespace nbl { namespace hlsl { - namespace portable_vector_impl { template::value> From ab95289b42b704df23ab7e394097b26839fc6528 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 25 Sep 2024 11:55:31 -0700 Subject: [PATCH 056/432] Even more fixes --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated/float64_t.hlsl | 29 +++++++++++- .../builtin/hlsl/emulated/float64_t_impl.hlsl | 6 +-- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 44 ++++++++++++++++--- .../nbl/builtin/hlsl/portable/float64_t.hlsl | 4 +- .../nbl/builtin/hlsl/portable/matrix_t.hlsl | 8 ++-- .../nbl/builtin/hlsl/portable/vector_t.hlsl | 16 ------- 7 files changed, 74 insertions(+), 35 deletions(-) diff --git a/examples_tests b/examples_tests index aebab4a34f..62f8a5403c 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit aebab4a34f486f2ab7556a1fa47b33ddca5c7e83 +Subproject commit 62f8a5403c990edb114fa7e2f752ec89b815bbce diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index 8302b5405e..af6dd1482c 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -69,7 +69,7 @@ namespace hlsl emulated_float64_t retval; uint32_t lo, hi; asuint(val, lo, hi); - retval.data = (uint64_t(hi) << 32) | lo; + retval.data = (uint64_t(hi) << 32) | uint64_t(lo); return retval; #else return bit_cast(reinterpret_cast(val)); @@ -85,6 +85,15 @@ namespace hlsl // arithmetic operators this_t operator+(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { + // TODO: REMOVE! 
+ float64_t sum = bit_cast(data) + bit_cast(rhs.data); + uint64_t sumAsUint = bit_cast(sum); + + this_t output2; + output2.data = sumAsUint; + + return output2; + if (FlushDenormToZero) { if(FastMath) @@ -217,6 +226,12 @@ namespace hlsl emulated_float64_t operator*(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { + float64_t sum = bit_cast(data) * bit_cast(rhs.data); + uint64_t sumAsUint = bit_cast(sum); + + this_t output2; + output2.data = sumAsUint; + if(FlushDenormToZero) { emulated_float64_t retval = this_t::create(0ull); @@ -487,6 +502,18 @@ output.data = bit_cast(val);\ return output;\ }\ \ +template<>\ +NBL_CONSTEXPR_FUNC uint64_t bit_cast(NBL_CONST_REF_ARG( __VA_ARGS__ ) val)\ +{\ +return val.data;\ +}\ +\ +template<>\ +NBL_CONSTEXPR_FUNC float64_t bit_cast(NBL_CONST_REF_ARG( __VA_ARGS__ ) val)\ +{\ +return bit_cast(val.data);\ +}\ +\ namespace impl { diff --git a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl index 8f6e37f26a..6ef891e58f 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl @@ -59,10 +59,10 @@ inline uint64_t castFloat32ToStorageType(float32_t val) if (tgmath::isInf(val)) return ieee754::traits::inf | sign; uint32_t asUint = ieee754::impl::bitCastToUintType(val); - const int f32Exp = ieee754::extractExponent(val); - if (f32Exp == 0) + const int f32BiasedExp = int(ieee754::extractBiasedExponent(val)); + if (f32BiasedExp == 0) return sign; - const uint64_t biasedExp = uint64_t(f32Exp + ieee754::traits::exponentBias) << (ieee754::traits::mantissaBitCnt); + const uint64_t biasedExp = uint64_t(f32BiasedExp - ieee754::traits::exponentBias + ieee754::traits::exponentBias) << (ieee754::traits::mantissaBitCnt); const uint64_t mantissa = (uint64_t(ieee754::traits::mantissaMask) & asUint) << (ieee754::traits::mantissaBitCnt - ieee754::traits::mantissaBitCnt); return sign | biasedExp | mantissa; diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index fe7ccaec42..537171ff3e 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -339,6 +339,8 @@ struct emulated_vector : CRTP } }; +#undef DEFINE_OPERATORS_FOR_TYPE + template struct CRTPParentStructSelector { @@ -360,8 +362,6 @@ struct CRTPParentStructSelector using type = _4_component_vec; }; -#undef DEFINE_OPERATORS_FOR_TYPE - } template @@ -373,20 +373,50 @@ using emulated_vector_t3 = emulated_vector_impl::emulated_vector using emulated_vector_t4 = emulated_vector_impl::emulated_vector::type>; -template +// TODO: better implementation +template +struct is_valid_emulated_vector +{ + NBL_CONSTEXPR_STATIC bool value = is_same_v > || + is_same_v > || + is_same_v >; +}; + +#ifdef __HLSL_VERSION +template struct array_get { - T operator()(I index, NBL_CONST_REF_ARG(U) arr) + T operator()(NBL_CONST_REF_ARG(U) vec, const I ix) { - return arr[index]; + return vec[ix]; } }; -// TODO: fix +template +struct array_get, TT, I> +{ + TT operator()(NBL_CONST_REF_ARG(emulated_vector_t) vec, const I ix) + { + return vec.getComponent(ix); + } +}; +#endif + +//template +//struct array_get +//{ +// T operator()(I index, NBL_CONST_REF_ARG(U) arr) +// { +// return arr[index]; +// } +//}; +// //template //struct array_get::component_t, emulated_vector_t, uint32_t> //{ -// T operator()(uint32_t index, NBL_CONST_REF_ARG(emulated_vector_t) vec) +// using vec_t = emulated_vector_t; +// +// T 
operator()(uint32_t index, NBL_CONST_REF_ARG(vec_t) vec) // { // return vec.getComponent(index); // } diff --git a/include/nbl/builtin/hlsl/portable/float64_t.hlsl b/include/nbl/builtin/hlsl/portable/float64_t.hlsl index 131d37a87b..c5389ef8cc 100644 --- a/include/nbl/builtin/hlsl/portable/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/float64_t.hlsl @@ -9,8 +9,8 @@ namespace hlsl { template #ifdef __HLSL_VERSION -//using portable_float64_t = typename conditional::shaderFloat64, float64_t, emulated_float64_t >::type; -using portable_float64_t = typename conditional::shaderFloat64, emulated_float64_t, emulated_float64_t >::type; +using portable_float64_t = typename conditional::shaderFloat64, float64_t, emulated_float64_t >::type; +//using portable_float64_t = typename conditional::shaderFloat64, emulated_float64_t, emulated_float64_t >::type; #else using portable_float64_t = float64_t; #endif diff --git a/include/nbl/builtin/hlsl/portable/matrix_t.hlsl b/include/nbl/builtin/hlsl/portable/matrix_t.hlsl index e3c805d367..c0b2596cb5 100644 --- a/include/nbl/builtin/hlsl/portable/matrix_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/matrix_t.hlsl @@ -53,12 +53,10 @@ struct PortableMul64Helper static inline V multiply(M mat, V vec) { V output; - //M matTransposed = mat.getTransposed(); - M matTransposed = mat; - output.x = (matTransposed.rows[0] * vec).calcComponentSum(); - output.y = (matTransposed.rows[1] * vec).calcComponentSum(); - output.z = (matTransposed.rows[2] * vec).calcComponentSum(); + output.x = (mat.rows[0] * vec).calcComponentSum(); + output.y = (mat.rows[1] * vec).calcComponentSum(); + output.z = (mat.rows[2] * vec).calcComponentSum(); return output; } diff --git a/include/nbl/builtin/hlsl/portable/vector_t.hlsl b/include/nbl/builtin/hlsl/portable/vector_t.hlsl index 8a558be581..ace199e20b 100644 --- a/include/nbl/builtin/hlsl/portable/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/vector_t.hlsl @@ -22,22 +22,6 @@ struct portable_vector using type = emulated_vector_t; }; -template -struct portable_vector -{ - using type = emulated_vector_t2; -}; -template -struct portable_vector -{ - using type = emulated_vector_t3; -}; -template -struct portable_vector -{ - using type = emulated_vector_t4; -}; - #endif } From 02fe0ae2232cce1aa7d31d02a475356c9cf6fac9 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Thu, 26 Sep 2024 21:12:11 +0100 Subject: [PATCH 057/432] Update shaderc --- 3rdparty/shaderc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/shaderc b/3rdparty/shaderc index e72186b66b..e166325b24 160000 --- a/3rdparty/shaderc +++ b/3rdparty/shaderc @@ -1 +1 @@ -Subproject commit e72186b66bb90ed06aaf15cbdc9a053581a0616b +Subproject commit e166325b24d79d64bfa47065328890ce116ea642 From f4328c977e899fb2e56ff5f24fc265103997fa87 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Thu, 26 Sep 2024 21:13:11 +0100 Subject: [PATCH 058/432] Update glslang --- 3rdparty/glslang | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/glslang b/3rdparty/glslang index f397c9b6e9..7bc35fa99c 160000 --- a/3rdparty/glslang +++ b/3rdparty/glslang @@ -1 +1 @@ -Subproject commit f397c9b6e90bc53aa2e9feaef1a9cdf20ca43298 +Subproject commit 7bc35fa99cb8572715bea9d3e977f9b27423337b From 71f4a8bea547fdadb0d0fcf1e3481913b89c31bc Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 27 Sep 2024 05:20:43 -0700 Subject: [PATCH 059/432] Saving work --- 
.../nbl/builtin/hlsl/emulated/float64_t.hlsl | 80 +++++++++---------- .../builtin/hlsl/emulated/float64_t_impl.hlsl | 62 +++++++++++++- .../nbl/builtin/hlsl/portable/float64_t.hlsl | 4 +- 3 files changed, 100 insertions(+), 46 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index af6dd1482c..9869cb00e5 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -86,13 +86,13 @@ namespace hlsl this_t operator+(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { // TODO: REMOVE! - float64_t sum = bit_cast(data) + bit_cast(rhs.data); + /*float64_t sum = bit_cast(data) + bit_cast(rhs.data); uint64_t sumAsUint = bit_cast(sum); this_t output2; output2.data = sumAsUint; - return output2; + return output2;*/ if (FlushDenormToZero) { @@ -157,9 +157,8 @@ namespace hlsl swap(lhsSign, rhsSign); } - rhsNormMantissa >>= shiftAmount; - uint64_t resultMantissa; + uint64_t resultBiasedExp = uint64_t(exp) + ieee754::traits::exponentBias; if (lhsSign != rhsSign) { int64_t mantissaDiff = lhsNormMantissa - rhsNormMantissa; @@ -169,31 +168,27 @@ namespace hlsl swap(lhsSign, rhsSign); } - lhsNormMantissa <<= 10; - rhsNormMantissa <<= 10; - resultMantissa = uint64_t(int64_t(lhsNormMantissa) - int64_t(rhsNormMantissa)); - resultMantissa >>= 10; + resultMantissa = emulated_float64_t_impl::subMantissas128NormalizeResult(lhsNormMantissa, rhsNormMantissa, shiftAmount, resultBiasedExp); + + if (resultMantissa == 0ull); + return _static_cast(0ull); } else { + rhsNormMantissa >>= shiftAmount; resultMantissa = lhsNormMantissa + rhsNormMantissa; - } - - uint64_t resultBiasedExp = uint64_t(exp) + ieee754::traits::exponentBias; - if (resultMantissa == 0ull) - return _static_cast(0ull); - - if (resultMantissa & 1ull << 53) - { - ++resultBiasedExp; - resultMantissa >>= 1; - } + if (resultMantissa & 1ull << 53) + { + ++resultBiasedExp; + resultMantissa >>= 1; + } - while (resultMantissa < (1ull << 52)) - { - --resultBiasedExp; - resultMantissa <<= 1; + while (resultMantissa < (1ull << 52)) + { + --resultBiasedExp; + resultMantissa <<= 1; + } } resultMantissa &= ieee754::traits::mantissaMask; @@ -226,12 +221,6 @@ namespace hlsl emulated_float64_t operator*(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { - float64_t sum = bit_cast(data) * bit_cast(rhs.data); - uint64_t sumAsUint = bit_cast(sum); - - this_t output2; - output2.data = sumAsUint; - if(FlushDenormToZero) { emulated_float64_t retval = this_t::create(0ull); @@ -244,12 +233,12 @@ namespace hlsl uint64_t lhsSign = lhsData & ieee754::traits::signMask; uint64_t rhsSign = rhsData & ieee754::traits::signMask; + uint64_t sign = (lhsData ^ rhsData) & ieee754::traits::signMask; uint64_t lhsMantissa = ieee754::extractMantissa(lhsData); uint64_t rhsMantissa = ieee754::extractMantissa(rhsData); int exp = int(lhsBiasedExp + rhsBiasedExp) - ieee754::traits::exponentBias; - uint64_t sign = (lhsData ^ rhsData) & ieee754::traits::signMask; if (FastMath) { if (tgmath::isNaN(lhsData) || tgmath::isNaN(rhsData)) @@ -260,6 +249,8 @@ namespace hlsl return bit_cast(sign); } + if (emulated_float64_t_impl::isZero(lhsData) || emulated_float64_t_impl::isZero(rhsData)) + return bit_cast(sign); const uint64_t hi_l = (lhsMantissa >> 21) | (1ull << 31); const uint64_t lo_l = lhsMantissa & ((1ull << 21) - 1); @@ -300,27 +291,30 @@ namespace hlsl { const uint64_t sign = (data ^ rhs.data) & ieee754::traits::signMask; + int lhsBiasedExp = 
ieee754::extractBiasedExponent(data); + int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); + + uint64_t lhsData = emulated_float64_t_impl::flushDenormToZero(lhsBiasedExp, data); + uint64_t rhsData = emulated_float64_t_impl::flushDenormToZero(rhsBiasedExp, rhs.data); + if(FastMath) { - if (tgmath::isNaN(data) || tgmath::isNaN(rhs.data)) + if (tgmath::isNaN(lhsData) || tgmath::isNaN(rhsData)) return bit_cast(ieee754::traits::quietNaN); - if (emulated_float64_t_impl::isZero(rhs.data)) + if (emulated_float64_t_impl::areBothZero(lhsData, rhsData)) return bit_cast(ieee754::traits::quietNaN | sign); - if (emulated_float64_t_impl::areBothInfinity(data, rhs.data)) + if (emulated_float64_t_impl::isZero(rhsData)) + return bit_cast(ieee754::traits::inf | sign); + if (emulated_float64_t_impl::areBothInfinity(lhsData, rhsData)) return bit_cast(ieee754::traits::quietNaN | ieee754::traits::signMask); - if (tgmath::isInf(data)) + if (tgmath::isInf(lhsData)) return bit_cast(ieee754::traits::inf | sign); - if (tgmath::isInf(rhs.data)) - return bit_cast(0ull | sign); - if (emulated_float64_t_impl::isZero(rhs.data)) - return bit_cast(ieee754::traits::quietNaN | sign); + if (tgmath::isInf(rhsData)) + return bit_cast(sign); } - int lhsBiasedExp = ieee754::extractBiasedExponent(data); - int rhsBiasedExp = ieee754::extractBiasedExponent(rhs.data); - - uint64_t lhsData = emulated_float64_t_impl::flushDenormToZero(lhsBiasedExp, data); - uint64_t rhsData = emulated_float64_t_impl::flushDenormToZero(rhsBiasedExp, rhs.data); + if (emulated_float64_t_impl::isZero(lhsData)) + return bit_cast(sign); const uint64_t lhsRealMantissa = (ieee754::extractMantissa(lhsData) | (1ull << ieee754::traits::mantissaBitCnt)); const uint64_t rhsRealMantissa = ieee754::extractMantissa(rhsData) | (1ull << ieee754::traits::mantissaBitCnt); diff --git a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl index 6ef891e58f..0248a679b6 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl @@ -193,8 +193,9 @@ NBL_CONSTEXPR_INLINE_FUNC bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) return ((lhs << 1) == 0ull) && (lhs == rhs); } +// TODO: remove, use Newton-Raphson instead // returns pair of quotient and remainder -static inline uint64_t divmod128by64(const uint64_t dividentHigh, const uint64_t dividentLow, uint64_t divisor) +inline uint64_t divmod128by64(const uint64_t dividentHigh, const uint64_t dividentLow, uint64_t divisor) { const uint64_t b = 1ull << 32; uint64_t un1, un0, vn1, vn0, q1, q0, un32, un21, un10, rhat, left, right; @@ -258,6 +259,65 @@ static inline uint64_t divmod128by64(const uint64_t dividentHigh, const uint64_t return (q1 << 32) | q0; } + +inline uint64_t subMantissas128NormalizeResult(const uint64_t greaterNumberMantissa, const uint64_t lesserNumberMantissa, const int shiftAmount, NBL_REF_ARG(uint64_t) resultBiasedExp) +{ + uint64_t greaterHigh, greaterLow, lesserHigh, lesserLow; + greaterHigh = greaterNumberMantissa << 9; + greaterLow = 0ull; + lesserHigh = lesserNumberMantissa << 9; + resultBiasedExp += 9; + + const uint64_t mask = (1ull << shiftAmount) - 1ull; + const uint64_t lostBits = lesserHigh & mask; + lesserLow = lostBits << (63 - shiftAmount); + lesserHigh >>= shiftAmount; + + uint64_t diffHigh, diffLow; + diffHigh = greaterHigh - lesserHigh; + diffLow = greaterLow - lesserLow; + + if (diffLow > greaterLow) + --diffHigh; + + int msbIdx = _findMSB(diffHigh); + if 
(msbIdx == -1) + { + msbIdx = _findMSB(diffLow); + if (msbIdx == -1) + return 0ull; // TODO: for sure? + } + else + { + msbIdx += 64; + } + + // TODO: optimize + while (msbIdx > 52) + { + uint64_t lostBit = (diffHigh & 0x1ull) << 63; + diffHigh >>= 1; + diffLow >>= 1; + diffLow |= lostBit; + + --resultBiasedExp; + --msbIdx; + } + + while (msbIdx < 52) + { + uint64_t lostBit = (diffLow >> 63) & 0x1ull; + diffHigh <<= 1; + diffHigh |= lostBit; + diffLow <<= 1; + + ++resultBiasedExp; + ++msbIdx; + } + + return diffLow; +} + } } } diff --git a/include/nbl/builtin/hlsl/portable/float64_t.hlsl b/include/nbl/builtin/hlsl/portable/float64_t.hlsl index c5389ef8cc..131d37a87b 100644 --- a/include/nbl/builtin/hlsl/portable/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/float64_t.hlsl @@ -9,8 +9,8 @@ namespace hlsl { template #ifdef __HLSL_VERSION -using portable_float64_t = typename conditional::shaderFloat64, float64_t, emulated_float64_t >::type; -//using portable_float64_t = typename conditional::shaderFloat64, emulated_float64_t, emulated_float64_t >::type; +//using portable_float64_t = typename conditional::shaderFloat64, float64_t, emulated_float64_t >::type; +using portable_float64_t = typename conditional::shaderFloat64, emulated_float64_t, emulated_float64_t >::type; #else using portable_float64_t = float64_t; #endif From ee1289887bab238c47f28b29b1c228a1a5281b61 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 27 Sep 2024 20:58:53 -0700 Subject: [PATCH 060/432] Modified add operator --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated/float64_t.hlsl | 19 +++----- .../builtin/hlsl/emulated/float64_t_impl.hlsl | 46 +++++++------------ 3 files changed, 25 insertions(+), 42 deletions(-) diff --git a/examples_tests b/examples_tests index 62f8a5403c..8c873fabf5 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 62f8a5403c990edb114fa7e2f752ec89b815bbce +Subproject commit 8c873fabf5fbcf893e33b828844fd593d4985f9e diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index 9869cb00e5..d01b73f1a3 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -147,7 +147,7 @@ namespace hlsl const int expDiff = lhsBiasedExp - rhsBiasedExp; - const int exp = max(lhsBiasedExp, rhsBiasedExp) - ieee754::traits::exponentBias; + int exp = max(lhsBiasedExp, rhsBiasedExp) - ieee754::traits::exponentBias; const uint32_t shiftAmount = abs(expDiff); if (expDiff < 0) @@ -157,8 +157,9 @@ namespace hlsl swap(lhsSign, rhsSign); } + rhsNormMantissa >>= shiftAmount; + uint64_t resultMantissa; - uint64_t resultBiasedExp = uint64_t(exp) + ieee754::traits::exponentBias; if (lhsSign != rhsSign) { int64_t mantissaDiff = lhsNormMantissa - rhsNormMantissa; @@ -168,29 +169,23 @@ namespace hlsl swap(lhsSign, rhsSign); } - resultMantissa = emulated_float64_t_impl::subMantissas128NormalizeResult(lhsNormMantissa, rhsNormMantissa, shiftAmount, resultBiasedExp); + resultMantissa = emulated_float64_t_impl::subMantissas128NormalizeResult(lhsNormMantissa, rhsNormMantissa, exp); - if (resultMantissa == 0ull); + if (resultMantissa == 0ull) return _static_cast(0ull); } else { - rhsNormMantissa >>= shiftAmount; resultMantissa = lhsNormMantissa + rhsNormMantissa; if (resultMantissa & 1ull << 53) { - ++resultBiasedExp; + ++exp; resultMantissa >>= 1; } - - while (resultMantissa < (1ull << 52)) - { - --resultBiasedExp; - resultMantissa <<= 1; - } } + uint64_t resultBiasedExp = uint64_t(exp) 
+ ieee754::traits::exponentBias; resultMantissa &= ieee754::traits::mantissaMask; uint64_t output = emulated_float64_t_impl::assembleFloat64(lhsSign, resultBiasedExp << ieee754::traits::mantissaBitCnt, resultMantissa); return bit_cast(output); diff --git a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl index 0248a679b6..ccc49cd904 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl @@ -260,18 +260,13 @@ inline uint64_t divmod128by64(const uint64_t dividentHigh, const uint64_t divide return (q1 << 32) | q0; } -inline uint64_t subMantissas128NormalizeResult(const uint64_t greaterNumberMantissa, const uint64_t lesserNumberMantissa, const int shiftAmount, NBL_REF_ARG(uint64_t) resultBiasedExp) +inline uint64_t subMantissas128NormalizeResult(const uint64_t greaterNumberMantissa, const uint64_t lesserNumberMantissa, NBL_REF_ARG(int) resultExp) { uint64_t greaterHigh, greaterLow, lesserHigh, lesserLow; - greaterHigh = greaterNumberMantissa << 9; + greaterHigh = greaterNumberMantissa; greaterLow = 0ull; - lesserHigh = lesserNumberMantissa << 9; - resultBiasedExp += 9; - - const uint64_t mask = (1ull << shiftAmount) - 1ull; - const uint64_t lostBits = lesserHigh & mask; - lesserLow = lostBits << (63 - shiftAmount); - lesserHigh >>= shiftAmount; + lesserHigh = lesserNumberMantissa; + lesserLow = 0ull; uint64_t diffHigh, diffLow; diffHigh = greaterHigh - lesserHigh; @@ -285,37 +280,30 @@ inline uint64_t subMantissas128NormalizeResult(const uint64_t greaterNumberManti { msbIdx = _findMSB(diffLow); if (msbIdx == -1) - return 0ull; // TODO: for sure? + return 0ull; } else { msbIdx += 64; } - // TODO: optimize - while (msbIdx > 52) - { - uint64_t lostBit = (diffHigh & 0x1ull) << 63; - diffHigh >>= 1; - diffLow >>= 1; - diffLow |= lostBit; + static const int TargetMSB = 52 + 64; + int shiftAmount = msbIdx - TargetMSB; + resultExp += shiftAmount; - --resultBiasedExp; - --msbIdx; + if (shiftAmount > 0) + { + diffHigh >>= shiftAmount; } - - while (msbIdx < 52) + else if (shiftAmount < 0) { - uint64_t lostBit = (diffLow >> 63) & 0x1ull; - diffHigh <<= 1; - diffHigh |= lostBit; - diffLow <<= 1; - - ++resultBiasedExp; - ++msbIdx; + shiftAmount = -shiftAmount; + diffHigh <<= shiftAmount; + const uint64_t shiftedOutBits = diffLow >> (64 - shiftAmount); + diffHigh |= shiftedOutBits; } - return diffLow; + return diffHigh; } } From 0cb210d1df827d12c5e0931986624703b411100c Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 27 Sep 2024 21:04:44 -0700 Subject: [PATCH 061/432] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 8c873fabf5..af5ae2717b 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 8c873fabf5fbcf893e33b828844fd593d4985f9e +Subproject commit af5ae2717b4de966ad761af846fade8cc55eef5f From 8095e05539bf65cede47b9e5ddf61405900e32a5 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 27 Sep 2024 23:05:53 -0700 Subject: [PATCH 062/432] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index af5ae2717b..846e7aa5af 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit af5ae2717b4de966ad761af846fade8cc55eef5f +Subproject commit 846e7aa5afa7ad1344ec8e18041e669ddfddee00 From 9e86de2dc837a468249c4d0aaf7875b9232de2ad Mon Sep 17 00:00:00 2001 From: Przemog1 
Date: Sat, 28 Sep 2024 12:17:48 -0700 Subject: [PATCH 063/432] Added ef64_benchmark --- examples_tests | 2 +- include/nbl/builtin/hlsl/emulated/float64_t.hlsl | 11 ++++++++--- .../quadrature/gauss_legendre/gauss_legendre.hlsl | 12 ++++++++++-- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/examples_tests b/examples_tests index 846e7aa5af..a4fe41dcb1 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 846e7aa5afa7ad1344ec8e18041e669ddfddee00 +Subproject commit a4fe41dcb18359ea6a9944f63858df86a4e987f4 diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index d01b73f1a3..2f41e1b571 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -86,13 +86,13 @@ namespace hlsl this_t operator+(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { // TODO: REMOVE! - /*float64_t sum = bit_cast(data) + bit_cast(rhs.data); + float64_t sum = bit_cast(data) + bit_cast(rhs.data); uint64_t sumAsUint = bit_cast(sum); this_t output2; output2.data = sumAsUint; - return output2;*/ + return output2; if (FlushDenormToZero) { @@ -277,7 +277,7 @@ namespace hlsl emulated_float64_t operator*(float rhs) { - return _static_cast(data) * create(rhs); + return bit_cast(data) * create(rhs); } emulated_float64_t operator/(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC @@ -339,6 +339,11 @@ namespace hlsl } } + emulated_float64_t operator/(const float rhs) NBL_CONST_MEMBER_FUNC + { + return bit_cast(data) * create(rhs); + } + // relational operators bool operator==(this_t rhs) NBL_CONST_MEMBER_FUNC { diff --git a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl index 61055305da..3aa3c047dc 100644 --- a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl +++ b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl @@ -5,7 +5,8 @@ #define _NBL_BUILTIN_HLSL_MATH_QUADRATURE_GAUSS_LEGENDRE_INCLUDED_ #include - +// TODO: portable/float64_t.hlsl instead? 
+#include namespace nbl { @@ -24,7 +25,7 @@ struct GaussLegendreIntegration { static float_t calculateIntegral(NBL_CONST_REF_ARG(IntegrandFunc) func, float_t start, float_t end) { - float_t integral = 0.0; + float_t integral = _static_cast(0ull); for (uint32_t i = 0u; i < Order; ++i) { const float_t xi = GaussLegendreValues::xi(i) * ((end - start) / 2.0) + ((end + start) / 2.0); @@ -46,6 +47,13 @@ struct GaussLegendreIntegration #undef TYPED_NUMBER #undef float_t +// TODO: do for every emulated_float64_t +#define float_t emulated_float64_t +#define TYPED_NUMBER(N) emulated_float64_t::create(N); +#include +#undef TYPED_NUMBER +#undef float_t + } // quadrature } // math } // hlsl From cf2d4334a211dce4831ed444bba380442dc7a132 Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Mon, 30 Sep 2024 09:52:52 +0200 Subject: [PATCH 064/432] correctly update shaderc (we was missing 1 year of commits causing shaderc to requesting still HLSL & OGLCompiler stub libraries which got removed either from glslang & shaderc on latest revisions --- 3rdparty/shaderc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/shaderc b/3rdparty/shaderc index e166325b24..d2564ba598 160000 --- a/3rdparty/shaderc +++ b/3rdparty/shaderc @@ -1 +1 @@ -Subproject commit e166325b24d79d64bfa47065328890ce116ea642 +Subproject commit d2564ba5989c9de1a76714b3e59ec60595e9be50 From 8afc5dbb8c3ca00e92d3cc518f1efa2efb6c3660 Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Mon, 30 Sep 2024 10:12:16 +0200 Subject: [PATCH 065/432] and now update glslang to be up-to-date (still we had divergence) --- 3rdparty/glslang | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/glslang b/3rdparty/glslang index 7bc35fa99c..ff26c2e995 160000 --- a/3rdparty/glslang +++ b/3rdparty/glslang @@ -1 +1 @@ -Subproject commit 7bc35fa99cb8572715bea9d3e977f9b27423337b +Subproject commit ff26c2e995ae521cb9fbc902fb4d686a47e4eb53 From 6f092a0f9f8595a77d80b23b4251ea17135bc60b Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Mon, 30 Sep 2024 10:17:56 +0200 Subject: [PATCH 066/432] update build system, make Nabla build with fully updated glslang & shaderc --- 3rdparty/CMakeLists.txt | 9 --------- src/nbl/CMakeLists.txt | 2 -- 2 files changed, 11 deletions(-) diff --git a/3rdparty/CMakeLists.txt b/3rdparty/CMakeLists.txt index 8f65e1b2e8..932660a36d 100755 --- a/3rdparty/CMakeLists.txt +++ b/3rdparty/CMakeLists.txt @@ -161,11 +161,6 @@ target_include_directories(SPIRV PUBLIC "${GLSLANG_GENERATED_INCLUDEDIR}") set(SHADERC_SKIP_TESTS ON CACHE INTERNAL "Skip shaderc tests?") set(SHADERC_SKIP_INSTALL ON CACHE INTERNAL "Install shaderc?") -# if it doesn't work without the `touch` on Linux, then fetch the latest submodule head of shaderc and try again -# https://github.com/google/shaderc/issues/568 -if (UNIX) - file(WRITE ${THIRD_PARTY_SOURCE_DIR}/shaderc/libshaderc/libshaderc_combined.a "") -endif() add_subdirectory(shaderc shaderc EXCLUDE_FROM_ALL) # libjpeg-turbo @@ -468,7 +463,6 @@ set(NBL_3RDPARTY_TARGETS simdjson nlohmann_json glslang - OGLCompiler OSDependent MachineIndependent GenericCodeGen @@ -494,9 +488,6 @@ endif() if (NBL_BUILD_IMGUI) list(APPEND NBL_3RDPARTY_TARGETS imgui implot imtestsuite imtestengine imguizmo) endif() -if(ENABLE_HLSL) - list(APPEND NBL_3RDPARTY_TARGETS HLSL) -endif() foreach(trgt IN LISTS NBL_3RDPARTY_TARGETS) if(NBL_DYNAMIC_MSVC_RUNTIME) diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 6f38f7e1df..a454bc636a 100755 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -714,7 
+714,6 @@ if(NBL_STATIC_BUILD) nbl_install_lib(glslang) nbl_install_lib(GenericCodeGen) nbl_install_lib(MachineIndependent) - nbl_install_lib(HLSL) nbl_install_lib(jpeg-static) if (_NBL_COMPILE_WITH_OPEN_EXR_) nbl_install_lib(OpenEXR) @@ -728,7 +727,6 @@ if(NBL_STATIC_BUILD) nbl_install_lib(SPIRV) nbl_install_lib(SPIRV-Tools-static) # TODO: make this function/macro work with alias target nbl_install_lib(SPIRV-Tools-opt) - nbl_install_lib(OGLCompiler) nbl_install_lib(OSDependent) nbl_install_lib(zlibstatic) nbl_install_lib(simdjson) From 7e3fea07357b2ef5f0fd247ebec0b7661d56ff9e Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 30 Sep 2024 16:02:58 -0700 Subject: [PATCH 067/432] Saving work --- .../math/quadrature/gauss_legendre/gauss_legendre.hlsl | 7 ++++--- .../builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl index 3aa3c047dc..543c1ed16a 100644 --- a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl +++ b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl @@ -29,7 +29,7 @@ struct GaussLegendreIntegration for (uint32_t i = 0u; i < Order; ++i) { const float_t xi = GaussLegendreValues::xi(i) * ((end - start) / 2.0) + ((end + start) / 2.0); - integral += GaussLegendreValues::wi(i) * func(xi); + integral = integral + GaussLegendreValues::wi(i) * func(xi); } return ((end - start) / 2.0) * integral; } @@ -48,8 +48,9 @@ struct GaussLegendreIntegration #undef float_t // TODO: do for every emulated_float64_t -#define float_t emulated_float64_t -#define TYPED_NUMBER(N) emulated_float64_t::create(N); + +#define float_t emulated_float64_t +#define TYPED_NUMBER(N) emulated_float64_t::create(N) #include #undef TYPED_NUMBER #undef float_t diff --git a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl index 0c27ed3287..262468d19f 100644 --- a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl +++ b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl @@ -338,7 +338,7 @@ NBL_CONSTEXPR float_t wi_15[15] = { #define DEF_GAUSS_LEGENDRE_VALS(N) \ template<> \ -struct GaussLegendreValues \ +struct GaussLegendreValues \ { \ static float_t xi(uint32_t idx) { return NAMESPACE_NAME::xi_##N[idx]; } \ static float_t wi(uint32_t idx) { return NAMESPACE_NAME::wi_##N[idx]; } \ From a73b60e69d7fcf737f119cea9b006ebb39adc8c9 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 30 Sep 2024 22:54:08 -0700 Subject: [PATCH 068/432] Tests works --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated/float64_t.hlsl | 20 ++++++++++++++++++- .../builtin/hlsl/emulated/float64_t_impl.hlsl | 4 ++-- .../gauss_legendre/gauss_legendre.hlsl | 6 ++++++ .../math/quadrature/gauss_legendre/impl.hlsl | 11 +++++----- 5 files changed, 34 insertions(+), 9 deletions(-) diff --git a/examples_tests b/examples_tests index a4fe41dcb1..ca0216686d 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit a4fe41dcb18359ea6a9944f63858df86a4e987f4 +Subproject commit ca0216686d2f51bd5a0dcabea523f83dddf5f93e diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index 2f41e1b571..c62b67d568 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ 
b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -72,7 +72,7 @@ namespace hlsl retval.data = (uint64_t(hi) << 32) | uint64_t(lo); return retval; #else - return bit_cast(reinterpret_cast(val)); + return bit_cast(val); #endif } @@ -216,6 +216,15 @@ namespace hlsl emulated_float64_t operator*(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { + // TODO: remove + float64_t sum = bit_cast(data) * bit_cast(rhs.data); + uint64_t sumAsUint = bit_cast(sum); + + this_t output2; + output2.data = sumAsUint; + + return output2; + if(FlushDenormToZero) { emulated_float64_t retval = this_t::create(0ull); @@ -282,6 +291,15 @@ namespace hlsl emulated_float64_t operator/(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { + // TODO: remove + float64_t sum = bit_cast(data) / bit_cast(rhs.data); + uint64_t sumAsUint = bit_cast(sum); + + this_t output2; + output2.data = sumAsUint; + + return output2; + if (FlushDenormToZero) { const uint64_t sign = (data ^ rhs.data) & ieee754::traits::signMask; diff --git a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl index ccc49cd904..dc32398248 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl @@ -81,7 +81,7 @@ NBL_CONSTEXPR_INLINE_FUNC bool isZero(uint64_t val) // TODO: where do i move this function? also rename template -static inline int _findMSB(Int val) +inline int _findMSB(Int val) { //static_assert(is_integral::value); #ifndef __HLSL_VERSION @@ -92,7 +92,7 @@ static inline int _findMSB(Int val) } template <> -static inline int _findMSB(uint64_t val) +inline int _findMSB(uint64_t val) { #ifndef __HLSL_VERSION return nbl::hlsl::findMSB(val); diff --git a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl index 543c1ed16a..ec223770be 100644 --- a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl +++ b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl @@ -36,23 +36,29 @@ struct GaussLegendreIntegration }; #define float_t float32_t +#define float_t_namespace impl_float32_t #define TYPED_NUMBER(N) NBL_CONCATENATE(N, f) // to add f after floating point numbers and avoid casting warnings and emitting ShaderFloat64 Caps #include #undef TYPED_NUMBER +#undef float_t_namespace #undef float_t #define float_t float64_t +#define float_t_namespace impl_float64_t #define TYPED_NUMBER(N) N #include #undef TYPED_NUMBER +#undef float_t_namespace #undef float_t // TODO: do for every emulated_float64_t #define float_t emulated_float64_t +#define float_t_namespace impl_emulated_float64_t_true_true #define TYPED_NUMBER(N) emulated_float64_t::create(N) #include #undef TYPED_NUMBER +#undef float_t_namespace #undef float_t } // quadrature diff --git a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl index 262468d19f..3bcfbb2388 100644 --- a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl +++ b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl @@ -5,13 +5,14 @@ #ifndef float_t #error Define float_t before including #endif +#ifndef float_t_namespace +#error Define float_t_namespace before including +#endif #ifndef TYPED_NUMBER #error Define TYPED_NUMBER before including #endif -#define NAMESPACE_NAME NBL_CONCATENATE(impl_, float_t) - -namespace NAMESPACE_NAME +namespace 
float_t_namespace { NBL_CONSTEXPR float_t xi_2[2] = { TYPED_NUMBER(-0.5773502691896257), @@ -340,8 +341,8 @@ NBL_CONSTEXPR float_t wi_15[15] = { template<> \ struct GaussLegendreValues \ { \ - static float_t xi(uint32_t idx) { return NAMESPACE_NAME::xi_##N[idx]; } \ - static float_t wi(uint32_t idx) { return NAMESPACE_NAME::wi_##N[idx]; } \ + static float_t xi(uint32_t idx) { return float_t_namespace::xi_##N[idx]; } \ + static float_t wi(uint32_t idx) { return float_t_namespace::wi_##N[idx]; } \ } DEF_GAUSS_LEGENDRE_VALS(2); From 4eb1c05c8e10736efb29e86ed2196c6f58bba4ce Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 2 Oct 2024 11:53:49 -0700 Subject: [PATCH 069/432] Saving work --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index ca0216686d..742224863a 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit ca0216686d2f51bd5a0dcabea523f83dddf5f93e +Subproject commit 742224863a4ad9e66182893c57584774fdee830b From 2dc3c48fddca0b39ed32410eb7a922bcb8b13e4c Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Thu, 3 Oct 2024 14:12:58 +0100 Subject: [PATCH 070/432] Disable GL directives --- src/nbl/asset/utils/CGLSLCompiler.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/nbl/asset/utils/CGLSLCompiler.cpp b/src/nbl/asset/utils/CGLSLCompiler.cpp index f90be9f27a..a6ba06011a 100644 --- a/src/nbl/asset/utils/CGLSLCompiler.cpp +++ b/src/nbl/asset/utils/CGLSLCompiler.cpp @@ -103,9 +103,9 @@ namespace nbl::asset::impl { auto res_str = std::move(result.contents); //employ encloseWithinExtraInclGuards() in order to prevent infinite loop of (not necesarilly direct) self-inclusions while other # directives (incl guards among them) are disabled - CGLSLCompiler::disableAllDirectivesExceptIncludes(res_str); - disableGlDirectives(res_str); - res_str = CGLSLCompiler::encloseWithinExtraInclGuards(std::move(res_str), m_maxInclCnt, name.string().c_str()); + //CGLSLCompiler::disableAllDirectivesExceptIncludes(res_str); + //disableGlDirectives(res_str); + //res_str = CGLSLCompiler::encloseWithinExtraInclGuards(std::move(res_str), m_maxInclCnt, name.string().c_str()); res->content_length = res_str.size(); res->content = new char[res_str.size() + 1u]; @@ -145,8 +145,8 @@ std::string CGLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADE insertion << "#define " << define.identifier << " " << define.definition << "\n"; insertIntoStart(code,std::move(insertion)); } - disableAllDirectivesExceptIncludes(code); - disableGlDirectives(code); + //disableAllDirectivesExceptIncludes(code); + //disableGlDirectives(code); shaderc::Compiler comp; shaderc::CompileOptions options; options.SetTargetSpirv(shaderc_spirv_version_1_6); @@ -164,8 +164,8 @@ std::string CGLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADE } auto resolvedString = std::string(res.cbegin(), std::distance(res.cbegin(), res.cend())); - reenableDirectives(resolvedString); - reenableGlDirectives(resolvedString); + //reenableDirectives(resolvedString); + //reenableGlDirectives(resolvedString); return resolvedString; } From 75d6d9453117b7e0e2e845d583760667ddcf1ecd Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 3 Oct 2024 17:39:20 -0700 Subject: [PATCH 071/432] Benchmark works --- examples_tests | 2 +- include/nbl/builtin/hlsl/emulated/float64_t.hlsl | 5 ++++- .../math/quadrature/gauss_legendre/gauss_legendre.hlsl | 10 +++++++++- 
include/nbl/video/ILogicalDevice.h | 4 +++- src/nbl/video/CVulkanLogicalDevice.cpp | 2 +- 5 files changed, 18 insertions(+), 5 deletions(-) diff --git a/examples_tests b/examples_tests index 742224863a..69156640cd 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 742224863a4ad9e66182893c57584774fdee830b +Subproject commit 69156640cd7386084f99e9af09f280101590db45 diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index c62b67d568..19f95f9323 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -291,6 +291,8 @@ namespace hlsl emulated_float64_t operator/(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { + printf("%llu", rhs.data); + // TODO: remove float64_t sum = bit_cast(data) / bit_cast(rhs.data); uint64_t sumAsUint = bit_cast(sum); @@ -359,7 +361,8 @@ namespace hlsl emulated_float64_t operator/(const float rhs) NBL_CONST_MEMBER_FUNC { - return bit_cast(data) * create(rhs); + printf("guwno"); + return bit_cast(data) / create(rhs); } // relational operators diff --git a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl index ec223770be..053ea02a47 100644 --- a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl +++ b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl @@ -28,9 +28,17 @@ struct GaussLegendreIntegration float_t integral = _static_cast(0ull); for (uint32_t i = 0u; i < Order; ++i) { - const float_t xi = GaussLegendreValues::xi(i) * ((end - start) / 2.0) + ((end + start) / 2.0); + const float_t xi = GaussLegendreValues::xi(i) * ((end - start) / 2.0f) + ((end + start) / 2.0f); integral = integral + GaussLegendreValues::wi(i) * func(xi); + + float_t a = GaussLegendreValues::xi(i); + float_t b = (end - start) / 2.0f; + + //printf("x = %ull, xi = %ull, ((end - start) / 2.0) = %ull", bit_cast(integral), bit_cast(a), bit_cast(b)); + //printf("start = %llu, end = %llu", bit_cast(start), bit_cast(end)); + printf("((end - start)) = %ull", bit_cast(b)); } + return ((end - start) / 2.0) * integral; } }; diff --git a/include/nbl/video/ILogicalDevice.h b/include/nbl/video/ILogicalDevice.h index 02fad9abd7..70546ec86a 100644 --- a/include/nbl/video/ILogicalDevice.h +++ b/include/nbl/video/ILogicalDevice.h @@ -925,6 +925,8 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe return nullptr; } break; + case IQueryPool::TYPE::TIMESTAMP: + break; default: NBL_LOG_ERROR("Unsupported query pool type"); return nullptr; @@ -939,7 +941,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe NBL_LOG_ERROR("The queryPool was not created by this device"); return false; } - if (firstQuery + queryCount >= queryPool->getCreationParameters().queryCount) + if (firstQuery + queryCount > queryPool->getCreationParameters().queryCount) { NBL_LOG_ERROR("Query index out of bounds"); return false; diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index c90ffb94a7..03f2c94494 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1434,7 +1434,7 @@ core::smart_refctd_ptr CVulkanLogicalDevice::createQueryPool_impl(co info.pipelineStatistics = CVulkanQueryPool::getVkPipelineStatisticsFlagsFrom(params.pipelineStatisticsFlags.value); VkQueryPool vk_queryPool = 
VK_NULL_HANDLE; - if (m_devf.vk.vkCreateQueryPool(m_vkdev,&info,nullptr,&vk_queryPool)!=VK_SUCCESS) + if (m_devf.vk.vkCreateQueryPool(m_vkdev,&info,nullptr,&vk_queryPool)==VK_SUCCESS) return core::make_smart_refctd_ptr(this,params,vk_queryPool); return nullptr; } From 39b8bd2cd458a4a2f8304cd50374d36e4359a87b Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 4 Oct 2024 10:39:07 -0700 Subject: [PATCH 072/432] Saving work --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated/float64_t.hlsl | 15 ++++------ .../gauss_legendre/gauss_legendre.hlsl | 30 +++++++++++++++---- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/examples_tests b/examples_tests index 69156640cd..582f002cef 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 69156640cd7386084f99e9af09f280101590db45 +Subproject commit 582f002cef18e0a75bbcd16210745a783c197663 diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index 19f95f9323..2c40c8ba2e 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -86,13 +86,13 @@ namespace hlsl this_t operator+(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { // TODO: REMOVE! - float64_t sum = bit_cast(data) + bit_cast(rhs.data); + /*float64_t sum = bit_cast(data) + bit_cast(rhs.data); uint64_t sumAsUint = bit_cast(sum); this_t output2; output2.data = sumAsUint; - return output2; + return output2;*/ if (FlushDenormToZero) { @@ -217,13 +217,13 @@ namespace hlsl emulated_float64_t operator*(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { // TODO: remove - float64_t sum = bit_cast(data) * bit_cast(rhs.data); + /*float64_t sum = bit_cast(data) * bit_cast(rhs.data); uint64_t sumAsUint = bit_cast(sum); this_t output2; output2.data = sumAsUint; - return output2; + return output2;*/ if(FlushDenormToZero) { @@ -291,16 +291,14 @@ namespace hlsl emulated_float64_t operator/(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { - printf("%llu", rhs.data); - // TODO: remove - float64_t sum = bit_cast(data) / bit_cast(rhs.data); + /*float64_t sum = bit_cast(data) / bit_cast(rhs.data); uint64_t sumAsUint = bit_cast(sum); this_t output2; output2.data = sumAsUint; - return output2; + return output2;*/ if (FlushDenormToZero) { @@ -361,7 +359,6 @@ namespace hlsl emulated_float64_t operator/(const float rhs) NBL_CONST_MEMBER_FUNC { - printf("guwno"); return bit_cast(data) / create(rhs); } diff --git a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl index 053ea02a47..823c368b4d 100644 --- a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl +++ b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl @@ -33,10 +33,6 @@ struct GaussLegendreIntegration float_t a = GaussLegendreValues::xi(i); float_t b = (end - start) / 2.0f; - - //printf("x = %ull, xi = %ull, ((end - start) / 2.0) = %ull", bit_cast(integral), bit_cast(a), bit_cast(b)); - //printf("start = %llu, end = %llu", bit_cast(start), bit_cast(end)); - printf("((end - start)) = %ull", bit_cast(b)); } return ((end - start) / 2.0) * integral; @@ -59,8 +55,6 @@ struct GaussLegendreIntegration #undef float_t_namespace #undef float_t -// TODO: do for every emulated_float64_t - #define float_t emulated_float64_t #define float_t_namespace impl_emulated_float64_t_true_true #define TYPED_NUMBER(N) emulated_float64_t::create(N) @@ -69,6 +63,30 @@ 
struct GaussLegendreIntegration #undef float_t_namespace #undef float_t +#define float_t emulated_float64_t +#define float_t_namespace impl_emulated_float64_t_false_false +#define TYPED_NUMBER(N) emulated_float64_t::create(N) +#include +#undef TYPED_NUMBER +#undef float_t_namespace +#undef float_t + +#define float_t emulated_float64_t +#define float_t_namespace impl_emulated_float64_t_false_true +#define TYPED_NUMBER(N) emulated_float64_t::create(N) +#include +#undef TYPED_NUMBER +#undef float_t_namespace +#undef float_t + +#define float_t emulated_float64_t +#define float_t_namespace impl_emulated_float64_t_true_false +#define TYPED_NUMBER(N) emulated_float64_t::create(N) +#include +#undef TYPED_NUMBER +#undef float_t_namespace +#undef float_t + } // quadrature } // math } // hlsl From 38952118cc143d230c05b04cad12212d3e8e247d Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Sat, 5 Oct 2024 01:56:26 -0700 Subject: [PATCH 073/432] Improved benchmark --- examples_tests | 2 +- include/nbl/asset/utils/CCompilerSet.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 582f002cef..8c371527bd 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 582f002cef18e0a75bbcd16210745a783c197663 +Subproject commit 8c371527bd39600a85425f80a785fe875778dcdc diff --git a/include/nbl/asset/utils/CCompilerSet.h b/include/nbl/asset/utils/CCompilerSet.h index 0b65db6b4e..3e5fd0d6ce 100644 --- a/include/nbl/asset/utils/CCompilerSet.h +++ b/include/nbl/asset/utils/CCompilerSet.h @@ -33,6 +33,8 @@ namespace nbl::asset #ifdef _NBL_PLATFORM_WINDOWS_ return m_HLSLCompiler; +#else + return nullptr; #endif } else if (contentType == IShader::E_CONTENT_TYPE::ECT_GLSL) From f3afeb4e08f65bf9fa4f53e030cb63f4e9dde7b7 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 7 Oct 2024 07:47:04 -0700 Subject: [PATCH 074/432] Updated benchmark example --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 8c371527bd..3912137730 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 8c371527bd39600a85425f80a785fe875778dcdc +Subproject commit 391213773098baf46552d3e4959ae744c9c973e5 From ab86fdfdbc3589083fa35baf91ebcfb96c5890e0 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 7 Oct 2024 15:13:47 -0700 Subject: [PATCH 075/432] Fixed substraction --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated/float64_t.hlsl | 40 +++++++++---------- .../builtin/hlsl/emulated/float64_t_impl.hlsl | 24 +++++------ .../gauss_legendre/gauss_legendre.hlsl | 1 - 4 files changed, 32 insertions(+), 35 deletions(-) diff --git a/examples_tests b/examples_tests index 846e7aa5af..239b3b0afb 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 846e7aa5afa7ad1344ec8e18041e669ddfddee00 +Subproject commit 239b3b0afbbecce4f878afd3e79e35d397e6ac2c diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index d01b73f1a3..fec353b4e4 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -75,25 +75,10 @@ namespace hlsl return bit_cast(reinterpret_cast(val)); #endif } - - // TODO: unresolved external symbol imath_half_to_float_table - /*static emulated_float64_t create(float16_t val) - { - return emulated_float64_t(bit_cast(float64_t(val))); - }*/ // arithmetic operators this_t operator+(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { - // TODO: 
REMOVE! - /*float64_t sum = bit_cast(data) + bit_cast(rhs.data); - uint64_t sumAsUint = bit_cast(sum); - - this_t output2; - output2.data = sumAsUint; - - return output2;*/ - if (FlushDenormToZero) { if(FastMath) @@ -137,7 +122,10 @@ namespace hlsl else return bit_cast(0ull); } - + if(emulated_float64_t_impl::isZero(lhsData)) + return bit_cast(rhsData); + if (emulated_float64_t_impl::isZero(rhsData)) + return bit_cast(lhsData); if (tgmath::isInf(lhsData)) return bit_cast(ieee754::traits::inf | ieee754::extractSignPreserveBitPattern(max(lhsData, rhsData))); } @@ -157,25 +145,35 @@ namespace hlsl swap(lhsSign, rhsSign); } - rhsNormMantissa >>= shiftAmount; - uint64_t resultMantissa; if (lhsSign != rhsSign) { - int64_t mantissaDiff = lhsNormMantissa - rhsNormMantissa; + uint64_t rhsNormMantissaHigh = shiftAmount >= 64 ? 0ull : rhsNormMantissa >> shiftAmount; + uint64_t rhsNormMantissaLow = 0ull; + if (shiftAmount < 128) + { + if (shiftAmount >= 64) + rhsNormMantissaLow = rhsNormMantissa >> (shiftAmount - 64); + else + rhsNormMantissaLow = rhsNormMantissa << (64 - shiftAmount); + } + + const int64_t mantissaDiff = int64_t(lhsNormMantissa) - int64_t(rhsNormMantissaHigh); + // can only happen when shiftAmount == 0, so it is safe to swap only high bits of rhs mantissa if (mantissaDiff < 0) { - swap(lhsNormMantissa, rhsNormMantissa); + swap(lhsNormMantissa, rhsNormMantissaHigh); swap(lhsSign, rhsSign); } - resultMantissa = emulated_float64_t_impl::subMantissas128NormalizeResult(lhsNormMantissa, rhsNormMantissa, exp); + resultMantissa = emulated_float64_t_impl::subMantissas128NormalizeResult(lhsNormMantissa, rhsNormMantissaHigh, rhsNormMantissaLow, exp); if (resultMantissa == 0ull) return _static_cast(0ull); } else { + rhsNormMantissa >>= shiftAmount; resultMantissa = lhsNormMantissa + rhsNormMantissa; if (resultMantissa & 1ull << 53) diff --git a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl index ccc49cd904..2618789276 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl @@ -260,19 +260,23 @@ inline uint64_t divmod128by64(const uint64_t dividentHigh, const uint64_t divide return (q1 << 32) | q0; } -inline uint64_t subMantissas128NormalizeResult(const uint64_t greaterNumberMantissa, const uint64_t lesserNumberMantissa, NBL_REF_ARG(int) resultExp) +struct uint128_t { - uint64_t greaterHigh, greaterLow, lesserHigh, lesserLow; + uint64_t highBits; + uint64_t lowBits; +}; + +inline uint64_t subMantissas128NormalizeResult(const uint64_t greaterNumberMantissa, const uint64_t lesserNumberMantissaHigh, const uint64_t lesserNumberMantissaLow, NBL_REF_ARG(int) resultExp) +{ + uint64_t greaterHigh, greaterLow; greaterHigh = greaterNumberMantissa; greaterLow = 0ull; - lesserHigh = lesserNumberMantissa; - lesserLow = 0ull; uint64_t diffHigh, diffLow; - diffHigh = greaterHigh - lesserHigh; - diffLow = greaterLow - lesserLow; + diffHigh = greaterHigh - lesserNumberMantissaHigh; + diffLow = greaterLow - lesserNumberMantissaLow; - if (diffLow > greaterLow) + if (lesserNumberMantissaLow > greaterLow) --diffHigh; int msbIdx = _findMSB(diffHigh); @@ -291,11 +295,7 @@ inline uint64_t subMantissas128NormalizeResult(const uint64_t greaterNumberManti int shiftAmount = msbIdx - TargetMSB; resultExp += shiftAmount; - if (shiftAmount > 0) - { - diffHigh >>= shiftAmount; - } - else if (shiftAmount < 0) + if (shiftAmount < 0) { shiftAmount = -shiftAmount; diffHigh <<= shiftAmount; diff 
--git a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl index 61055305da..d97438d218 100644 --- a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl +++ b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl @@ -6,7 +6,6 @@ #include - namespace nbl { namespace hlsl From ff5c747dc01e4d2e0baf6fb62f823d268980d355 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 8 Oct 2024 11:25:29 -0700 Subject: [PATCH 076/432] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 3912137730..0b39d2d312 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 391213773098baf46552d3e4959ae744c9c973e5 +Subproject commit 0b39d2d31233554e9dfa818b647119649c7ba065 From f11d61e5fab3e9d7964a95cb9812fc0e407fca99 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 8 Oct 2024 17:23:58 -0700 Subject: [PATCH 077/432] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 0b39d2d312..879860a22b 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 0b39d2d31233554e9dfa818b647119649c7ba065 +Subproject commit 879860a22b2f2d62f97114dce46c80cefd3a217e From eecbbaa7a683dedafae561845ba22145756a89a0 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 9 Oct 2024 07:08:24 -0700 Subject: [PATCH 078/432] Updated examples --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated/float64_t.hlsl | 20 +++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/examples_tests b/examples_tests index 879860a22b..3c063f8e3f 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 879860a22b2f2d62f97114dce46c80cefd3a217e +Subproject commit 3c063f8e3f3ef8609f78b6c6da9c693bae344d98 diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index cbf1584edd..19f02a5b10 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -81,7 +81,7 @@ namespace hlsl { if (FlushDenormToZero) { - if(FastMath) + if(!FastMath) { if (tgmath::isNaN(data) || tgmath::isNaN(rhs.data)) return bit_cast(ieee754::traits::quietNaN); @@ -113,7 +113,7 @@ namespace hlsl uint64_t lhsSign = ieee754::extractSignPreserveBitPattern(lhsData); uint64_t rhsSign = ieee754::extractSignPreserveBitPattern(rhsData); - if(FastMath) + if(!FastMath) { if (emulated_float64_t_impl::areBothZero(lhsData, rhsData)) { @@ -241,7 +241,7 @@ namespace hlsl uint64_t rhsMantissa = ieee754::extractMantissa(rhsData); int exp = int(lhsBiasedExp + rhsBiasedExp) - ieee754::traits::exponentBias; - if (FastMath) + if (!FastMath) { if (tgmath::isNaN(lhsData) || tgmath::isNaN(rhsData)) return bit_cast(ieee754::traits::quietNaN | sign); @@ -308,7 +308,7 @@ namespace hlsl uint64_t lhsData = emulated_float64_t_impl::flushDenormToZero(lhsBiasedExp, data); uint64_t rhsData = emulated_float64_t_impl::flushDenormToZero(rhsBiasedExp, rhs.data); - if(FastMath) + if(!FastMath) { if (tgmath::isNaN(lhsData) || tgmath::isNaN(rhsData)) return bit_cast(ieee754::traits::quietNaN); @@ -363,7 +363,7 @@ namespace hlsl // relational operators bool operator==(this_t rhs) NBL_CONST_MEMBER_FUNC { - if (FastMath) + if (!FastMath) { if (tgmath::isNaN(data) || tgmath::isNaN(rhs.data)) return false; @@ -379,14 
+379,14 @@ namespace hlsl } bool operator!=(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { - if (FastMath && (tgmath::isNaN(data) || tgmath::isNaN(rhs.data))) + if (!FastMath && (tgmath::isNaN(data) || tgmath::isNaN(rhs.data))) return false; return !(bit_cast(data) == rhs); } bool operator<(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { - if (FastMath) + if (!FastMath) { if (tgmath::isNaN(data) || tgmath::isNaN(rhs.data)) return false; @@ -407,7 +407,7 @@ namespace hlsl } bool operator>(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { - if (FastMath) + if (!FastMath) { if (tgmath::isNaN(data) || tgmath::isNaN(rhs.data)) return false; @@ -428,14 +428,14 @@ namespace hlsl } bool operator<=(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { - if (FastMath && (tgmath::isNaN(data) || tgmath::isNaN(rhs.data))) + if (!FastMath && (tgmath::isNaN(data) || tgmath::isNaN(rhs.data))) return false; return !(bit_cast(data) > bit_cast(rhs.data)); } bool operator>=(emulated_float64_t rhs) { - if (FastMath && (tgmath::isNaN(data) || tgmath::isNaN(rhs.data))) + if (!FastMath && (tgmath::isNaN(data) || tgmath::isNaN(rhs.data))) return false; return !(bit_cast(data) < bit_cast(rhs.data)); From 684bcc20169a44f7f709354ca7171b58154e7877 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 10 Oct 2024 10:39:01 -0700 Subject: [PATCH 079/432] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 3c063f8e3f..38282a3784 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 3c063f8e3f3ef8609f78b6c6da9c693bae344d98 +Subproject commit 38282a37847b56b655653832ffe4cae8c3b76863 From 6236fdd809376508e0e46794746269055acbdab1 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 10 Oct 2024 14:54:10 -0700 Subject: [PATCH 080/432] Fixed ef64 with fast math enabled --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated/float64_t.hlsl | 23 ++++++++++--------- .../gauss_legendre/gauss_legendre.hlsl | 3 --- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/examples_tests b/examples_tests index 38282a3784..fa5a77b1c6 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 38282a37847b56b655653832ffe4cae8c3b76863 +Subproject commit fa5a77b1c65db8b83ad6ba0e38508d29ddc788ac diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index 19f02a5b10..4f97f28102 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -115,21 +115,22 @@ namespace hlsl if(!FastMath) { - if (emulated_float64_t_impl::areBothZero(lhsData, rhsData)) - { - if (lhsSign == rhsSign) - return bit_cast(lhsSign); - else - return bit_cast(0ull); - } - if(emulated_float64_t_impl::isZero(lhsData)) - return bit_cast(rhsData); - if (emulated_float64_t_impl::isZero(rhsData)) - return bit_cast(lhsData); if (tgmath::isInf(lhsData)) return bit_cast(ieee754::traits::inf | ieee754::extractSignPreserveBitPattern(max(lhsData, rhsData))); } + if (emulated_float64_t_impl::areBothZero(lhsData, rhsData)) + { + if (lhsSign == rhsSign) + return bit_cast(lhsSign); + else + return bit_cast(0ull); + } + if (emulated_float64_t_impl::isZero(lhsData)) + return bit_cast(rhsData); + if (emulated_float64_t_impl::isZero(rhsData)) + return bit_cast(lhsData); + uint64_t lhsNormMantissa = ieee754::extractNormalizeMantissa(lhsData); uint64_t rhsNormMantissa = ieee754::extractNormalizeMantissa(rhsData); diff --git 
a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl index 823c368b4d..1eeba76546 100644 --- a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl +++ b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl @@ -30,9 +30,6 @@ struct GaussLegendreIntegration { const float_t xi = GaussLegendreValues::xi(i) * ((end - start) / 2.0f) + ((end + start) / 2.0f); integral = integral + GaussLegendreValues::wi(i) * func(xi); - - float_t a = GaussLegendreValues::xi(i); - float_t b = (end - start) / 2.0f; } return ((end - start) / 2.0) * integral; From 52db910d2006041fbeb087f40666b497797fb607 Mon Sep 17 00:00:00 2001 From: Ali Cheraghi Date: Sat, 12 Oct 2024 22:14:21 +0330 Subject: [PATCH 081/432] lib shader stage Signed-off-by: Ali Cheraghi --- include/nbl/asset/IDescriptorSetLayout.h | 2 +- include/nbl/asset/utils/IMeshPackerV2.h | 4 ++-- include/nbl/asset/utils/IVirtualTexture.h | 2 +- include/nbl/builtin/hlsl/enums.hlsl | 2 +- include/nbl/video/CVulkanCommon.h | 2 +- src/nbl/asset/utils/CHLSLCompiler.cpp | 8 ++++++-- src/nbl/video/ILogicalDevice.cpp | 4 ++-- 7 files changed, 14 insertions(+), 10 deletions(-) diff --git a/include/nbl/asset/IDescriptorSetLayout.h b/include/nbl/asset/IDescriptorSetLayout.h index 34e7d2ccc4..44e8be71ea 100644 --- a/include/nbl/asset/IDescriptorSetLayout.h +++ b/include/nbl/asset/IDescriptorSetLayout.h @@ -330,7 +330,7 @@ class IDescriptorSetLayout : public IDescriptorSetLayoutBase bindings[i].binding = i; bindings[i].type = type; bindings[i].createFlags = SBinding::E_CREATE_FLAGS::ECF_NONE; - bindings[i].stageFlags = stageAccessFlags ? stageAccessFlags[i]:asset::IShader::ESS_ALL; + bindings[i].stageFlags = stageAccessFlags ? stageAccessFlags[i]:asset::IShader::ESS_ALL_OR_LIBRARY; bindings[i].count = counts ? 
counts[i]:1u; bindings[i].samplers = nullptr; } diff --git a/include/nbl/asset/utils/IMeshPackerV2.h b/include/nbl/asset/utils/IMeshPackerV2.h index 89aec7e685..f6418807fb 100644 --- a/include/nbl/asset/utils/IMeshPackerV2.h +++ b/include/nbl/asset/utils/IMeshPackerV2.h @@ -380,7 +380,7 @@ class IMeshPackerV2 : public IMeshPacker, public I { bnd->binding = binding; bnd->count = count; - bnd->stageFlags = asset::ISpecializedShader::ESS_ALL; + bnd->stageFlags = asset::ISpecializedShader::ESS_ALL_OR_LIBRARY; bnd->type = asset::IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER; bnd->samplers = nullptr; bnd++; @@ -517,7 +517,7 @@ class IMeshPackerV2 : public IMeshPacker, public I { bnd->binding = binding; bnd->count = 1u; - bnd->stageFlags = asset::ISpecializedShader::ESS_ALL; + bnd->stageFlags = asset::ISpecializedShader::ESS_ALL_OR_LIBRARY; bnd->type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER; bnd->samplers = nullptr; bnd++; diff --git a/include/nbl/asset/utils/IVirtualTexture.h b/include/nbl/asset/utils/IVirtualTexture.h index ec26f56103..17ecfb7e38 100644 --- a/include/nbl/asset/utils/IVirtualTexture.h +++ b/include/nbl/asset/utils/IVirtualTexture.h @@ -1047,7 +1047,7 @@ class IVirtualTexture : public core::IReferenceCounted, public IVirtualTextureBa auto fillBinding = [](auto& bnd, uint32_t _binding, uint32_t _count, core::smart_refctd_ptr* _samplers) { bnd.binding = _binding; bnd.count = _count; - bnd.stageFlags = asset::IShader::E_SHADER_STAGE::ESS_ALL; + bnd.stageFlags = asset::IShader::E_SHADER_STAGE::ESS_ALL_OR_LIBRARY; bnd.type = asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER; bnd.immutableSamplers = _samplers; }; diff --git a/include/nbl/builtin/hlsl/enums.hlsl b/include/nbl/builtin/hlsl/enums.hlsl index 990b6273ad..7201df183a 100644 --- a/include/nbl/builtin/hlsl/enums.hlsl +++ b/include/nbl/builtin/hlsl/enums.hlsl @@ -29,7 +29,7 @@ enum ShaderStage : uint32_t ESS_INTERSECTION = 1 << 12, ESS_CALLABLE = 1 << 13, ESS_ALL_GRAPHICS = 0x0000001F, - ESS_ALL = 0x7fffffff + ESS_ALL_OR_LIBRARY = 0x7fffffff }; enum SampleCountFlags : uint16_t diff --git a/include/nbl/video/CVulkanCommon.h b/include/nbl/video/CVulkanCommon.h index 1a0f53fdbb..16511a1c5e 100644 --- a/include/nbl/video/CVulkanCommon.h +++ b/include/nbl/video/CVulkanCommon.h @@ -464,7 +464,7 @@ inline VkShaderStageFlags getVkShaderStageFlagsFromShaderStage(const core::bitfl if(in.hasFlags(IGPUShader::E_SHADER_STAGE::ESS_INTERSECTION)) ret |= VK_SHADER_STAGE_INTERSECTION_BIT_KHR; if(in.hasFlags(IGPUShader::E_SHADER_STAGE::ESS_CALLABLE)) ret |= VK_SHADER_STAGE_CALLABLE_BIT_KHR; if(in.hasFlags(IGPUShader::E_SHADER_STAGE::ESS_ALL_GRAPHICS)) ret |= VK_SHADER_STAGE_ALL_GRAPHICS; - if(in.hasFlags(IGPUShader::E_SHADER_STAGE::ESS_ALL)) ret |= VK_SHADER_STAGE_ALL; + if(in.hasFlags(IGPUShader::E_SHADER_STAGE::ESS_ALL_OR_LIBRARY)) ret |= VK_SHADER_STAGE_ALL; return ret; } diff --git a/src/nbl/asset/utils/CHLSLCompiler.cpp b/src/nbl/asset/utils/CHLSLCompiler.cpp index ca0af41baf..ac3e8cf165 100644 --- a/src/nbl/asset/utils/CHLSLCompiler.cpp +++ b/src/nbl/asset/utils/CHLSLCompiler.cpp @@ -42,6 +42,8 @@ static const wchar_t* ShaderStageToString(asset::IShader::E_SHADER_STAGE stage) return L"as"; case asset::IShader::E_SHADER_STAGE::ESS_MESH: return L"ms"; + case asset::IShader::E_SHADER_STAGE::ESS_ALL_OR_LIBRARY: + return L"lib"; default: return nullptr; }; @@ -403,8 +405,10 @@ core::smart_refctd_ptr CHLSLCompiler::compileToSPIRV_impl(const std: arguments.push_back(L"-HV"); arguments.push_back(L"202x"); // TODO: add this to 
`CHLSLCompiler::SOptions` and handle it properly in `dxc_compile_flags.empty()` - arguments.push_back(L"-E"); - arguments.push_back(L"main"); + if (stage != asset::IShader::E_SHADER_STAGE::ESS_ALL_OR_LIBRARY) { + arguments.push_back(L"-E"); + arguments.push_back(L"main"); + } // If a custom SPIR-V optimizer is specified, use that instead of DXC's spirv-opt. // This is how we can get more optimizer options. // diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index 8c6a7752a6..32c53b53d0 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -291,8 +291,6 @@ core::smart_refctd_ptr ILogicalDevice::createShader(const SShaderCre // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-stage-00706 switch (shaderStage) { - case IGPUShader::E_SHADER_STAGE::ESS_VERTEX: - break; case IGPUShader::E_SHADER_STAGE::ESS_TESSELLATION_CONTROL: [[fallthrough]]; case IGPUShader::E_SHADER_STAGE::ESS_TESSELLATION_EVALUATION: if (!features.tessellationShader) @@ -308,6 +306,8 @@ core::smart_refctd_ptr ILogicalDevice::createShader(const SShaderCre return nullptr; } break; + case IGPUShader::E_SHADER_STAGE::ESS_ALL_OR_LIBRARY: [[fallthrough]]; + case IGPUShader::E_SHADER_STAGE::ESS_VERTEX: [[fallthrough]]; case IGPUShader::E_SHADER_STAGE::ESS_FRAGMENT: [[fallthrough]]; case IGPUShader::E_SHADER_STAGE::ESS_COMPUTE: break; From 8caa917b60d6a5ff7511baa1284e19e6e4d6a905 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 15 Oct 2024 04:47:58 -0700 Subject: [PATCH 082/432] Fixed array_get and array_set --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated/float64_t.hlsl | 24 +++-- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 95 +++++++++++-------- 3 files changed, 72 insertions(+), 49 deletions(-) diff --git a/examples_tests b/examples_tests index fa5a77b1c6..e4a42f42ef 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit fa5a77b1c65db8b83ad6ba0e38508d29ddc788ac +Subproject commit e4a42f42efe0e0345a337870bae1e64c6357cefd diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index 4f97f28102..eec7a27c46 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -79,6 +79,15 @@ namespace hlsl // arithmetic operators this_t operator+(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { + // TODO: remove + float64_t sum = bit_cast(data) + bit_cast(rhs.data); + uint64_t sumAsUint = bit_cast(sum); + + this_t output2; + output2.data = sumAsUint; + + return output2; + if (FlushDenormToZero) { if(!FastMath) @@ -216,13 +225,13 @@ namespace hlsl emulated_float64_t operator*(emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { // TODO: remove - /*float64_t sum = bit_cast(data) * bit_cast(rhs.data); + float64_t sum = bit_cast(data) * bit_cast(rhs.data); uint64_t sumAsUint = bit_cast(sum); this_t output2; output2.data = sumAsUint; - return output2;*/ + return output2; if(FlushDenormToZero) { @@ -291,13 +300,13 @@ namespace hlsl emulated_float64_t operator/(const emulated_float64_t rhs) NBL_CONST_MEMBER_FUNC { // TODO: remove - /*float64_t sum = bit_cast(data) / bit_cast(rhs.data); + float64_t sum = bit_cast(data) / bit_cast(rhs.data); uint64_t sumAsUint = bit_cast(sum); this_t output2; output2.data = sumAsUint; - return output2;*/ + return output2; if (FlushDenormToZero) { @@ -535,18 +544,20 @@ struct static_cast_helper,void 
static_assert(is_scalar::value); using From = emulated_float64_t; - + static inline To cast(From v) { using ToAsFloat = typename float_of_size::type; using ToAsUint = typename unsigned_integer_of_size::type; + if (emulated_float64_t_impl::isZero(v.data)) + return 0; + if (is_same_v) return To(bit_cast(v.data)); if (is_floating_point::value) { - const int exponent = ieee754::extractExponent(v.data); if (!From::isFastMathSupported) { @@ -558,7 +569,6 @@ struct static_cast_helper,void return bit_cast(ieee754::traits::quietNaN); } - const uint32_t toBitSize = sizeof(To) * 8; const ToAsUint sign = ToAsUint(ieee754::extractSign(v.data) << (toBitSize - 1)); const ToAsUint biasedExponent = ToAsUint(exponent + ieee754::traits::exponentBias) << ieee754::traits::mantissaBitCnt; diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 537171ff3e..768c6b8c85 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -37,7 +37,7 @@ struct _2_component_vec // TODO: avoid code duplication, make it constexpr using TAsUint = typename unsigned_integer_of_size::type; - uint64_t invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); + uint64_t invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull); return nbl::hlsl::bit_cast(invalidComponentValue); } @@ -382,66 +382,79 @@ struct is_valid_emulated_vector is_same_v >; }; -#ifdef __HLSL_VERSION template struct array_get { - T operator()(NBL_CONST_REF_ARG(U) vec, const I ix) + T operator()(NBL_REF_ARG(U) vec, const I ix) { return vec[ix]; } }; -template -struct array_get, TT, I> +template +struct array_get, ComponentType, uint32_t> +{ + ComponentType operator()(NBL_REF_ARG(emulated_vector_t2) vec, const uint32_t ix) + { + return vec.getComponent(ix); + } +}; + +template +struct array_get, ComponentType, uint32_t> { - TT operator()(NBL_CONST_REF_ARG(emulated_vector_t) vec, const I ix) + ComponentType operator()(NBL_REF_ARG(emulated_vector_t3) vec, const uint32_t ix) { return vec.getComponent(ix); } }; -#endif - -//template -//struct array_get -//{ -// T operator()(I index, NBL_CONST_REF_ARG(U) arr) -// { -// return arr[index]; -// } -//}; -// -//template -//struct array_get::component_t, emulated_vector_t, uint32_t> -//{ -// using vec_t = emulated_vector_t; -// -// T operator()(uint32_t index, NBL_CONST_REF_ARG(vec_t) vec) -// { -// return vec.getComponent(index); -// } -//}; - -template + +template +struct array_get, ComponentType, uint32_t> +{ + ComponentType operator()(NBL_REF_ARG(emulated_vector_t4) vec, const uint32_t ix) + { + return vec.getComponent(ix); + } +}; + +#undef DEFINE_EMULATED_VECTOR_ARRAY_GET_SPECIALIZATION + +template struct array_set { - void operator()(I index, NBL_REF_ARG(U) arr, T val) + void operator()(NBL_REF_ARG(U) arr, I index, T val) { arr[index] = val; } }; -// TODO: fix -//template -//struct array_set, uint32_t> -//{ -// using type_t = T; -// -// T operator()(uint32_t index, NBL_CONST_REF_ARG(emulated_vector_t) vec, T value) -// { -// vec.setComponent(index, value); -// } -//}; +template +struct array_set, ComponentType, uint32_t> +{ + void operator()(NBL_REF_ARG(emulated_vector_t2) vec, uint32_t index, ComponentType value) + { + vec.setComponent(index, value); + } +}; + +template +struct array_set, ComponentType, uint32_t> +{ + void operator()(NBL_REF_ARG(emulated_vector_t3) vec, uint32_t index, ComponentType value) + { + vec.setComponent(index, value); + } +}; 
+ +template +struct array_set, ComponentType, uint32_t> +{ + void operator()(NBL_REF_ARG(emulated_vector_t4) vec, uint32_t index, ComponentType value) + { + vec.setComponent(index, value); + } +}; namespace impl { From 847dc080731a9369ab95c83c1a1c6a8bfc86a219 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Fri, 25 Oct 2024 16:39:38 +0400 Subject: [PATCH 083/432] update examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index a80dcf86cf..ceb08477ef 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit a80dcf86cfe3712fd8fdb4170f60afc53637fec6 +Subproject commit ceb08477ef578008a490067b613114ae1e469666 From 7df72fee46fa04fa6be3bbbd3aecb028c2096560 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 11 Nov 2024 10:21:48 +0700 Subject: [PATCH 084/432] ngfx integration --- include/nbl/video/IAPIConnection.h | 14 ++++- include/nbl/video/utilities/ngfx.h | 79 +++++++++++++++++++++++++++++ src/nbl/video/CVulkanConnection.cpp | 15 ++++-- src/nbl/video/IAPIConnection.cpp | 3 ++ 4 files changed, 105 insertions(+), 6 deletions(-) create mode 100644 include/nbl/video/utilities/ngfx.h diff --git a/include/nbl/video/IAPIConnection.h b/include/nbl/video/IAPIConnection.h index b20a3573d1..840944bab2 100644 --- a/include/nbl/video/IAPIConnection.h +++ b/include/nbl/video/IAPIConnection.h @@ -11,7 +11,7 @@ #include "nbl/video/debug/IDebugCallback.h" #include "nbl/video/utilities/renderdoc.h" - +#include "nbl/video/utilities/ngfx.h" namespace nbl::video { @@ -61,7 +61,16 @@ class NBL_API2 IAPIConnection : public core::IReferenceCounted const SFeatures& getEnabledFeatures() const { return m_enabledFeatures; } - const bool isRunningInRenderdoc() const { return m_rdoc_api; } + enum DebuggerType + { + EDT_NONE, + EDT_RENDERDOC, + EDT_NGFX + }; + const DebuggerType isRunningInGraphicsDebugger() const { + return m_ngfx_api.useNGFX ? EDT_NGFX : // ngfx takes priority? + m_rdoc_api ? 
EDT_RENDERDOC : EDT_NONE; + } virtual bool startCapture() = 0; virtual bool endCapture() = 0; @@ -70,6 +79,7 @@ class NBL_API2 IAPIConnection : public core::IReferenceCounted std::vector> m_physicalDevices; renderdoc_api_t* m_rdoc_api; + ngfx_api_t m_ngfx_api; SFeatures m_enabledFeatures = {}; }; diff --git a/include/nbl/video/utilities/ngfx.h b/include/nbl/video/utilities/ngfx.h new file mode 100644 index 0000000000..af260b72e2 --- /dev/null +++ b/include/nbl/video/utilities/ngfx.h @@ -0,0 +1,79 @@ +#ifndef _NBL_VIDEO_UTILITIES_NGFX_H_INCLUDED_ +#define _NBL_VIDEO_UTILITIES_NGFX_H_INCLUDED_ + +#include "C:\Program Files\NVIDIA Corporation\Nsight Graphics 2024.1.0\SDKs\NsightGraphicsSDK\0.8.0\include\NGFX_Injection.h" + +namespace nbl::video +{ + struct SNGFXIntegration + { + bool useNGFX; + NGFX_Injection_InstallationInfo versionInfo; + }; + + bool injectNGFXToProcess(SNGFXIntegration& api) + { + uint32_t numInstallations = 0; + auto result = NGFX_Injection_EnumerateInstallations(&numInstallations, nullptr); + if (numInstallations == 0 || NGFX_INJECTION_RESULT_OK != result) + { + api.useNGFX = false; + return false; + } + + std::vector installations(numInstallations); + result = NGFX_Injection_EnumerateInstallations(&numInstallations, installations.data()); + if (numInstallations == 0 || NGFX_INJECTION_RESULT_OK != result) + { + api.useNGFX = false; + return false; + } + + // get latest installation + api.versionInfo = installations.back(); + + uint32_t numActivities = 0; + result = NGFX_Injection_EnumerateActivities(&api.versionInfo, &numActivities, nullptr); + if (numActivities == 0 || NGFX_INJECTION_RESULT_OK != result) + { + api.useNGFX = false; + return false; + } + + std::vector activities(numActivities); + result = NGFX_Injection_EnumerateActivities(&api.versionInfo, &numActivities, activities.data()); + if (NGFX_INJECTION_RESULT_OK != result) + { + api.useNGFX = false; + return false; + } + + const NGFX_Injection_Activity* pActivityToInject = nullptr; + for (const NGFX_Injection_Activity& activity : activities) + { + if (activity.type == NGFX_INJECTION_ACTIVITY_FRAME_DEBUGGER) // only want frame debugger + { + pActivityToInject = &activity; + break; + } + } + + if (!pActivityToInject) { + api.useNGFX = false; + return false; + } + + result = NGFX_Injection_InjectToProcess(&api.versionInfo, pActivityToInject); + if (NGFX_INJECTION_RESULT_OK != result) + { + api.useNGFX = false; + return false; + } + + return true; + } + + using ngfx_api_t = SNGFXIntegration; +} + +#endif //_NBL_VIDEO_UTILITIES_NGFX_H_INCLUDED_ \ No newline at end of file diff --git a/src/nbl/video/CVulkanConnection.cpp b/src/nbl/video/CVulkanConnection.cpp index 737059d947..c0b5fe1e7f 100644 --- a/src/nbl/video/CVulkanConnection.cpp +++ b/src/nbl/video/CVulkanConnection.cpp @@ -323,7 +323,8 @@ CVulkanConnection::~CVulkanConnection() bool CVulkanConnection::startCapture() { - if (!isRunningInRenderdoc()) + auto debugType = isRunningInGraphicsDebugger(); + if (debugType == EDT_NONE) return false; if (flag.test()) { @@ -335,13 +336,17 @@ bool CVulkanConnection::startCapture() } flag.test_and_set(); - m_rdoc_api->StartFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(m_vkInstance), NULL); + if (debugType == EDT_RENDERDOC) + m_rdoc_api->StartFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(m_vkInstance), NULL); + else + NGFX_Injection_ExecuteActivityCommand(); return true; } bool CVulkanConnection::endCapture() { - if (!isRunningInRenderdoc()) + auto debugType = isRunningInGraphicsDebugger(); + if (debugType == 
EDT_NONE) return false; if (!flag.test()) { @@ -352,7 +357,9 @@ bool CVulkanConnection::endCapture() return false; } - m_rdoc_api->EndFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(m_vkInstance), NULL); + if (debugType == EDT_RENDERDOC) + m_rdoc_api->EndFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(m_vkInstance), NULL); + // no equivalent end frame capture for ngfx, ends captures on next frame delimiter flag.clear(); return true; } diff --git a/src/nbl/video/IAPIConnection.cpp b/src/nbl/video/IAPIConnection.cpp index 5b3d72760f..750fd02f5d 100644 --- a/src/nbl/video/IAPIConnection.cpp +++ b/src/nbl/video/IAPIConnection.cpp @@ -43,6 +43,9 @@ IAPIConnection::IAPIConnection(const SFeatures& enabledFeatures) int ret = RENDERDOC_GetAPI(MinRenderdocVersion, (void**)&m_rdoc_api); assert(ret == 1); #endif + + // probably is platform agnostic, for now + injectNGFXToProcess(m_ngfx_api); } } From ecb1d09bb82ac5f843d74b4d08009b50da9f7988 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 11 Nov 2024 11:19:57 +0700 Subject: [PATCH 085/432] fixed some linker issues --- include/nbl/video/utilities/ngfx.h | 8 +++++++- src/nbl/video/CVulkanConnection.cpp | 2 +- src/nbl/video/IAPIConnection.cpp | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/include/nbl/video/utilities/ngfx.h b/include/nbl/video/utilities/ngfx.h index af260b72e2..91eabfa2b3 100644 --- a/include/nbl/video/utilities/ngfx.h +++ b/include/nbl/video/utilities/ngfx.h @@ -1,6 +1,7 @@ #ifndef _NBL_VIDEO_UTILITIES_NGFX_H_INCLUDED_ #define _NBL_VIDEO_UTILITIES_NGFX_H_INCLUDED_ +// TODO: hopefully this is temporary #include "C:\Program Files\NVIDIA Corporation\Nsight Graphics 2024.1.0\SDKs\NsightGraphicsSDK\0.8.0\include\NGFX_Injection.h" namespace nbl::video @@ -11,7 +12,7 @@ namespace nbl::video NGFX_Injection_InstallationInfo versionInfo; }; - bool injectNGFXToProcess(SNGFXIntegration& api) + inline bool injectNGFXToProcess(SNGFXIntegration& api) { uint32_t numInstallations = 0; auto result = NGFX_Injection_EnumerateInstallations(&numInstallations, nullptr); @@ -73,6 +74,11 @@ namespace nbl::video return true; } + inline void executeNGFXCommand() + { + NGFX_Injection_ExecuteActivityCommand(); + } + using ngfx_api_t = SNGFXIntegration; } diff --git a/src/nbl/video/CVulkanConnection.cpp b/src/nbl/video/CVulkanConnection.cpp index c0b5fe1e7f..e1a33a1418 100644 --- a/src/nbl/video/CVulkanConnection.cpp +++ b/src/nbl/video/CVulkanConnection.cpp @@ -339,7 +339,7 @@ bool CVulkanConnection::startCapture() if (debugType == EDT_RENDERDOC) m_rdoc_api->StartFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(m_vkInstance), NULL); else - NGFX_Injection_ExecuteActivityCommand(); + executeNGFXCommand(); return true; } diff --git a/src/nbl/video/IAPIConnection.cpp b/src/nbl/video/IAPIConnection.cpp index 750fd02f5d..8dc156bb94 100644 --- a/src/nbl/video/IAPIConnection.cpp +++ b/src/nbl/video/IAPIConnection.cpp @@ -2,6 +2,7 @@ #include "nbl/video/IPhysicalDevice.h" #include "nbl/video/utilities/renderdoc.h" +#include "nbl/video/utilities/ngfx.h" #if defined(_NBL_POSIX_API_) #include From 9f25ce511c49fe4faf1e4588355b26b6b538202b Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Wed, 13 Nov 2024 18:49:15 +0100 Subject: [PATCH 086/432] make Nabla docker build "exec" based with git cache volume --- CMakeLists.txt | 1 + docker/.env | 2 + docker/Dockerfile | 116 +++++++++++++ docker/compose.yml | 28 +++ docker/compose/Dockerfile | 162 ------------------ .../ci/stages/.env/platform/windows/.env | 8 - 
.../stages/dev/axes/dynamic/debug/compose.yml | 81 --------- .../dev/axes/dynamic/release/compose.yml | 79 --------- .../axes/dynamic/relwithdebinfo/compose.yml | 81 --------- .../stages/dev/axes/static/debug/compose.yml | 81 --------- .../dev/axes/static/release/compose.yml | 79 --------- .../axes/static/relwithdebinfo/compose.yml | 81 --------- docker/compose/ci/stages/dev/compose.yml | 39 ----- docker/compose/scripts/os/javaHome.py | 23 --- docker/compose/scripts/os/resources/zoo.cfg | 6 - docker/dev.py | 62 ------- docker/scripts/__init__.py | 0 docker/scripts/nbl/ci/dev/.vscode/launch.json | 117 ------------- docker/scripts/nbl/ci/dev/__init__.py | 0 docker/scripts/nbl/ci/dev/build.py | 84 --------- docker/scripts/nbl/ci/dev/cmake.py | 47 ----- docker/scripts/nbl/ci/dev/cpack.py | 72 -------- docker/scripts/nbl/ci/dev/lib/kazoo.py | 105 ------------ docker/scripts/ncpfmp.bat | 13 -- 24 files changed, 147 insertions(+), 1220 deletions(-) create mode 100644 docker/.env create mode 100644 docker/Dockerfile create mode 100644 docker/compose.yml delete mode 100644 docker/compose/Dockerfile delete mode 100644 docker/compose/ci/stages/.env/platform/windows/.env delete mode 100644 docker/compose/ci/stages/dev/axes/dynamic/debug/compose.yml delete mode 100644 docker/compose/ci/stages/dev/axes/dynamic/release/compose.yml delete mode 100644 docker/compose/ci/stages/dev/axes/dynamic/relwithdebinfo/compose.yml delete mode 100644 docker/compose/ci/stages/dev/axes/static/debug/compose.yml delete mode 100644 docker/compose/ci/stages/dev/axes/static/release/compose.yml delete mode 100644 docker/compose/ci/stages/dev/axes/static/relwithdebinfo/compose.yml delete mode 100644 docker/compose/ci/stages/dev/compose.yml delete mode 100644 docker/compose/scripts/os/javaHome.py delete mode 100644 docker/compose/scripts/os/resources/zoo.cfg delete mode 100644 docker/dev.py delete mode 100644 docker/scripts/__init__.py delete mode 100644 docker/scripts/nbl/ci/dev/.vscode/launch.json delete mode 100644 docker/scripts/nbl/ci/dev/__init__.py delete mode 100644 docker/scripts/nbl/ci/dev/build.py delete mode 100644 docker/scripts/nbl/ci/dev/cmake.py delete mode 100644 docker/scripts/nbl/ci/dev/cpack.py delete mode 100644 docker/scripts/nbl/ci/dev/lib/kazoo.py delete mode 100644 docker/scripts/ncpfmp.bat diff --git a/CMakeLists.txt b/CMakeLists.txt index 0976e00b52..4c0994c44b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,6 +17,7 @@ enable_language(C CXX ASM ASM_NASM) if(MSVC) enable_language(ASM_MASM) + link_libraries(delayimp) endif() option(NBL_STATIC_BUILD "" OFF) # ON for static builds, OFF for shared diff --git a/docker/.env b/docker/.env new file mode 100644 index 0000000000..623184f422 --- /dev/null +++ b/docker/.env @@ -0,0 +1,2 @@ +THIS_PROJECT_WORKING_DIRECTORY=C:\docker +THIS_PROJECT_NABLA_DIRECTORY=C:/Users/ContainerAdministrator/Nabla/bind \ No newline at end of file diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000000..a0427ccfeb --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,116 @@ +# escape=` + +ARG BASE_IMAGE=mcr.microsoft.com/windows/servercore:ltsc2022-amd64 + +FROM ${BASE_IMAGE} + +SHELL ["cmd", "/S", "/C"] + +USER ContainerAdministrator + +ENV THIS_PROJECT_WORKING_DIRECTORY="C:\docker" +ENV THIS_PROJECT_NABLA_DIRECTORY="C:/Users/ContainerAdministrator/Nabla/bind" +ENV VULKAN_SDK_INSTALL_DIRECTORY="${THIS_PROJECT_WORKING_DIRECTORY}\dependencies\VulkanSDK" +ENV VS_INSTALL_DIRECTORY="${THIS_PROJECT_WORKING_DIRECTORY}\dependencies\VS\BuildTools" + +RUN 
` + # Download the Build Tools (17.11.5 October 8, 2024 version) bootstrapper. https://learn.microsoft.com/en-us/visualstudio/releases/2022/release-history + ` + curl -SL --output vs_buildtools.exe https://download.visualstudio.microsoft.com/download/pr/69e24482-3b48-44d3-af65-51f866a08313/471c9a89fa8ba27d356748ae0cf25eb1f362184992dc0bb6e9ccf10178c43c27/vs_BuildTools.exe ` + ` + # Install Build Tools with the Microsoft.VisualStudio.Workload.VCTools recommended workload and ATL & ATLMFC, excluding some Windows SDKs. + ` + && (start /w vs_buildtools.exe --quiet --wait --norestart --nocache ` + --installPath "%VS_INSTALL_DIRECTORY%" ` + --add Microsoft.VisualStudio.Workload.VCTools --includeRecommended ` + --add Microsoft.VisualStudio.Component.VC.ATL ` + --add Microsoft.VisualStudio.Component.VC.ATLMFC ` + --remove Microsoft.VisualStudio.Component.Windows10SDK.10240 ` + --remove Microsoft.VisualStudio.Component.Windows10SDK.10586 ` + --remove Microsoft.VisualStudio.Component.Windows10SDK.14393 ` + --remove Microsoft.VisualStudio.Component.Windows81SDK ` + || IF "%ERRORLEVEL%"=="3010" EXIT 0) ` + ` + # Add VS's CMake to the system PATH and cleanup + ` + && setx PATH "%PATH%;%VS_INSTALL_DIRECTORY%\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin" /M ` + ` + # Cleanup + ` + && del /q vs_buildtools.exe + +ENV VS_DEV_CMD_DIRECTORY="${VS_INSTALL_DIRECTORY}\Common7\Tools" + +RUN ` + # Add VS_DEV_CMD_DIRECTORY to the system PATH + ` + setx PATH "%PATH%;%VS_DEV_CMD_DIRECTORY%" /M + +RUN ` + # Download VulkanSDK + ` + curl -SL --output VulkanSDK-Installer.exe https://sdk.lunarg.com/sdk/download/1.3.268.0/windows/VulkanSDK-1.3.268.0-Installer.exe ` + ` + # Install VulkanSDK + ` + && VulkanSDK-Installer.exe install --root "%VULKAN_SDK_INSTALL_DIRECTORY%" --default-answer --accept-licenses --confirm-command ` + ` + # Cleanup + ` + && del /q VulkanSDK-Installer.exe + +RUN ` + # Download & install choco packet manager + ` + powershell Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1')) + +RUN ` + # Download & install executable Strawberry Perl + ` + choco install -y strawberryperl --version 5.28.2.1 + +RUN ` + # Download & install Python + ` + choco install -y python --version 3.11.6 + +RUN ` + # Download & install git + ` + choco install -y git --version 2.43.0 + +RUN ` + # Download & install nasm + ` + choco install -y nasm --version 2.16.1 + +RUN ` + # Download & install ninja + ` + choco install -y ninja --version 1.12.1 + +RUN ` + # Enable Long Paths feature + ` + reg add "HKLM\SYSTEM\CurrentControlSet\Control\FileSystem" /v "LongPathsEnabled" /t REG_DWORD /d 1 /f + +RUN ` + # Force git to use HTTPS protocol & trust containers + ` + git config --system protocol.*.allow always ` + ` + && git config --system url."https://github.com/".insteadOf "git@github.com:" ` + ` + && git config --system --add safe.directory * + +RUN ` + # Post environment setup + ` + setx VS_INSTALL_DIRECTORY "%VS_INSTALL_DIRECTORY%" /M ` + ` + && setx PATH "%PATH%;%VS_INSTALL_DIRECTORY%\VC\Auxiliary\Build" /M ` + ` + && setx NBL_CI_MODE "ON" + +WORKDIR ${THIS_PROJECT_NABLA_DIRECTORY} +ENTRYPOINT ["cmd.exe", "/K"] \ No newline at end of file diff --git a/docker/compose.yml b/docker/compose.yml new file mode 100644 index 0000000000..04c1013081 --- /dev/null +++ b/docker/compose.yml @@ -0,0 +1,28 @@ +services: + nabla: + 
build: + context: . + dockerfile: Dockerfile + image: dcr.devsh.eu/nabla/source + container_name: dev.nabla.build + env_file: + - .env + environment: + - THIS_PROJECT_WORKING_DIRECTORY=${THIS_PROJECT_WORKING_DIRECTORY} + - THIS_PROJECT_NABLA_DIRECTORY=${THIS_PROJECT_NABLA_DIRECTORY} + volumes: + - nabla-cache-git:${THIS_PROJECT_NABLA_DIRECTORY}/.git + networks: + docker_default: + deploy: + resources: + limits: + cpus: '6' + memory: 12G + +volumes: + nabla-cache-git: + +networks: + docker_default: + external: true \ No newline at end of file diff --git a/docker/compose/Dockerfile b/docker/compose/Dockerfile deleted file mode 100644 index 959c5de7b3..0000000000 --- a/docker/compose/Dockerfile +++ /dev/null @@ -1,162 +0,0 @@ -# escape=` - -ARG BASE_IMAGE=mcr.microsoft.com/windows/servercore:ltsc2022 - -ARG THIS_PROJECT_WORKING_DIRECTORY="C:\docker" -ARG THIS_PROJECT_NABLA_DIRECTORY="C:/Users/ContainerAdministrator/Nabla/bind" -ARG VULKAN_SDK_INSTALL_DIRECTORY="${THIS_PROJECT_WORKING_DIRECTORY}\dependencies\VulkanSDK" -ARG VS_INSTALL_DIRECTORY="${THIS_PROJECT_WORKING_DIRECTORY}\dependencies\VS\BuildTools" -ARG APACHE_ZOOKEEPER_INSTALL_DIRECTORY="${THIS_PROJECT_WORKING_DIRECTORY}\dependencies\ApacheZooKeeper" -ARG JAVA_HOME_SCRIPT="${THIS_PROJECT_WORKING_DIRECTORY}\dependencies\scripts\java" -ARG VS_DEV_CMD_DIRECTORY="${VS_INSTALL_DIRECTORY}\Common7\Tools" - -FROM ${BASE_IMAGE} - -SHELL ["cmd", "/S", "/C"] - -ARG VS_INSTALL_DIRECTORY - -RUN ` - # Download the Build Tools bootstrapper. - ` - curl -SL --output vs_buildtools.exe https://aka.ms/vs/17/release/vs_buildtools.exe ` - ` - # Install Build Tools with the Microsoft.VisualStudio.Workload.VCTools recommended workload and ATL & ATLMFC, excluding some Windows SDKs. - ` - && (start /w vs_buildtools.exe --quiet --wait --norestart --nocache ` - --installPath "%VS_INSTALL_DIRECTORY%" ` - --add Microsoft.VisualStudio.Workload.VCTools --includeRecommended ` - --add Microsoft.VisualStudio.Component.VC.ATL ` - --add Microsoft.VisualStudio.Component.VC.ATLMFC ` - --remove Microsoft.VisualStudio.Component.Windows10SDK.10240 ` - --remove Microsoft.VisualStudio.Component.Windows10SDK.10586 ` - --remove Microsoft.VisualStudio.Component.Windows10SDK.14393 ` - --remove Microsoft.VisualStudio.Component.Windows81SDK ` - || IF "%ERRORLEVEL%"=="3010" EXIT 0) ` - ` - # add VS's CMake to the system PATH and cleanup - ` - && setx PATH "%PATH%;%VS_INSTALL_DIRECTORY%\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin" /M ` - ` - # Cleanup - ` - && del /q vs_buildtools.exe - -ARG VULKAN_SDK_INSTALL_DIRECTORY - -RUN ` - # Download VulkanSDK - ` - curl -SL --output VulkanSDK-Installer.exe https://sdk.lunarg.com/sdk/download/1.3.268.0/windows/VulkanSDK-1.3.268.0-Installer.exe ` - ` - # Install VulkanSDK - ` - && VulkanSDK-Installer.exe install --root "%VULKAN_SDK_INSTALL_DIRECTORY%" --default-answer --accept-licenses --confirm-command ` - ` - # Cleanup - ` - && del /q VulkanSDK-Installer.exe - -RUN ` - # Download & install choco packet manager - ` - powershell Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1')) - -RUN ` - # Download & install executable Strawberry Perl 5.28.2.1 - ` - choco install -y strawberryperl --version 5.28.2.1 - -RUN ` - # Download & install Python 3.11.6 - ` - choco install -y python --version 3.11.6 - -RUN ` - # Donwload debugpy Python 
module - ` - python -m pip install --upgrade debugpy - -RUN ` - # Download & install git 2.43.0 - ` - choco install -y git --version 2.43.0 - -ARG APACHE_ZOOKEEPER_INSTALL_DIRECTORY - -RUN ` - # Download Apache ZooKeeper - ` - curl -SL --output zookeeper.zip https://dlcdn.apache.org/zookeeper/stable/apache-zookeeper-3.8.4-bin.tar.gz ` - ` - # Create install directory - ` - && mkdir "%APACHE_ZOOKEEPER_INSTALL_DIRECTORY%" ` - ` - # Unpack - ` - && tar -xf zookeeper.zip -C "%APACHE_ZOOKEEPER_INSTALL_DIRECTORY%" ` - ` - # Cleanup - ` - && del /q zookeeper.zip ` - ` - && setx PATH "%PATH%;%APACHE_ZOOKEEPER_INSTALL_DIRECTORY%\apache-zookeeper-3.8.4-bin\bin" /M - -RUN ` - # Download kazoo 2.8.0 Python (more recent versions doesn't work well with Windows) module - ` - python -m pip install kazoo==2.8.0 - -RUN ` - # Download psutil Python module - ` - python -m pip install psutil - -RUN ` - # Download OpenJDK 11 LTS - ` - choco install -y openjdk11 - -RUN ` - # Download & install nasm 2.16.1 - ` - choco install -y nasm --version 2.16.1 - -RUN ` - # Download & install nano 7.2.36 - ` - choco install -y nano --version 7.2.36 - -ARG THIS_PROJECT_WORKING_DIRECTORY - -RUN ` - setx THIS_PROJECT_WORKING_DIRECTORY "%THIS_PROJECT_WORKING_DIRECTORY%" /M - -ARG THIS_PROJECT_NABLA_DIRECTORY - -RUN ` - setx THIS_PROJECT_NABLA_DIRECTORY "%THIS_PROJECT_NABLA_DIRECTORY%" /M ` - && setx PATH "%PATH%;%THIS_PROJECT_NABLA_DIRECTORY%/docker/scripts" /M - -RUN ` - git config --system --add safe.directory * - -ARG JAVA_HOME_SCRIPT - -COPY scripts\os\javaHome.py ${JAVA_HOME_SCRIPT}\javaHome.py - -RUN ` - py "%JAVA_HOME_SCRIPT%\javaHome.py" - -COPY scripts\os\resources\zoo.cfg ${APACHE_ZOOKEEPER_INSTALL_DIRECTORY}\apache-zookeeper-3.8.3-bin\conf\zoo.cfg - -ARG VS_DEV_CMD_DIRECTORY - -RUN ` - setx PATH "%PATH%;%VS_DEV_CMD_DIRECTORY%" /M - -RUN ` - reg add "HKLM\SYSTEM\CurrentControlSet\Control\FileSystem" /v "LongPathsEnabled" /t REG_DWORD /d 1 /f - -ENTRYPOINT ["powershell.exe", "-NoLogo", "-ExecutionPolicy", "Bypass"] \ No newline at end of file diff --git a/docker/compose/ci/stages/.env/platform/windows/.env b/docker/compose/ci/stages/.env/platform/windows/.env deleted file mode 100644 index b0b0f41a5c..0000000000 --- a/docker/compose/ci/stages/.env/platform/windows/.env +++ /dev/null @@ -1,8 +0,0 @@ -THIS_PROJECT_PLATFORM=windows -THIS_PROJECT_BASE_IMAGE=artifactory.devsh.eu/nabla/${THIS_PROJECT_PLATFORM}/base:latest -THIS_PROJECT_ARCH=x86_64 -THIS_PROJECT_WORKING_DIRECTORY="C:\docker" -THIS_PROJECT_ARTIFACTORY_NABLA_DIRECTORY="C:/Users/ContainerAdministrator/Nabla/artifactory" -THIS_PROJECT_NABLA_DIRECTORY="C:/Users/ContainerAdministrator/Nabla/bind" -NABLA_TARGET_REVISION="docker" -THIS_PROJECT_DEBUG="" \ No newline at end of file diff --git a/docker/compose/ci/stages/dev/axes/dynamic/debug/compose.yml b/docker/compose/ci/stages/dev/axes/dynamic/debug/compose.yml deleted file mode 100644 index 1ffbc08ae0..0000000000 --- a/docker/compose/ci/stages/dev/axes/dynamic/debug/compose.yml +++ /dev/null @@ -1,81 +0,0 @@ -version: '3' - -services: - nabla.kazoo.server.dynamic.debug: - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: dev.nabla.kazoo.server.dynamic.debug.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.kazoo.server.dynamic.debug.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - profiles: ["dev", "dynamic", "dev.dynamic", "dev.dynamic.debug"] - env_file: - - ../../../../.env/platform/${THIS_PROJECT_PLATFORM}/.env - networks: - nabla.network: - entrypoint: ["zkServer.cmd"] - volumes: - - type: 
bind - source: ../../../../../../../../ - target: ${THIS_PROJECT_NABLA_DIRECTORY} - healthcheck: - test: ["CMD", "ncpfmp.bat", "nbl.ci.dev.lib.kazoo", "--host", "localhost"] - interval: 30s - timeout: 10s - retries: 3 - - nabla.cmake.dynamic.debug: - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: dev.nabla.cmake.dynamic.debug.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.cmake.dynamic.debug.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - profiles: ["dev", "dynamic", "dev.dynamic", "dev.dynamic.debug"] - env_file: - - ../../../../.env/platform/${THIS_PROJECT_PLATFORM}/.env - environment: - - NBL_BUILD_DIR=${THIS_PROJECT_NABLA_DIRECTORY}/build/.docker/${THIS_PROJECT_PLATFORM}/${THIS_PROJECT_ARCH}/dynamic/debug - networks: - nabla.network: - volumes: - - type: bind - source: ../../../../../../../../ - target: ${THIS_PROJECT_NABLA_DIRECTORY} - entrypoint: ["ncpfmp.bat", "nbl.ci.dev.cmake", "--libType", "dynamic", "--config", "debug"] - - nabla.build.dynamic.debug: - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: dev.nabla.build.dynamic.debug.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.build.dynamic.debug.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - profiles: ["dev", "dynamic", "dev.dynamic", "dev.dynamic.debug"] - env_file: - - ../../../../.env/platform/${THIS_PROJECT_PLATFORM}/.env - networks: - nabla.network: - depends_on: - nabla.cmake.dynamic.debug: - condition: service_completed_successfully - nabla.kazoo.server.dynamic.debug: - condition: service_healthy - volumes: - - type: bind - source: ../../../../../../../../ - target: ${THIS_PROJECT_NABLA_DIRECTORY} - entrypoint: ["ncpfmp.bat", "nbl.ci.dev.build", "--config", "Debug", "--libType", "dynamic"] - - nabla.cpack.dynamic.debug: - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: dev.nabla.cpack.dynamic.debug.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.cpack.dynamic.debug.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - profiles: ["dev", "dynamic", "dev.dynamic", "dev.dynamic.debug"] - env_file: - - ../../../../.env/platform/${THIS_PROJECT_PLATFORM}/.env - networks: - nabla.network: - depends_on: - nabla.build.dynamic.debug: - condition: service_completed_successfully - volumes: - - type: bind - source: ../../../../../../../../ - target: ${THIS_PROJECT_NABLA_DIRECTORY} - entrypoint: ["ncpfmp.bat", "nbl.ci.dev.cpack", "--libType", "dynamic", "--config", "Debug"] - -networks: - nabla.network: - external: true \ No newline at end of file diff --git a/docker/compose/ci/stages/dev/axes/dynamic/release/compose.yml b/docker/compose/ci/stages/dev/axes/dynamic/release/compose.yml deleted file mode 100644 index cb902eb970..0000000000 --- a/docker/compose/ci/stages/dev/axes/dynamic/release/compose.yml +++ /dev/null @@ -1,79 +0,0 @@ -version: '3' - -services: - nabla.kazoo.server.dynamic.release: - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: dev.nabla.kazoo.server.dynamic.release.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.kazoo.server.dynamic.release.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - profiles: ["dev", "dynamic", "dev.dynamic", "dev.dynamic.release"] - env_file: - - ../../../../.env/platform/${THIS_PROJECT_PLATFORM}/.env - networks: - nabla.network: - entrypoint: ["zkServer.cmd"] - volumes: - - type: bind - source: ../../../../../../../../ - target: ${THIS_PROJECT_NABLA_DIRECTORY} - healthcheck: - test: ["CMD", "ncpfmp.bat", "nbl.ci.dev.lib.kazoo", "--host", "localhost"] - interval: 30s - timeout: 10s - retries: 3 - - 
nabla.cmake.dynamic.release: - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: dev.nabla.cmake.dynamic.release.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.cmake.dynamic.release.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - profiles: ["dev", "dynamic", "dev.dynamic", "dev.dynamic.release"] - env_file: - - ../../../../.env/platform/${THIS_PROJECT_PLATFORM}/.env - networks: - nabla.network: - volumes: - - type: bind - source: ../../../../../../../../ - target: ${THIS_PROJECT_NABLA_DIRECTORY} - entrypoint: ["ncpfmp.bat", "nbl.ci.dev.cmake", "--libType", "dynamic", "--config", "release"] - - nabla.build.dynamic.release: - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: dev.nabla.build.dynamic.release.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.build.dynamic.release.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - profiles: ["dev", "dynamic", "dev.dynamic", "dev.dynamic.release"] - env_file: - - ../../../../.env/platform/${THIS_PROJECT_PLATFORM}/.env - networks: - nabla.network: - depends_on: - nabla.cmake.dynamic.release: - condition: service_completed_successfully - nabla.kazoo.server.dynamic.release: - condition: service_healthy - volumes: - - type: bind - source: ../../../../../../../../ - target: ${THIS_PROJECT_NABLA_DIRECTORY} - entrypoint: ["ncpfmp.bat", "nbl.ci.dev.build", "--config", "Release", "--libType", "dynamic"] - - nabla.cpack.dynamic.release: - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: dev.nabla.cpack.dynamic.release.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.cpack.dynamic.release.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - profiles: ["dev", "dynamic", "dev.dynamic", "dev.dynamic.release"] - env_file: - - ../../../../.env/platform/${THIS_PROJECT_PLATFORM}/.env - networks: - nabla.network: - depends_on: - nabla.build.dynamic.release: - condition: service_completed_successfully - volumes: - - type: bind - source: ../../../../../../../../ - target: ${THIS_PROJECT_NABLA_DIRECTORY} - entrypoint: ["ncpfmp.bat", "nbl.ci.dev.cpack", "--libType", "dynamic", "--config", "Release"] - -networks: - nabla.network: - external: true \ No newline at end of file diff --git a/docker/compose/ci/stages/dev/axes/dynamic/relwithdebinfo/compose.yml b/docker/compose/ci/stages/dev/axes/dynamic/relwithdebinfo/compose.yml deleted file mode 100644 index f56329507e..0000000000 --- a/docker/compose/ci/stages/dev/axes/dynamic/relwithdebinfo/compose.yml +++ /dev/null @@ -1,81 +0,0 @@ -version: '3' - -services: - nabla.kazoo.server.dynamic.relwithdebinfo: - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: dev.nabla.kazoo.server.dynamic.relwithdebinfo.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.kazoo.server.dynamic.relwithdebinfo.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - profiles: ["dev", "dynamic", "dev.dynamic", "dev.dynamic.relwithdebinfo"] - env_file: - - ../../../../.env/platform/${THIS_PROJECT_PLATFORM}/.env - networks: - nabla.network: - entrypoint: ["zkServer.cmd"] - volumes: - - type: bind - source: ../../../../../../../../ - target: ${THIS_PROJECT_NABLA_DIRECTORY} - healthcheck: - test: ["CMD", "ncpfmp.bat", "nbl.ci.dev.lib.kazoo", "--host", "localhost"] - interval: 30s - timeout: 10s - retries: 3 - - nabla.cmake.dynamic.relwithdebinfo: - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: dev.nabla.cmake.dynamic.relwithdebinfo.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.cmake.dynamic.relwithdebinfo.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - 
profiles: ["dev", "dynamic", "dev.dynamic", "dev.dynamic.relwithdebinfo"] - env_file: - - ../../../../.env/platform/${THIS_PROJECT_PLATFORM}/.env - environment: - - NBL_BUILD_DIR=${THIS_PROJECT_NABLA_DIRECTORY}/build/.docker/${THIS_PROJECT_PLATFORM}/${THIS_PROJECT_ARCH}/dynamic/relwithdebinfo - networks: - nabla.network: - volumes: - - type: bind - source: ../../../../../../../../ - target: ${THIS_PROJECT_NABLA_DIRECTORY} - entrypoint: ["ncpfmp.bat", "nbl.ci.dev.cmake", "--libType", "dynamic", "--config", "relwithdebinfo"] - - nabla.build.dynamic.relwithdebinfo: - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: dev.nabla.build.dynamic.relwithdebinfo.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.build.dynamic.relwithdebinfo.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - profiles: ["dev", "dynamic", "dev.dynamic", "dev.dynamic.relwithdebinfo"] - env_file: - - ../../../../.env/platform/${THIS_PROJECT_PLATFORM}/.env - networks: - nabla.network: - depends_on: - nabla.cmake.dynamic.relwithdebinfo: - condition: service_completed_successfully - nabla.kazoo.server.dynamic.relwithdebinfo: - condition: service_healthy - volumes: - - type: bind - source: ../../../../../../../../ - target: ${THIS_PROJECT_NABLA_DIRECTORY} - entrypoint: ["ncpfmp.bat", "nbl.ci.dev.build", "--config", "RelWithDebInfo", "--libType", "dynamic"] - - nabla.cpack.dynamic.relwithdebinfo: - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: dev.nabla.cpack.dynamic.relwithdebinfo.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.cpack.dynamic.relwithdebinfo.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - profiles: ["dev", "dynamic", "dev.dynamic", "dev.dynamic.relwithdebinfo"] - env_file: - - ../../../../.env/platform/${THIS_PROJECT_PLATFORM}/.env - networks: - nabla.network: - depends_on: - nabla.build.dynamic.relwithdebinfo: - condition: service_completed_successfully - volumes: - - type: bind - source: ../../../../../../../../ - target: ${THIS_PROJECT_NABLA_DIRECTORY} - entrypoint: ["ncpfmp.bat", "nbl.ci.dev.cpack", "--libType", "dynamic", "--config", "RelWithDebInfo"] - -networks: - nabla.network: - external: true \ No newline at end of file diff --git a/docker/compose/ci/stages/dev/axes/static/debug/compose.yml b/docker/compose/ci/stages/dev/axes/static/debug/compose.yml deleted file mode 100644 index 48f2fd3800..0000000000 --- a/docker/compose/ci/stages/dev/axes/static/debug/compose.yml +++ /dev/null @@ -1,81 +0,0 @@ -version: '3' - -services: - nabla.kazoo.server.static.debug: - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: dev.nabla.kazoo.server.static.debug.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.kazoo.server.static.debug.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - profiles: ["dev", "static", "dev.static", "dev.static.debug"] - env_file: - - ../../../../.env/platform/${THIS_PROJECT_PLATFORM}/.env - networks: - nabla.network: - entrypoint: ["zkServer.cmd"] - volumes: - - type: bind - source: ../../../../../../../../ - target: ${THIS_PROJECT_NABLA_DIRECTORY} - healthcheck: - test: ["CMD", "ncpfmp.bat", "nbl.ci.dev.lib.kazoo", "--host", "localhost"] - interval: 30s - timeout: 10s - retries: 3 - - nabla.cmake.static.debug: - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: dev.nabla.cmake.static.debug.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.cmake.static.debug.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - profiles: ["dev", "static", "dev.static", "dev.static.debug"] - env_file: - - 
../../../../.env/platform/${THIS_PROJECT_PLATFORM}/.env - environment: - - NBL_BUILD_DIR=${THIS_PROJECT_NABLA_DIRECTORY}/build/.docker/${THIS_PROJECT_PLATFORM}/${THIS_PROJECT_ARCH}/static/debug - networks: - nabla.network: - volumes: - - type: bind - source: ../../../../../../../../ - target: ${THIS_PROJECT_NABLA_DIRECTORY} - entrypoint: ["ncpfmp.bat", "nbl.ci.dev.cmake", "--libType", "static", "--config", "debug"] - - nabla.build.static.debug: - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: dev.nabla.build.static.debug.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.build.static.debug.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - profiles: ["dev", "static", "dev.static", "dev.static.debug"] - env_file: - - ../../../../.env/platform/${THIS_PROJECT_PLATFORM}/.env - networks: - nabla.network: - depends_on: - nabla.cmake.static.debug: - condition: service_completed_successfully - nabla.kazoo.server.static.debug: - condition: service_healthy - volumes: - - type: bind - source: ../../../../../../../../ - target: ${THIS_PROJECT_NABLA_DIRECTORY} - entrypoint: ["ncpfmp.bat", "nbl.ci.dev.build", "--config", "Debug", "--libType", "static"] - - nabla.cpack.static.debug: - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: dev.nabla.cpack.static.debug.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.cpack.static.debug.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - profiles: ["dev", "static", "dev.static", "dev.static.debug"] - env_file: - - ../../../../.env/platform/${THIS_PROJECT_PLATFORM}/.env - networks: - nabla.network: - depends_on: - nabla.build.static.debug: - condition: service_completed_successfully - volumes: - - type: bind - source: ../../../../../../../../ - target: ${THIS_PROJECT_NABLA_DIRECTORY} - entrypoint: ["ncpfmp.bat", "nbl.ci.dev.cpack", "--libType", "static", "--config", "Debug"] - -networks: - nabla.network: - external: true \ No newline at end of file diff --git a/docker/compose/ci/stages/dev/axes/static/release/compose.yml b/docker/compose/ci/stages/dev/axes/static/release/compose.yml deleted file mode 100644 index 0856eb9e43..0000000000 --- a/docker/compose/ci/stages/dev/axes/static/release/compose.yml +++ /dev/null @@ -1,79 +0,0 @@ -version: '3' - -services: - nabla.kazoo.server.static.release: - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: dev.nabla.kazoo.server.static.release.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.kazoo.server.static.release.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - profiles: ["dev", "static", "dev.static", "dev.static.release"] - env_file: - - ../../../../.env/platform/${THIS_PROJECT_PLATFORM}/.env - networks: - nabla.network: - entrypoint: ["zkServer.cmd"] - volumes: - - type: bind - source: ../../../../../../../../ - target: ${THIS_PROJECT_NABLA_DIRECTORY} - healthcheck: - test: ["CMD", "ncpfmp.bat", "nbl.ci.dev.lib.kazoo", "--host", "localhost"] - interval: 30s - timeout: 10s - retries: 3 - - nabla.cmake.static.release: - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: dev.nabla.cmake.static.release.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.cmake.static.release.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - profiles: ["dev", "static", "dev.static", "dev.static.release"] - env_file: - - ../../../../.env/platform/${THIS_PROJECT_PLATFORM}/.env - networks: - nabla.network: - volumes: - - type: bind - source: ../../../../../../../../ - target: ${THIS_PROJECT_NABLA_DIRECTORY} - entrypoint: ["ncpfmp.bat", "nbl.ci.dev.cmake", "--libType", 
"static", "--config", "release"] - - nabla.build.static.release: - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: dev.nabla.build.static.release.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.build.static.release.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - profiles: ["dev", "static", "dev.static", "dev.static.release"] - env_file: - - ../../../../.env/platform/${THIS_PROJECT_PLATFORM}/.env - networks: - nabla.network: - depends_on: - nabla.cmake.static.release: - condition: service_completed_successfully - nabla.kazoo.server.static.release: - condition: service_healthy - volumes: - - type: bind - source: ../../../../../../../../ - target: ${THIS_PROJECT_NABLA_DIRECTORY} - entrypoint: ["ncpfmp.bat", "nbl.ci.dev.build", "--config", "Release", "--libType", "static"] - - nabla.cpack.static.release: - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: dev.nabla.cpack.static.release.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.cpack.static.release.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - profiles: ["dev", "static", "dev.static", "dev.static.release"] - env_file: - - ../../../../.env/platform/${THIS_PROJECT_PLATFORM}/.env - networks: - nabla.network: - depends_on: - nabla.build.static.release: - condition: service_completed_successfully - volumes: - - type: bind - source: ../../../../../../../../ - target: ${THIS_PROJECT_NABLA_DIRECTORY} - entrypoint: ["ncpfmp.bat", "nbl.ci.dev.cpack", "--libType", "static", "--config", "Release"] - -networks: - nabla.network: - external: true \ No newline at end of file diff --git a/docker/compose/ci/stages/dev/axes/static/relwithdebinfo/compose.yml b/docker/compose/ci/stages/dev/axes/static/relwithdebinfo/compose.yml deleted file mode 100644 index 792ffb8789..0000000000 --- a/docker/compose/ci/stages/dev/axes/static/relwithdebinfo/compose.yml +++ /dev/null @@ -1,81 +0,0 @@ -version: '3' - -services: - nabla.kazoo.server.static.relwithdebinfo: - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: dev.nabla.kazoo.server.static.relwithdebinfo.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.kazoo.server.static.relwithdebinfo.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - profiles: ["dev", "static", "dev.static", "dev.static.relwithdebinfo"] - env_file: - - ../../../../.env/platform/${THIS_PROJECT_PLATFORM}/.env - networks: - nabla.network: - entrypoint: ["zkServer.cmd"] - volumes: - - type: bind - source: ../../../../../../../../ - target: ${THIS_PROJECT_NABLA_DIRECTORY} - healthcheck: - test: ["CMD", "ncpfmp.bat", "nbl.ci.dev.lib.kazoo", "--host", "localhost"] - interval: 30s - timeout: 10s - retries: 3 - - nabla.cmake.static.relwithdebinfo: - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: dev.nabla.cmake.static.relwithdebinfo.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.cmake.static.relwithdebinfo.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - profiles: ["dev", "static", "dev.static", "dev.static.relwithdebinfo"] - env_file: - - ../../../../.env/platform/${THIS_PROJECT_PLATFORM}/.env - environment: - - NBL_BUILD_DIR=${THIS_PROJECT_NABLA_DIRECTORY}/build/.docker/${THIS_PROJECT_PLATFORM}/${THIS_PROJECT_ARCH}/static/relwithdebinfo - networks: - nabla.network: - volumes: - - type: bind - source: ../../../../../../../../ - target: ${THIS_PROJECT_NABLA_DIRECTORY} - entrypoint: ["ncpfmp.bat", "nbl.ci.dev.cmake", "--libType", "static", "--config", "relwithdebinfo"] - - nabla.build.static.relwithdebinfo: - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: 
dev.nabla.build.static.relwithdebinfo.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.build.static.relwithdebinfo.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - profiles: ["dev", "static", "dev.static", "dev.static.relwithdebinfo"] - env_file: - - ../../../../.env/platform/${THIS_PROJECT_PLATFORM}/.env - networks: - nabla.network: - depends_on: - nabla.cmake.static.relwithdebinfo: - condition: service_completed_successfully - nabla.kazoo.server.static.relwithdebinfo: - condition: service_healthy - volumes: - - type: bind - source: ../../../../../../../../ - target: ${THIS_PROJECT_NABLA_DIRECTORY} - entrypoint: ["ncpfmp.bat", "nbl.ci.dev.build", "--config", "RelWithDebInfo", "--libType", "static"] - - nabla.cpack.static.relwithdebinfo: - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: dev.nabla.cpack.static.relwithdebinfo.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.cpack.static.relwithdebinfo.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - profiles: ["dev", "static", "dev.static", "dev.static.relwithdebinfo"] - env_file: - - ../../../../.env/platform/${THIS_PROJECT_PLATFORM}/.env - networks: - nabla.network: - depends_on: - nabla.build.static.relwithdebinfo: - condition: service_completed_successfully - volumes: - - type: bind - source: ../../../../../../../../ - target: ${THIS_PROJECT_NABLA_DIRECTORY} - entrypoint: ["ncpfmp.bat", "nbl.ci.dev.cpack", "--libType", "static", "--config", "RelWithDebInfo"] - -networks: - nabla.network: - external: true \ No newline at end of file diff --git a/docker/compose/ci/stages/dev/compose.yml b/docker/compose/ci/stages/dev/compose.yml deleted file mode 100644 index f1d0b64a3d..0000000000 --- a/docker/compose/ci/stages/dev/compose.yml +++ /dev/null @@ -1,39 +0,0 @@ -include: - - path: - - axes/static/release/compose.yml - project_directory: axes/static/release - env_file: ../.env/platform/${THIS_PROJECT_PLATFORM}/.env - - path: - - axes/static/relwithdebinfo/compose.yml - project_directory: axes/static/relwithdebinfo - env_file: ../.env/platform/${THIS_PROJECT_PLATFORM}/.env - - path: - - axes/static/debug/compose.yml - project_directory: axes/static/debug - env_file: ../.env/platform/${THIS_PROJECT_PLATFORM}/.env - - path: - - axes/dynamic/release/compose.yml - project_directory: axes/dynamic/release - env_file: ../.env/platform/${THIS_PROJECT_PLATFORM}/.env - - path: - - axes/dynamic/relwithdebinfo/compose.yml - project_directory: axes/dynamic/relwithdebinfo - env_file: ../.env/platform/${THIS_PROJECT_PLATFORM}/.env - - path: - - axes/dynamic/debug/compose.yml - project_directory: axes/dynamic/debug - env_file: ../.env/platform/${THIS_PROJECT_PLATFORM}/.env - -services: - nabla.init: - build: - context: ../../../ - args: - - THIS_PROJECT_NABLA_DIRECTORY=${THIS_PROJECT_NABLA_DIRECTORY} - env_file: - - ../.env/platform/${THIS_PROJECT_PLATFORM}/.env - image: ${THIS_PROJECT_BASE_IMAGE} - container_name: dev.nabla.init.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - hostname: dev.nabla.init.${THIS_PROJECT_ARCH}.${THIS_PROJECT_PLATFORM} - networks: - nabla.network: \ No newline at end of file diff --git a/docker/compose/scripts/os/javaHome.py b/docker/compose/scripts/os/javaHome.py deleted file mode 100644 index 347a72def5..0000000000 --- a/docker/compose/scripts/os/javaHome.py +++ /dev/null @@ -1,23 +0,0 @@ -import subprocess, os, re - -completedProcess = subprocess.run( - "java -XshowSettings:properties", - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True, - 
check=False -) - -output = completedProcess.stderr.strip() -regexMatch = re.search(r'java\.home = (.+)', output) -if regexMatch: - JAVA_HOME = regexMatch.group(1).strip() -else: - JAVA_HOME = "" - -if JAVA_HOME: - os.system(f'setx JAVA_HOME "{JAVA_HOME}" /M') - print(f'JAVA_HOME has been set to: {JAVA_HOME}') -else: - print("Error: Unable to retrieve or set JAVA_HOME.") \ No newline at end of file diff --git a/docker/compose/scripts/os/resources/zoo.cfg b/docker/compose/scripts/os/resources/zoo.cfg deleted file mode 100644 index 9dfefa90e1..0000000000 --- a/docker/compose/scripts/os/resources/zoo.cfg +++ /dev/null @@ -1,6 +0,0 @@ -tickTime=2000 -initLimit=10 -syncLimit=5 -dataDir=C:/tmp/zookeeper -clientPort=2181 -maxClientCnxns=500 \ No newline at end of file diff --git a/docker/dev.py b/docker/dev.py deleted file mode 100644 index d0ef2187e1..0000000000 --- a/docker/dev.py +++ /dev/null @@ -1,62 +0,0 @@ -import os, subprocess, sys, argparse - - -def parseInputArguments(): - parser = argparse.ArgumentParser(description="Nabla CI Pipeline compose Framework script") - - parser.add_argument("--platform", help="Target platform", type=str, default="windows") - parser.add_argument("--arch", help="Target arch", type=str, default="x86_64") - parser.add_argument('--profiles', nargs='*', default=["dev.dynamic.debug"], help='Target list of profiles to apply') - - args = parser.parse_args() - - return args - - -def updateSubmodules(root): - updateSubmoduleScript = os.path.normpath(os.path.join(root, "cmake/submodules/update.cmake")) - return subprocess.run(f"cmake -P \"{updateSubmoduleScript}\"", check=True) - - -def main(): - try: - args = parseInputArguments() - - root = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../")) - - updateSubmodules(root) - - os.chdir(os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "compose/ci/stages/dev"))) - - platform = args.platform - arch = args.arch - - if subprocess.call(["docker", "network", "inspect", "nabla.network"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) != 0: - subprocess.run(["docker", "network", "create", "--driver", "nat", "--subnet", "172.28.0.0/16", "--gateway", "172.28.5.1", "nabla.network"], check=True) # create nabla.network network if not present - - envFile = os.path.abspath(f"../.env/platform/{platform}/.env") - profiles = (lambda profiles: [item for profile in profiles for item in ["--profile", profile]])(args.profiles) - - compose = [ - "docker", "compose", - "-f", f"./compose.yml", - "--env-file", envFile - ] + profiles - - subprocess.run(compose + ["build"], check=True) - subprocess.run(compose + ["config"], check=True) - subprocess.run(compose + ["create", "--force-recreate"], check=True) - subprocess.run(compose + ["up"], check=True) - subprocess.run(compose + ["down"], check=True) - - except subprocess.CalledProcessError as e: - print(f"Subprocess failed with exit code {e.returncode}") - sys.exit(e.returncode) - - except Exception as e: - print(f"Unexpected error: {e}") - sys.exit(-1) - - -if __name__ == "__main__": - main() diff --git a/docker/scripts/__init__.py b/docker/scripts/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/docker/scripts/nbl/ci/dev/.vscode/launch.json b/docker/scripts/nbl/ci/dev/.vscode/launch.json deleted file mode 100644 index e664be35d1..0000000000 --- a/docker/scripts/nbl/ci/dev/.vscode/launch.json +++ /dev/null @@ -1,117 +0,0 @@ -{ - "version": "0.2.0", - "configurations": [ - { - "name": 
"dev.nabla.base.x86_64.windows Container Remote Debug", - "type": "python", - "request": "attach", - "connect": { - "host": "172.28.5.2", - "port": 5678 - }, - "pathMappings": [ - { - "localRoot": "${workspaceFolder}", - "remoteRoot": "." - } - ], - "justMyCode": true - }, - { - "name": "dev.nabla.static.release.x86_64.windows Container Remote Debug", - "type": "python", - "request": "attach", - "connect": { - "host": "172.28.5.3", - "port": 5679 - }, - "pathMappings": [ - { - "localRoot": "${workspaceFolder}", - "remoteRoot": "." - } - ], - "justMyCode": true - }, - { - "name": "dev.nabla.static.relwithdebinfo.x86_64.windows Container Remote Debug", - "type": "python", - "request": "attach", - "connect": { - "host": "172.28.5.4", - "port": 5680 - }, - "pathMappings": [ - { - "localRoot": "${workspaceFolder}", - "remoteRoot": "." - } - ], - "justMyCode": true - }, - { - "name": "dev.nabla.static.debug.x86_64.windows Container Remote Debug", - "type": "python", - "request": "attach", - "connect": { - "host": "172.28.5.5", - "port": 5681 - }, - "pathMappings": [ - { - "localRoot": "${workspaceFolder}", - "remoteRoot": "." - } - ], - "justMyCode": true - }, - { - "name": "dev.nabla.dynamic.release.x86_64.windows Container Remote Debug", - "type": "python", - "request": "attach", - "connect": { - "host": "172.28.5.6", - "port": 5682 - }, - "pathMappings": [ - { - "localRoot": "${workspaceFolder}", - "remoteRoot": "." - } - ], - "justMyCode": true - }, - { - "name": "dev.nabla.dynamic.relwithdebinfo.x86_64.windows Container Remote Debug", - "type": "python", - "request": "attach", - "connect": { - "host": "172.28.5.7", - "port": 5683 - }, - "pathMappings": [ - { - "localRoot": "${workspaceFolder}", - "remoteRoot": "." - } - ], - "justMyCode": true - }, - { - "name": "dev.nabla.dynamic.debug.x86_64.windows Container Remote Debug", - "type": "python", - "request": "attach", - "connect": { - "host": "172.28.5.8", - "port": 5684 - }, - "pathMappings": [ - { - "localRoot": "${workspaceFolder}", - "remoteRoot": "." 
- } - ], - "justMyCode": true - } - ] -} \ No newline at end of file diff --git a/docker/scripts/nbl/ci/dev/__init__.py b/docker/scripts/nbl/ci/dev/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/docker/scripts/nbl/ci/dev/build.py b/docker/scripts/nbl/ci/dev/build.py deleted file mode 100644 index b16f9d3a8b..0000000000 --- a/docker/scripts/nbl/ci/dev/build.py +++ /dev/null @@ -1,84 +0,0 @@ -import os, subprocess, sys, argparse -from .lib.kazoo import * - -def parseInputArguments(): - parser = argparse.ArgumentParser(description="Nabla CI Pipeline nbl.ci.dev.build Framework Module") - - parser.add_argument("--config", help="Target CMake configuration", type=str, default="Release") - parser.add_argument("--libType", help="Target library type", type=str, default="dynamic") - - args = parser.parse_args() - - return args - -def buildNabla(libType, config): - return subprocess.run(f"cmake --build --preset ci-build-{libType}-msvc-{config}", check=False) - - -def buildProject(libType, config, buildDirectory): - return subprocess.run(f"cmake --build \"{buildDirectory}\" --config {config}", check=False) - - -def getCPackBundleHash(buildDirectory, target, component = "ALL", relativeDirectory = "/"): - return f"{buildDirectory};{target};{component};{relativeDirectory};" - - -def main(): - try: - THIS_PROJECT_NABLA_DIRECTORY = os.environ.get('THIS_PROJECT_NABLA_DIRECTORY', '') - - if not THIS_PROJECT_NABLA_DIRECTORY: - raise ValueError("THIS_PROJECT_NABLA_DIRECTORY environment variables doesn't exist!") - - THIS_PROJECT_PLATFORM = os.environ.get('THIS_PROJECT_PLATFORM', '') - - if not THIS_PROJECT_PLATFORM: - raise ValueError("THIS_PROJECT_PLATFORM environment variables doesn't exist!") - - THIS_PROJECT_ARCH = os.environ.get('THIS_PROJECT_ARCH', '') - - if not THIS_PROJECT_ARCH: - raise ValueError("THIS_PROJECT_ARCH environment variables doesn't exist!") - - THIS_SERVICE_BINARY_PROJECT_PATH = os.environ.get('THIS_SERVICE_BINARY_PROJECT_PATH', '') - - os.chdir(THIS_PROJECT_NABLA_DIRECTORY) - - args = parseInputArguments() - - config = args.config - lowerCaseConfig = config.lower() - libType = args.libType - - topBuildDirectory = os.path.normpath(os.path.join(THIS_PROJECT_NABLA_DIRECTORY, f"build/.docker/{THIS_PROJECT_PLATFORM}/{THIS_PROJECT_ARCH}/{libType}/{lowerCaseConfig}")) - targetBuildDirectory = os.path.normpath(os.path.join(topBuildDirectory, THIS_SERVICE_BINARY_PROJECT_PATH)) - - if topBuildDirectory == targetBuildDirectory: - buildNabla(libType, lowerCaseConfig) - cpackBundleHash = getCPackBundleHash(topBuildDirectory, "Libraries") + getCPackBundleHash(topBuildDirectory, "Runtime") - else: - buildProject(libType, config, targetBuildDirectory) - cpackBundleHash += getCPackBundleHash(targetBuildDirectory, "Executables") + getCPackBundleHash(targetBuildDirectory, "Media") - - kazooConnector = KazooConnector(f"dev.nabla.kazoo.server.{libType}.{lowerCaseConfig}.x86_64.{THIS_PROJECT_PLATFORM}") # DNS record as compose service name - kazooConnector.connect() - - zNodePath = f"/CPACK_INSTALL_CMAKE_PROJECTS" - kazooConnector.createKazooAtomic(zNodePath) - kazooConnector.appendKazooAtomic(zNodePath, cpackBundleHash) - print(f"Atomic update performed on {zNodePath} zNode path") - print(f"cpackBundleHash = {cpackBundleHash}") - - kazooConnector.disconnect() - - except subprocess.CalledProcessError as e: - print(f"Subprocess failed with exit code {e.returncode}") - sys.exit(e.returncode) - - except Exception as e: - print(f"Unexpected error: {e}") - sys.exit(-1) - - -if 
__name__ == "__main__": - main() diff --git a/docker/scripts/nbl/ci/dev/cmake.py b/docker/scripts/nbl/ci/dev/cmake.py deleted file mode 100644 index b0ca2b86ed..0000000000 --- a/docker/scripts/nbl/ci/dev/cmake.py +++ /dev/null @@ -1,47 +0,0 @@ -import os, subprocess, sys, argparse - -def parseInputArguments(): - parser = argparse.ArgumentParser(description="Nabla CI Pipeline nbl.ci.dev.cmake Framework Module") - - parser.add_argument("--libType", help="Target library type", type=str, default="dynamic") - parser.add_argument("--config", help="Target library type", type=str, default="release") - - args = parser.parse_args() - - return args - -def configure(libType, config): - subprocess.run(f"cmake . --preset ci-configure-{libType}-msvc-{config}", check=True) - -def main(): - try: - THIS_PROJECT_NABLA_DIRECTORY = os.environ.get('THIS_PROJECT_NABLA_DIRECTORY', '') - - if not THIS_PROJECT_NABLA_DIRECTORY: - raise ValueError("THIS_PROJECT_NABLA_DIRECTORY environment variables doesn't exist!") - - THIS_PROJECT_ARCH = os.environ.get('THIS_PROJECT_ARCH', '') - - if not THIS_PROJECT_ARCH: - raise ValueError("THIS_PROJECT_ARCH environment variables doesn't exist!") - - os.chdir(THIS_PROJECT_NABLA_DIRECTORY) - - args = parseInputArguments() - - libType = args.libType - config = args.config - - configure(libType, config) - - except subprocess.CalledProcessError as e: - print(f"Subprocess failed with exit code {e.returncode}") - sys.exit(e.returncode) - - except Exception as e: - print(f"Unexpected error: {e}") - sys.exit(-1) - - -if __name__ == "__main__": - main() diff --git a/docker/scripts/nbl/ci/dev/cpack.py b/docker/scripts/nbl/ci/dev/cpack.py deleted file mode 100644 index 48a0adf790..0000000000 --- a/docker/scripts/nbl/ci/dev/cpack.py +++ /dev/null @@ -1,72 +0,0 @@ -import os, subprocess, argparse -from .lib.kazoo import * - -def parseInputArguments(): - parser = argparse.ArgumentParser(description="Nabla CI Pipeline nbl.ci.dev.cpack Framework Module") - - parser.add_argument("--config", help="Target CMake configuration", type=str, default="Release") - parser.add_argument("--libType", help="Target library type", type=str, default="dynamic") - - args = parser.parse_args() - - return args - -def cpack(libType, config, CPACK_INSTALL_CMAKE_PROJECTS, packageDirectory): - if not packageDirectory: - packageDirectory = f"./package/{config}/{libType}" - - return subprocess.run(f"cpack --preset ci-package-{libType}-msvc-{config} -B \"{packageDirectory}\" -D CPACK_INSTALL_CMAKE_PROJECTS=\"{CPACK_INSTALL_CMAKE_PROJECTS}\"", check=True) - - -def main(): - try: - THIS_PROJECT_NABLA_DIRECTORY = os.environ.get('THIS_PROJECT_NABLA_DIRECTORY', '') - - if not THIS_PROJECT_NABLA_DIRECTORY: - raise ValueError("THIS_PROJECT_NABLA_DIRECTORY environment variables doesn't exist!") - - THIS_PROJECT_PLATFORM = os.environ.get('THIS_PROJECT_PLATFORM', '') - - if not THIS_PROJECT_PLATFORM: - raise ValueError("THIS_PROJECT_PLATFORM environment variables doesn't exist!") - - THIS_PROJECT_ARCH = os.environ.get('THIS_PROJECT_ARCH', '') - - if not THIS_PROJECT_ARCH: - raise ValueError("THIS_PROJECT_ARCH environment variables doesn't exist!") - - os.chdir(THIS_PROJECT_NABLA_DIRECTORY) - - args = parseInputArguments() - - config = args.config - lowerCaseConfig = config.lower() - libType = args.libType - - kazooConnector = KazooConnector(f"dev.nabla.kazoo.server.{libType}.{lowerCaseConfig}.x86_64.{THIS_PROJECT_PLATFORM}") # DNS record as compose service name - kazooConnector.connect() - - zNodePath = 
f"/CPACK_INSTALL_CMAKE_PROJECTS" - cpackBundleHash = kazooConnector.getKazooAtomic(zNodePath) - print(f"Atomic read performed on {zNodePath} zNode path") - - kazooConnector.requestServerShutdown() - kazooConnector.disconnect() - - if cpackBundleHash: - print(f"CPACK_INSTALL_CMAKE_PROJECTS = {cpackBundleHash}") - cpack(libType, lowerCaseConfig, cpackBundleHash, f"{THIS_PROJECT_NABLA_DIRECTORY}/build/artifacts/{THIS_PROJECT_PLATFORM}/{THIS_PROJECT_ARCH}/{config}/{libType}") - else: - print("CPACK_INSTALL_CMAKE_PROJECTS is empty, skipping cpack...") - - except subprocess.CalledProcessError as e: - print(f"Subprocess failed with exit code {e.returncode}") - sys.exit(e.returncode) - - except Exception as e: - print(f"Unexpected error: {e}") - sys.exit(-1) - - -if __name__ == "__main__": - main() diff --git a/docker/scripts/nbl/ci/dev/lib/kazoo.py b/docker/scripts/nbl/ci/dev/lib/kazoo.py deleted file mode 100644 index 2e67b4edf4..0000000000 --- a/docker/scripts/nbl/ci/dev/lib/kazoo.py +++ /dev/null @@ -1,105 +0,0 @@ -import os, subprocess, sys, argparse, kazoo.exceptions, kazoo.client, socket - -def getLocalIPV4(): - try: - s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) - s.connect(('8.8.8.8', 80)) - local_ipv4 = s.getsockname()[0] # Get the local IPv4 address - s.close() - - return local_ipv4 - except socket.error: - return None - - -def resolveServiceToIPv4(serviceName): - try: - ipv4Address = socket.gethostbyname(serviceName) - return ipv4Address - except socket.gaierror as e: - print(f"Error while resolving {serviceName} to an IPv4 address: {e}") - return None - - -class KazooConnector: - def __init__(self, dnsServiceName): - self.dnsServiceName = dnsServiceName - self.host = resolveServiceToIPv4(self.dnsServiceName) - self.zk = kazoo.client.KazooClient(hosts=self.dnsServiceName) - - def connect(self): - self.zk.start() - print(f"Connected to {self.dnsServiceName} kazoo host") - - def disconnect(self): - self.zk.stop() - self.zk.close() - print(f"Disconnected from {self.dnsServiceName} kazoo host") - - def requestServerShutdown(self): - self.createKazooAtomic("/sdRequest") - print(f"Requested shutdown of {self.dnsServiceName} kazoo host") - - def createKazooAtomic(self, zNodePath): - if not self.zk.exists(zNodePath): - self.zk.create(zNodePath, b"") - - def getKazooAtomic(self, zNodePath): - if self.zk.exists(zNodePath): - data, _ = self.zk.get(zNodePath) - return data.decode() - else: - return "" - - def appendKazooAtomic(self, zNodePath, data): - while True: - try: - currentData, stat = self.zk.get(zNodePath) - newData = currentData.decode() + data - self.zk.set(zNodePath, newData.encode(), version=stat.version) - break - except kazoo.exceptions.BadVersionException: - pass - - -def healthyCheck(host): - try: - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - ipv4 = host - - if host == "localhost" or host == "127.0.0.1": - ipv4 = getLocalIPV4() - - s.settimeout(5) - s.connect((ipv4, 2181)) - - print(f"Connected to {ipv4} kazoo host") - - # TODO: find lib which does nice shutdown cross platform - sdProcess = subprocess.run("zkCli.cmd get /sdRequest", stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - shutdown = not sdProcess.stderr.strip() - - if shutdown: - print("Requested shutdown...") - - try: - subprocess.run(f"shutdown /s /f", check=True) - except subprocess.CalledProcessError as e: - print(f"Could not shutdown container because of: {e.stderr}") - - return True - except (socket.error, socket.timeout): - print(f"Excpetion caught while trying to connect to 
kazoo host: \"{socket.error}\"") - return False - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Nabla CI Pipeline nbl.ci.dev.kazoo Framework Module") - - parser.add_argument("--host", help="Kazoo Server host", type=str, default="localhost") - - args = parser.parse_args() - - if healthyCheck(args.host): - sys.exit(0) # healthy - else: - sys.exit(1) # not healthy \ No newline at end of file diff --git a/docker/scripts/ncpfmp.bat b/docker/scripts/ncpfmp.bat deleted file mode 100644 index ce13fa7ebd..0000000000 --- a/docker/scripts/ncpfmp.bat +++ /dev/null @@ -1,13 +0,0 @@ -:: Nabla CI Pipeline Framework Module proxy - -@echo off -@setlocal - -:: Get the directory where this platform proxy script is located -set scriptDirectory=%~dp0 - -:: Change the current working directory to the cross-platform Python build script -cd /d "%scriptDirectory%" - -:: Execute implementation of the pipeline build script in Python -python -m %* \ No newline at end of file From dcc537e3e018e3e1bbfb6fec888ff2213edb1498 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 13 Nov 2024 16:30:53 -0300 Subject: [PATCH 087/432] More changes following Bloom PR review --- examples_tests | 2 +- include/nbl/builtin/hlsl/workgroup/fft.hlsl | 103 +++++++++++++------- 2 files changed, 70 insertions(+), 35 deletions(-) diff --git a/examples_tests b/examples_tests index 0df6008c92..e946590f68 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 0df6008c923712ed729b47cf7711c6301622fdb3 +Subproject commit e946590f686875e4bf0262b738e776fc668b58da diff --git a/include/nbl/builtin/hlsl/workgroup/fft.hlsl b/include/nbl/builtin/hlsl/workgroup/fft.hlsl index 7d43caa63a..44a0dfc1d7 100644 --- a/include/nbl/builtin/hlsl/workgroup/fft.hlsl +++ b/include/nbl/builtin/hlsl/workgroup/fft.hlsl @@ -8,6 +8,7 @@ #include "nbl/builtin/hlsl/mpl.hlsl" #include "nbl/builtin/hlsl/memory_accessor.hlsl" #include "nbl/builtin/hlsl/bit.hlsl" +#include "nbl/builtin/hlsl/concepts.hlsl" // Caveats // - Sin and Cos in HLSL take 32-bit floats. 
Using this library with 64-bit floats works perfectly fine, but DXC will emit warnings @@ -90,10 +91,6 @@ namespace impl } } //namespace impl -// Get the required size (in number of uint32_t elements) of the workgroup shared memory array needed for the FFT -template -NBL_CONSTEXPR uint32_t SharedMemoryDWORDs = (sizeof(complex_t) / sizeof(uint32_t)) * WorkgroupSize; - // Util to unpack two values from the packed FFT X + iY - get outputs in the same input arguments, storing x to lo and y to hi template void unpack(NBL_REF_ARG(complex_t) lo, NBL_REF_ARG(complex_t) hi) @@ -103,7 +100,7 @@ void unpack(NBL_REF_ARG(complex_t) lo, NBL_REF_ARG(complex_t) hi lo = x; } -template +template struct FFTIndexingUtils { // This function maps the index `idx` in the output array of a Nabla FFT to the index `freqIdx` in the DFT such that `DFT[freqIdx] = NablaFFT[idx]` @@ -132,16 +129,36 @@ struct FFTIndexingUtils return getNablaIndex(getDFTMirrorIndex(getDFTIndex(idx))); } - NBL_CONSTEXPR_STATIC_INLINE uint16_t ElementsPerInvocationLog2 = mpl::log2::value; - NBL_CONSTEXPR_STATIC_INLINE uint16_t FFTSizeLog2 = ElementsPerInvocationLog2 + mpl::log2::value; - NBL_CONSTEXPR_STATIC_INLINE uint32_t FFTSize = uint32_t(WorkgroupSize) * uint32_t(ElementsPerInvocation); + NBL_CONSTEXPR_STATIC_INLINE uint16_t FFTSizeLog2 = ElementsPerInvocationLog2 + WorkgroupSizeLog2; + NBL_CONSTEXPR_STATIC_INLINE uint32_t FFTSize = uint32_t(1) << FFTSizeLog2; }; } //namespace fft -// ----------------------------------- End Utils ----------------------------------------------- +// ----------------------------------- End Utils -------------------------------------------------------------- + +namespace fft +{ + +template 0 && _WorkgroupSizeLog2 >= 5) +struct ConstevalParameters +{ + using scalar_t = _Scalar; + + NBL_CONSTEXPR_STATIC_INLINE uint16_t ElementsPerInvocationLog2 = _ElementsPerInvocationLog2; + NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSizeLog2 = _WorkgroupSizeLog2; + NBL_CONSTEXPR_STATIC_INLINE uint32_t TotalSize = uint32_t(1) << (ElementsPerInvocationLog2 + WorkgroupSizeLog2); -template + NBL_CONSTEXPR_STATIC_INLINE uint16_t ElementsPerInvocation = uint16_t(1) << ElementsPerInvocationLog2; + NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSize = uint16_t(1) << WorkgroupSizeLog2; + + // Required size (in number of uint32_t elements) of the workgroup shared memory array needed for the FFT + NBL_CONSTEXPR_STATIC_INLINE uint32_t SharedMemoryDWORDs = (sizeof(complex_t) / sizeof(uint32_t)) << WorkgroupSizeLog2; +}; + +} //namespace fft + +template struct FFT; // For the FFT methods below, we assume: @@ -161,9 +178,11 @@ struct FFT; // * void workgroupExecutionAndMemoryBarrier(); // 2 items per invocation forward specialization -template -struct FFT<2,false, WorkgroupSize, Scalar, device_capabilities> +template +struct FFT, device_capabilities> { + using consteval_params_t = fft::ConstevalParameters<1, WorkgroupSizeLog2, Scalar>; + template static void FFT_loop(uint32_t stride, NBL_REF_ARG(complex_t) lo, NBL_REF_ARG(complex_t) hi, uint32_t threadID, NBL_REF_ARG(SharedMemoryAdaptor) sharedmemAdaptor) { @@ -177,6 +196,8 @@ struct FFT<2,false, WorkgroupSize, Scalar, device_capabilities> template static void __call(NBL_REF_ARG(Accessor) accessor, NBL_REF_ARG(SharedMemoryAccessor) sharedmemAccessor) { + NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSize = consteval_params_t::WorkgroupSize; + // Compute the indices only once const uint32_t threadID = uint32_t(SubgroupContiguousIndex()); const uint32_t loIx = threadID; @@ -222,12 +243,12 
@@ struct FFT<2,false, WorkgroupSize, Scalar, device_capabilities> } }; - - // 2 items per invocation inverse specialization -template -struct FFT<2,true, WorkgroupSize, Scalar, device_capabilities> +template +struct FFT, device_capabilities> { + using consteval_params_t = fft::ConstevalParameters<1, WorkgroupSizeLog2, Scalar>; + template static void FFT_loop(uint32_t stride, NBL_REF_ARG(complex_t) lo, NBL_REF_ARG(complex_t) hi, uint32_t threadID, NBL_REF_ARG(SharedMemoryAdaptor) sharedmemAdaptor) { @@ -241,6 +262,8 @@ struct FFT<2,true, WorkgroupSize, Scalar, device_capabilities> template static void __call(NBL_REF_ARG(Accessor) accessor, NBL_REF_ARG(SharedMemoryAccessor) sharedmemAccessor) { + NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSize = consteval_params_t::WorkgroupSize; + // Compute the indices only once const uint32_t threadID = uint32_t(SubgroupContiguousIndex()); const uint32_t loIx = threadID; @@ -291,17 +314,23 @@ struct FFT<2,true, WorkgroupSize, Scalar, device_capabilities> }; // Forward FFT -template -struct FFT +template +struct FFT, device_capabilities> { + using consteval_params_t = fft::ConstevalParameters; + using small_fft_consteval_params_t = fft::ConstevalParameters<1, WorkgroupSizeLog2, Scalar>; + template - static enable_if_t< (mpl::is_pot_v && K > 2), void > __call(NBL_REF_ARG(Accessor) accessor, NBL_REF_ARG(SharedMemoryAccessor) sharedmemAccessor) + static void __call(NBL_REF_ARG(Accessor) accessor, NBL_REF_ARG(SharedMemoryAccessor) sharedmemAccessor) { + NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSize = consteval_params_t::WorkgroupSize; + NBL_CONSTEXPR_STATIC_INLINE uint16_t ElementsPerInvocation = consteval_params_t::ElementsPerInvocation; + [unroll] - for (uint32_t stride = (K / 2) * WorkgroupSize; stride > WorkgroupSize; stride >>= 1) + for (uint32_t stride = (ElementsPerInvocation / 2) * WorkgroupSize; stride > WorkgroupSize; stride >>= 1) { [unroll] - for (uint32_t virtualThreadID = SubgroupContiguousIndex(); virtualThreadID < (K / 2) * WorkgroupSize; virtualThreadID += WorkgroupSize) + for (uint32_t virtualThreadID = SubgroupContiguousIndex(); virtualThreadID < (ElementsPerInvocation / 2) * WorkgroupSize; virtualThreadID += WorkgroupSize) { const uint32_t loIx = ((virtualThreadID & (~(stride - 1))) << 1) | (virtualThreadID & (stride - 1)); const uint32_t hiIx = loIx | stride; @@ -318,47 +347,53 @@ struct FFT accessor.memoryBarrier(); // no execution barrier just making sure writes propagate to accessor } - // do K/2 small workgroup FFTs + // do ElementsPerInvocation/2 small workgroup FFTs accessor_adaptors::Offset offsetAccessor; offsetAccessor.accessor = accessor; [unroll] - for (uint32_t k = 0; k < K; k += 2) + for (uint32_t k = 0; k < ElementsPerInvocation; k += 2) { if (k) sharedmemAccessor.workgroupExecutionAndMemoryBarrier(); offsetAccessor.offset = WorkgroupSize*k; - FFT<2,false, WorkgroupSize, Scalar, device_capabilities>::template __call(offsetAccessor,sharedmemAccessor); + FFT::template __call(offsetAccessor,sharedmemAccessor); } accessor = offsetAccessor.accessor; } }; // Inverse FFT -template -struct FFT +template +struct FFT, device_capabilities> { + using consteval_params_t = fft::ConstevalParameters; + using small_fft_consteval_params_t = fft::ConstevalParameters<1, WorkgroupSizeLog2, Scalar>; + template - static enable_if_t< (mpl::is_pot_v && K > 2), void > __call(NBL_REF_ARG(Accessor) accessor, NBL_REF_ARG(SharedMemoryAccessor) sharedmemAccessor) + static void __call(NBL_REF_ARG(Accessor) accessor, 
NBL_REF_ARG(SharedMemoryAccessor) sharedmemAccessor) { + NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSize = consteval_params_t::WorkgroupSize; + NBL_CONSTEXPR_STATIC_INLINE uint16_t ElementsPerInvocation = consteval_params_t::ElementsPerInvocation; + // do K/2 small workgroup FFTs accessor_adaptors::Offset offsetAccessor; offsetAccessor.accessor = accessor; [unroll] - for (uint32_t k = 0; k < K; k += 2) + for (uint32_t k = 0; k < ElementsPerInvocation; k += 2) { if (k) sharedmemAccessor.workgroupExecutionAndMemoryBarrier(); offsetAccessor.offset = WorkgroupSize*k; - FFT<2,true, WorkgroupSize, Scalar, device_capabilities>::template __call(offsetAccessor,sharedmemAccessor); + FFT::template __call(offsetAccessor,sharedmemAccessor); } accessor = offsetAccessor.accessor; [unroll] - for (uint32_t stride = 2 * WorkgroupSize; stride < K * WorkgroupSize; stride <<= 1) + for (uint32_t stride = 2 * WorkgroupSize; stride < ElementsPerInvocation * WorkgroupSize; stride <<= 1) { accessor.memoryBarrier(); // no execution barrier just making sure writes propagate to accessor [unroll] - for (uint32_t virtualThreadID = SubgroupContiguousIndex(); virtualThreadID < (K / 2) * WorkgroupSize; virtualThreadID += WorkgroupSize) + for (uint32_t virtualThreadID = SubgroupContiguousIndex(); virtualThreadID < (ElementsPerInvocation / 2) * WorkgroupSize; virtualThreadID += WorkgroupSize) { const uint32_t loIx = ((virtualThreadID & (~(stride - 1))) << 1) | (virtualThreadID & (stride - 1)); const uint32_t hiIx = loIx | stride; @@ -370,11 +405,11 @@ struct FFT hlsl::fft::DIT::radix2(hlsl::fft::twiddle(virtualThreadID & (stride - 1), stride), lo,hi); // Divide by special factor at the end - if ( (K / 2) * WorkgroupSize == stride) + if ( (ElementsPerInvocation / 2) * WorkgroupSize == stride) { divides_assign< complex_t > divAss; - divAss(lo, K / 2); - divAss(hi, K / 2); + divAss(lo, ElementsPerInvocation / 2); + divAss(hi, ElementsPerInvocation / 2); } accessor.set(loIx, lo); From 253ffafed0de434ef3eaa8cd2af12dff0fe284d9 Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Wed, 13 Nov 2024 20:40:40 +0100 Subject: [PATCH 088/432] Lets commit entire git history into an image and update incrementally - create `git-cache-updater` service & `git-cache/update-git-cache.cmd` updating dcr.devsh.eu/nabla/source/git-cache:latest with entire Nabla history locally (currently full master only, but I'm going to track all branches there) --- docker/git-cache/Dockerfile | 47 ++++++++++++++++++++ docker/git-cache/compose.yml | 7 +++ docker/git-cache/update-git-cache.cmd | 64 +++++++++++++++++++++++++++ 3 files changed, 118 insertions(+) create mode 100644 docker/git-cache/Dockerfile create mode 100644 docker/git-cache/compose.yml create mode 100644 docker/git-cache/update-git-cache.cmd diff --git a/docker/git-cache/Dockerfile b/docker/git-cache/Dockerfile new file mode 100644 index 0000000000..fca4e232ad --- /dev/null +++ b/docker/git-cache/Dockerfile @@ -0,0 +1,47 @@ +# escape=` + +ARG BASE_IMAGE=mcr.microsoft.com/windows/servercore:ltsc2022-amd64 + +FROM ${BASE_IMAGE} + +SHELL ["cmd", "/S", "/C"] + +RUN ` + # Install Chocolatey + ` + powershell -Command "Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))" + +RUN ` + # Install Git + ` + choco install -y git --version 2.43.0 + +RUN ` + # Install CMake + ` + curl -SL --output cmake.zip 
https://github.com/Kitware/CMake/releases/download/v3.31.0/cmake-3.31.0-windows-x86_64.zip ` + ` + && mkdir "./cmake" ` + ` + && tar -xf cmake.zip -C "./cmake" ` + ` + && del /q cmake.zip + +WORKDIR C:\gitcache + +RUN ` + # Post environment setup + ` + git config --system protocol.*.allow always ` + ` + && git config --system url."https://github.com/".insteadOf "git@github.com:" ` + ` + && git config --system --add safe.directory * ` + ` + && setx THIS_PROJECT_GIT_CACHE "C:\gitcache" ` + ` + && git init ` + ` + && git remote add origin https://github.com/Devsh-Graphics-Programming/Nabla.git + +ENTRYPOINT ["cmd.exe", "/K"] \ No newline at end of file diff --git a/docker/git-cache/compose.yml b/docker/git-cache/compose.yml new file mode 100644 index 0000000000..d4d89c7aa9 --- /dev/null +++ b/docker/git-cache/compose.yml @@ -0,0 +1,7 @@ +services: + git-cache-updater: + build: + context: . + dockerfile: Dockerfile + image: dcr.devsh.eu/nabla/source/git-cache:latest + container_name: git.cache.update \ No newline at end of file diff --git a/docker/git-cache/update-git-cache.cmd b/docker/git-cache/update-git-cache.cmd new file mode 100644 index 0000000000..19f30faa39 --- /dev/null +++ b/docker/git-cache/update-git-cache.cmd @@ -0,0 +1,64 @@ +@echo off +REM Set cache image reference +set IMAGE_NAME=dcr.devsh.eu/nabla/source/git-cache:latest + +REM Start the git-cache-updater container in detached mode and capture the container ID +set CONTAINER_ID= +for /f "delims=" %%i in ('docker-compose run --remove-orphans -d git-cache-updater') do set CONTAINER_ID=%%i + +REM Check if the container started successfully +if "%CONTAINER_ID%"=="" ( + echo Failed to start the git-cache-updater container. + exit /b 1 +) + +echo Started container with ID %CONTAINER_ID% + +REM Fetch master commits +docker exec -i -t %CONTAINER_ID% git fetch origin master +if %errorlevel% neq 0 ( + echo "Error: git fetch failed" + docker stop %CONTAINER_ID% + exit /b %errorlevel% +) + +REM Checkout master. TODO: since it happens at runtime I could loop over /remotes' CURRENT branches and track all history +docker exec -i -t %CONTAINER_ID% git checkout master -f +if %errorlevel% neq 0 ( + echo "Error: git checkout failed" + docker stop %CONTAINER_ID% + exit /b %errorlevel% +) + +REM Update & checkout submodules with CMake +docker exec -i -t %CONTAINER_ID% "C:\cmake\cmake-3.31.0-windows-x86_64\bin\cmake" -P cmake\submodules\update.cmake +if %errorlevel% neq 0 ( + echo "Error: CMake submodule update failed" + docker stop %CONTAINER_ID% + exit /b %errorlevel% +) + +REM Stop the container before committing +docker stop %CONTAINER_ID% +if %errorlevel% neq 0 ( + echo "Error: failed to stop container" + exit /b %errorlevel% +) + +REM Commit the updated container as a new image +docker commit %CONTAINER_ID% %IMAGE_NAME% +if %errorlevel% neq 0 ( + echo "Error: failed to commit the container" + exit /b %errorlevel% +) + +echo Git cache updated and committed as %IMAGE_NAME%. + +REM Remove the update container +docker rm %CONTAINER_ID% +if %errorlevel% neq 0 ( + echo "Error: failed to remove the update container" + exit /b %errorlevel% +) + +echo Removed %CONTAINER_ID% update container. 
\ No newline at end of file From 8d044aa91d7b59cd9deb5f6c2030479586aa0454 Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Thu, 14 Nov 2024 12:26:08 +0100 Subject: [PATCH 089/432] make the git cache work, put all into single compose file with small update script --- docker/Dockerfile | 3 +++ docker/compose.yml | 15 ++++++++++----- docker/git-cache/Dockerfile | 2 +- docker/git-cache/compose.yml | 7 ------- .../update-git-cache.cmd => update.cmd} | 16 ++++++++++++++++ 5 files changed, 30 insertions(+), 13 deletions(-) delete mode 100644 docker/git-cache/compose.yml rename docker/{git-cache/update-git-cache.cmd => update.cmd} (81%) diff --git a/docker/Dockerfile b/docker/Dockerfile index a0427ccfeb..cd85b933c3 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -113,4 +113,7 @@ RUN ` && setx NBL_CI_MODE "ON" WORKDIR ${THIS_PROJECT_NABLA_DIRECTORY} + +COPY --from=dcr.devsh.eu/nabla/source/git-cache:latest /gitcache/.git ./.git + ENTRYPOINT ["cmd.exe", "/K"] \ No newline at end of file diff --git a/docker/compose.yml b/docker/compose.yml index 04c1013081..b7bbf59f37 100644 --- a/docker/compose.yml +++ b/docker/compose.yml @@ -1,4 +1,12 @@ services: + git-cache-updater: + build: + context: ./git-cache + dockerfile: Dockerfile + image: dcr.devsh.eu/nabla/source/git-cache:latest + container_name: git.cache.update + networks: + docker_default: nabla: build: context: . @@ -10,8 +18,6 @@ services: environment: - THIS_PROJECT_WORKING_DIRECTORY=${THIS_PROJECT_WORKING_DIRECTORY} - THIS_PROJECT_NABLA_DIRECTORY=${THIS_PROJECT_NABLA_DIRECTORY} - volumes: - - nabla-cache-git:${THIS_PROJECT_NABLA_DIRECTORY}/.git networks: docker_default: deploy: @@ -19,9 +25,8 @@ services: limits: cpus: '6' memory: 12G - -volumes: - nabla-cache-git: + depends_on: + - git-cache-updater networks: docker_default: diff --git a/docker/git-cache/Dockerfile b/docker/git-cache/Dockerfile index fca4e232ad..6704869b2a 100644 --- a/docker/git-cache/Dockerfile +++ b/docker/git-cache/Dockerfile @@ -42,6 +42,6 @@ RUN ` ` && git init ` ` - && git remote add origin https://github.com/Devsh-Graphics-Programming/Nabla.git + && git remote add origin https://github.com/Devsh-Graphics-Programming/Nabla.git ENTRYPOINT ["cmd.exe", "/K"] \ No newline at end of file diff --git a/docker/git-cache/compose.yml b/docker/git-cache/compose.yml deleted file mode 100644 index d4d89c7aa9..0000000000 --- a/docker/git-cache/compose.yml +++ /dev/null @@ -1,7 +0,0 @@ -services: - git-cache-updater: - build: - context: . 
- dockerfile: Dockerfile - image: dcr.devsh.eu/nabla/source/git-cache:latest - container_name: git.cache.update \ No newline at end of file diff --git a/docker/git-cache/update-git-cache.cmd b/docker/update.cmd similarity index 81% rename from docker/git-cache/update-git-cache.cmd rename to docker/update.cmd index 19f30faa39..6d08e0455b 100644 --- a/docker/git-cache/update-git-cache.cmd +++ b/docker/update.cmd @@ -38,6 +38,22 @@ if %errorlevel% neq 0 ( exit /b %errorlevel% ) +docker exec -i -t %CONTAINER_ID% cmd /C "for /d %%i in (*) do if /i not %%i==.git rmdir /s /q %%i" + +if %errorlevel% neq 0 ( + echo "Error: failed to clean up files" + docker stop %CONTAINER_ID% + exit /b %errorlevel% +) + +docker exec -i -t %CONTAINER_ID% cmd /C "for %%i in (*) do if /i not %%i==.git del /q %%i" + +if %errorlevel% neq 0 ( + echo "Error: failed to clean up files" + docker stop %CONTAINER_ID% + exit /b %errorlevel% +) + REM Stop the container before committing docker stop %CONTAINER_ID% if %errorlevel% neq 0 ( From 69cd5d2116fee8e04cf8cd91d7aeb59c9ad9d29d Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 19 Nov 2024 15:39:23 +0700 Subject: [PATCH 090/432] restructured integration --- include/nbl/video/IAPIConnection.h | 18 ++++-- include/nbl/video/utilities/ngfx.h | 85 ----------------------------- src/nbl/video/CVulkanConnection.cpp | 3 +- src/nbl/video/IAPIConnection.cpp | 77 +++++++++++++++++++++++++- 4 files changed, 90 insertions(+), 93 deletions(-) delete mode 100644 include/nbl/video/utilities/ngfx.h diff --git a/include/nbl/video/IAPIConnection.h b/include/nbl/video/IAPIConnection.h index 840944bab2..1ec862e05b 100644 --- a/include/nbl/video/IAPIConnection.h +++ b/include/nbl/video/IAPIConnection.h @@ -11,7 +11,6 @@ #include "nbl/video/debug/IDebugCallback.h" #include "nbl/video/utilities/renderdoc.h" -#include "nbl/video/utilities/ngfx.h" namespace nbl::video { @@ -61,16 +60,13 @@ class NBL_API2 IAPIConnection : public core::IReferenceCounted const SFeatures& getEnabledFeatures() const { return m_enabledFeatures; } - enum DebuggerType + enum SDebuggerType { EDT_NONE, EDT_RENDERDOC, EDT_NGFX }; - const DebuggerType isRunningInGraphicsDebugger() const { - return m_ngfx_api.useNGFX ? EDT_NGFX : // ngfx takes priority? - m_rdoc_api ? 
EDT_RENDERDOC : EDT_NONE; - } + const SDebuggerType isRunningInGraphicsDebugger() const { return m_debuggerType; } virtual bool startCapture() = 0; virtual bool endCapture() = 0; @@ -78,8 +74,18 @@ class NBL_API2 IAPIConnection : public core::IReferenceCounted IAPIConnection(const SFeatures& enabledFeatures); std::vector> m_physicalDevices; + SDebuggerType m_debuggerType; renderdoc_api_t* m_rdoc_api; + + struct SNGFXIntegration { + bool useNGFX; + + bool injectNGFXToProcess(); + bool executeNGFXCommand(); + }; + using ngfx_api_t = SNGFXIntegration; ngfx_api_t m_ngfx_api; + SFeatures m_enabledFeatures = {}; }; diff --git a/include/nbl/video/utilities/ngfx.h b/include/nbl/video/utilities/ngfx.h deleted file mode 100644 index 91eabfa2b3..0000000000 --- a/include/nbl/video/utilities/ngfx.h +++ /dev/null @@ -1,85 +0,0 @@ -#ifndef _NBL_VIDEO_UTILITIES_NGFX_H_INCLUDED_ -#define _NBL_VIDEO_UTILITIES_NGFX_H_INCLUDED_ - -// TODO: hopefully this is temporary -#include "C:\Program Files\NVIDIA Corporation\Nsight Graphics 2024.1.0\SDKs\NsightGraphicsSDK\0.8.0\include\NGFX_Injection.h" - -namespace nbl::video -{ - struct SNGFXIntegration - { - bool useNGFX; - NGFX_Injection_InstallationInfo versionInfo; - }; - - inline bool injectNGFXToProcess(SNGFXIntegration& api) - { - uint32_t numInstallations = 0; - auto result = NGFX_Injection_EnumerateInstallations(&numInstallations, nullptr); - if (numInstallations == 0 || NGFX_INJECTION_RESULT_OK != result) - { - api.useNGFX = false; - return false; - } - - std::vector installations(numInstallations); - result = NGFX_Injection_EnumerateInstallations(&numInstallations, installations.data()); - if (numInstallations == 0 || NGFX_INJECTION_RESULT_OK != result) - { - api.useNGFX = false; - return false; - } - - // get latest installation - api.versionInfo = installations.back(); - - uint32_t numActivities = 0; - result = NGFX_Injection_EnumerateActivities(&api.versionInfo, &numActivities, nullptr); - if (numActivities == 0 || NGFX_INJECTION_RESULT_OK != result) - { - api.useNGFX = false; - return false; - } - - std::vector activities(numActivities); - result = NGFX_Injection_EnumerateActivities(&api.versionInfo, &numActivities, activities.data()); - if (NGFX_INJECTION_RESULT_OK != result) - { - api.useNGFX = false; - return false; - } - - const NGFX_Injection_Activity* pActivityToInject = nullptr; - for (const NGFX_Injection_Activity& activity : activities) - { - if (activity.type == NGFX_INJECTION_ACTIVITY_FRAME_DEBUGGER) // only want frame debugger - { - pActivityToInject = &activity; - break; - } - } - - if (!pActivityToInject) { - api.useNGFX = false; - return false; - } - - result = NGFX_Injection_InjectToProcess(&api.versionInfo, pActivityToInject); - if (NGFX_INJECTION_RESULT_OK != result) - { - api.useNGFX = false; - return false; - } - - return true; - } - - inline void executeNGFXCommand() - { - NGFX_Injection_ExecuteActivityCommand(); - } - - using ngfx_api_t = SNGFXIntegration; -} - -#endif //_NBL_VIDEO_UTILITIES_NGFX_H_INCLUDED_ \ No newline at end of file diff --git a/src/nbl/video/CVulkanConnection.cpp b/src/nbl/video/CVulkanConnection.cpp index e1a33a1418..da2e08b6bc 100644 --- a/src/nbl/video/CVulkanConnection.cpp +++ b/src/nbl/video/CVulkanConnection.cpp @@ -339,7 +339,7 @@ bool CVulkanConnection::startCapture() if (debugType == EDT_RENDERDOC) m_rdoc_api->StartFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(m_vkInstance), NULL); else - executeNGFXCommand(); + m_ngfx_api.executeNGFXCommand(); return true; } @@ -360,6 +360,7 @@ bool 
CVulkanConnection::endCapture() if (debugType == EDT_RENDERDOC) m_rdoc_api->EndFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(m_vkInstance), NULL); // no equivalent end frame capture for ngfx, ends captures on next frame delimiter + // see https://www.reddit.com/r/GraphicsProgramming/comments/w0hl9o/graphics_debugger_record_before_first_frame/ flag.clear(); return true; } diff --git a/src/nbl/video/IAPIConnection.cpp b/src/nbl/video/IAPIConnection.cpp index 8dc156bb94..e679363d53 100644 --- a/src/nbl/video/IAPIConnection.cpp +++ b/src/nbl/video/IAPIConnection.cpp @@ -4,6 +4,9 @@ #include "nbl/video/utilities/renderdoc.h" #include "nbl/video/utilities/ngfx.h" +// TODO: temporary hopefully +#include "C:\Program Files\NVIDIA Corporation\Nsight Graphics 2024.1.0\SDKs\NsightGraphicsSDK\0.8.0\include\NGFX_Injection.h" + #if defined(_NBL_POSIX_API_) #include #endif @@ -11,6 +14,7 @@ namespace nbl::video { + std::span IAPIConnection::getPhysicalDevices() const { static_assert(sizeof(std::unique_ptr) == sizeof(void*)); @@ -46,8 +50,79 @@ IAPIConnection::IAPIConnection(const SFeatures& enabledFeatures) #endif // probably is platform agnostic, for now - injectNGFXToProcess(m_ngfx_api); + m_ngfx_api.injectNGFXToProcess(); + + m_debuggerType = m_ngfx_api.useNGFX ? EDT_NGFX : // ngfx takes priority? + m_rdoc_api ? EDT_RENDERDOC : EDT_NONE; + } +} + +bool IAPIConnection::SNGFXIntegration::injectNGFXToProcess() +{ + uint32_t numInstallations = 0; + auto result = NGFX_Injection_EnumerateInstallations(&numInstallations, nullptr); + if (numInstallations == 0 || NGFX_INJECTION_RESULT_OK != result) + { + useNGFX = false; + return false; + } + + std::vector installations(numInstallations); + result = NGFX_Injection_EnumerateInstallations(&numInstallations, installations.data()); + if (numInstallations == 0 || NGFX_INJECTION_RESULT_OK != result) + { + useNGFX = false; + return false; + } + + // get latest installation + NGFX_Injection_InstallationInfo versionInfo = installations.back(); + + uint32_t numActivities = 0; + result = NGFX_Injection_EnumerateActivities(&versionInfo, &numActivities, nullptr); + if (numActivities == 0 || NGFX_INJECTION_RESULT_OK != result) + { + useNGFX = false; + return false; } + + std::vector activities(numActivities); + result = NGFX_Injection_EnumerateActivities(&versionInfo, &numActivities, activities.data()); + if (NGFX_INJECTION_RESULT_OK != result) + { + useNGFX = false; + return false; + } + + const NGFX_Injection_Activity* pActivityToInject = nullptr; + for (const NGFX_Injection_Activity& activity : activities) + { + if (activity.type == NGFX_INJECTION_ACTIVITY_FRAME_DEBUGGER) // only want frame debugger + { + pActivityToInject = &activity; + break; + } + } + + if (!pActivityToInject) { + useNGFX = false; + return false; + } + + result = NGFX_Injection_InjectToProcess(&versionInfo, pActivityToInject); + if (NGFX_INJECTION_RESULT_OK != result) + { + useNGFX = false; + return false; + } + + useNGFX = true; + return true; +} + +bool IAPIConnection::SNGFXIntegration::executeNGFXCommand() +{ + return NGFX_Injection_ExecuteActivityCommand() == NGFX_INJECTION_RESULT_OK; } } \ No newline at end of file From 65bbad8adc7d9866381f7ea604df85ab4c81283d Mon Sep 17 00:00:00 2001 From: Fletterio Date: Tue, 19 Nov 2024 14:51:30 -0300 Subject: [PATCH 091/432] Adds ternary op for complex numbers --- include/nbl/builtin/hlsl/complex.hlsl | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/nbl/builtin/hlsl/complex.hlsl b/include/nbl/builtin/hlsl/complex.hlsl index 
249013b1b9..69d16bf850 100644 --- a/include/nbl/builtin/hlsl/complex.hlsl +++ b/include/nbl/builtin/hlsl/complex.hlsl @@ -379,6 +379,19 @@ complex_t rotateRight(NBL_CONST_REF_ARG(complex_t) value) return retVal; } +// Annoyed at having to write a lot of boilerplate to do a select +// Essentially returns what you'd expect from doing `condition ? a : b` +template +complex_t ternaryOperator(bool condition, NBL_CONST_REF_ARG(complex_t) a, NBL_CONST_REF_ARG(complex_t) b) +{ + const vector aVector = vector(a.real(), a.imag()); + const vector bVector = vector(b.real(), b.imag()); + const vector resultVector = condition ? aVector : bVector; + const complex_t result = { resultVector.x, resultVector.y }; + return result; +} + + } } From 13dd52d1c1e696450df2cda89e1f137847b58cc4 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Tue, 19 Nov 2024 17:01:40 -0300 Subject: [PATCH 092/432] Restore submodule pointer --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 22f9294f9a..2cd9e91a4b 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 22f9294f9ad1c35f5909411248dd08c7e1ebbbe2 +Subproject commit 2cd9e91a4b2cb96d22fda18d1d8ab10d1c423328 From 639cf78243ae56f89f176c5e37f638f5d1896645 Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Wed, 20 Nov 2024 11:17:46 +0100 Subject: [PATCH 093/432] add 3rdparty/ngfx/ngfx.cmake capable of finding the NSight GFX SDK & creating interface `ngfx` interface target on success, allow for picking found versions --- 3rdparty/CMakeLists.txt | 3 ++ 3rdparty/ngfx/ngfx.cmake | 63 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 3rdparty/ngfx/ngfx.cmake diff --git a/3rdparty/CMakeLists.txt b/3rdparty/CMakeLists.txt index 1d0b3fee0f..1aa44383e3 100755 --- a/3rdparty/CMakeLists.txt +++ b/3rdparty/CMakeLists.txt @@ -296,6 +296,9 @@ NBL_ADD_GIT_TRACKING_META_LIBRARY(nabla "${NBL_ROOT_PATH}") NBL_ADD_GIT_TRACKING_META_LIBRARY(dxc "${CMAKE_CURRENT_SOURCE_DIR}/dxc/dxc") NBL_GENERATE_GIT_TRACKING_META() +# NGFX +include(ngfx/ngfx.cmake) + if(NBL_BUILD_IMGUI) set(NBL_IMGUI_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/imgui") set(NBL_IMGUI_TEST_ENGINE_PROJECT_ROOT "${THIRD_PARTY_SOURCE_DIR}/imgui_test_engine") diff --git a/3rdparty/ngfx/ngfx.cmake b/3rdparty/ngfx/ngfx.cmake new file mode 100644 index 0000000000..6505f5d8ac --- /dev/null +++ b/3rdparty/ngfx/ngfx.cmake @@ -0,0 +1,63 @@ +option(NBL_BUILD_WITH_NGFX "Enable NGFX build" OFF) + +# NOTE: on windows default installation path is: +# "C:/Program Files/NVIDIA Corporation/Nsight Graphics /SDKs/NsightGraphicsSDK" <- define as "NGFX_SDK" environment variable +# then you can pick SDK version with "NGFX_SDK_VERSION" cache variable (CMake GUI list supported) + +if(NBL_BUILD_WITH_NGFX) + if(NOT DEFINED ENV{NGFX_SDK}) + message(FATAL_ERROR "\"NGFX_SDK\" environment variable must be defined to build with NBL_BUILD_WITH_NGFX enabled!") + endif() + + set(NGFX_SDK "$ENV{NGFX_SDK}") + cmake_path(NORMAL_PATH NGFX_SDK OUTPUT_VARIABLE NGFX_SDK) + + if(NOT EXISTS "${NGFX_SDK}") + message(FATAL_ERROR "Found \"NGFX_SDK\" environment variable but it is invalid, env:NGFX_SDK=\"${NGFX_SDK}\" doesn't exist!") + endif() + + file(GLOB ENTRIES "${NGFX_SDK}/*") + + set(NGFX_VERSIONS "") + foreach(ENTRY ${ENTRIES}) + if(IS_DIRECTORY ${ENTRY}) + list(APPEND NGFX_VERSIONS ${ENTRY}) + endif() + endforeach() + + if(NOT NGFX_VERSIONS) + message(FATAL_ERROR "Could not find any NGFX SDK Version!") + endif() + + list(TRANSFORM NGFX_VERSIONS 
REPLACE "${NGFX_SDK}/" "") + list(SORT NGFX_VERSIONS) + list(GET NGFX_VERSIONS -1 LATEST_NGFX_VERSION) + + # on the cache variable init pick the latest version, then let user pick from list + set(NGFX_SDK_VERSION "${LATEST_NGFX_VERSION}" CACHE STRING "NGFX SDK Version") + set_property(CACHE NGFX_SDK_VERSION PROPERTY STRINGS ${NGFX_VERSIONS}) + + set(NGFX_SDK_BASE "${NGFX_SDK}/$CACHE{NGFX_SDK_VERSION}") + + # TODO: wanna support more *host* platforms? (*) + # NOTE: also I'm hardcoding windows x64 library requests till I know the answer for (*) + find_file(NBL_NGFX_INJECTION_HEADER NGFX_Injection.h PATHS ${NGFX_SDK_BASE}/include) + find_file(NBL_NGFX_INJECTION_DLL NGFX_Injection.dll PATHS ${NGFX_SDK_BASE}/lib/x64) + find_file(NBL_NGFX_INJECTION_IMPORT_LIBRARY NGFX_Injection.lib PATHS ${NGFX_SDK_BASE}/lib/x64) + + if(NBL_NGFX_INJECTION_HEADER AND NBL_NGFX_INJECTION_DLL AND NBL_NGFX_INJECTION_IMPORT_LIBRARY) + message(STATUS "Enabled build with NVIDIA Nsight Graphics SDK $CACHE{NGFX_SDK_VERSION}\nlocated in: \"${NGFX_SDK_BASE}\"") + else() + message(STATUS "Could not enable build with NVIDIA Nsight Graphics SDK $CACHE{NGFX_SDK_VERSION} - invalid components!") + message(STATUS "Located in: \"${NGFX_SDK_BASE}\"") + message(STATUS "NBL_NGFX_INJECTION_HEADER=\"${NBL_NGFX_INJECTION_HEADER}\"") + message(STATUS "NBL_NGFX_INJECTION_DLL=\"${NBL_NGFX_INJECTION_DLL}\"") + message(STATUS "NBL_NGFX_INJECTION_IMPORT_LIBRARY=\"${NBL_NGFX_INJECTION_IMPORT_LIBRARY}\"") + message(FATAL_ERROR "You installation may be corupted, please fix it and re-run CMake or disable NBL_BUILD_WITH_NGFX!") + endif() + + add_library(ngfx INTERFACE) + target_sources(ngfx INTERFACE "${NBL_NGFX_INJECTION_HEADER}") + target_include_directories(ngfx INTERFACE "${NGFX_SDK_BASE}/include") + target_link_libraries(ngfx INTERFACE "${NBL_NGFX_INJECTION_IMPORT_LIBRARY}") +endif() \ No newline at end of file From 048d093f56546b09d6387626ff2af280168fb2e1 Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Wed, 20 Nov 2024 15:25:43 +0100 Subject: [PATCH 094/432] integrate NGFX with Nabla build system, delay load the DLL on windows target platforms --- 3rdparty/ngfx/ngfx.cmake | 10 +- include/nbl/video/IAPIConnection.h | 10 +- src/nbl/CMakeLists.txt | 15 +++ src/nbl/video/IAPIConnection.cpp | 171 ++++++++++++++++++++--------- 4 files changed, 148 insertions(+), 58 deletions(-) diff --git a/3rdparty/ngfx/ngfx.cmake b/3rdparty/ngfx/ngfx.cmake index 6505f5d8ac..f69e05ac9d 100644 --- a/3rdparty/ngfx/ngfx.cmake +++ b/3rdparty/ngfx/ngfx.cmake @@ -37,7 +37,8 @@ if(NBL_BUILD_WITH_NGFX) set(NGFX_SDK_VERSION "${LATEST_NGFX_VERSION}" CACHE STRING "NGFX SDK Version") set_property(CACHE NGFX_SDK_VERSION PROPERTY STRINGS ${NGFX_VERSIONS}) - set(NGFX_SDK_BASE "${NGFX_SDK}/$CACHE{NGFX_SDK_VERSION}") + set(NGFX_SDK_VERSION "$CACHE{NGFX_SDK_VERSION}") + set(NGFX_SDK_BASE "${NGFX_SDK}/${NGFX_SDK_VERSION}") # TODO: wanna support more *host* platforms? 
(*) # NOTE: also I'm hardcoding windows x64 library requests till I know the answer for (*) @@ -46,9 +47,9 @@ if(NBL_BUILD_WITH_NGFX) find_file(NBL_NGFX_INJECTION_IMPORT_LIBRARY NGFX_Injection.lib PATHS ${NGFX_SDK_BASE}/lib/x64) if(NBL_NGFX_INJECTION_HEADER AND NBL_NGFX_INJECTION_DLL AND NBL_NGFX_INJECTION_IMPORT_LIBRARY) - message(STATUS "Enabled build with NVIDIA Nsight Graphics SDK $CACHE{NGFX_SDK_VERSION}\nlocated in: \"${NGFX_SDK_BASE}\"") + message(STATUS "Enabled build with NVIDIA Nsight Graphics SDK ${NGFX_SDK_VERSION}\nlocated in: \"${NGFX_SDK_BASE}\"") else() - message(STATUS "Could not enable build with NVIDIA Nsight Graphics SDK $CACHE{NGFX_SDK_VERSION} - invalid components!") + message(STATUS "Could not enable build with NVIDIA Nsight Graphics SDK ${NGFX_SDK_VERSION} - invalid components!") message(STATUS "Located in: \"${NGFX_SDK_BASE}\"") message(STATUS "NBL_NGFX_INJECTION_HEADER=\"${NBL_NGFX_INJECTION_HEADER}\"") message(STATUS "NBL_NGFX_INJECTION_DLL=\"${NBL_NGFX_INJECTION_DLL}\"") @@ -60,4 +61,7 @@ if(NBL_BUILD_WITH_NGFX) target_sources(ngfx INTERFACE "${NBL_NGFX_INJECTION_HEADER}") target_include_directories(ngfx INTERFACE "${NGFX_SDK_BASE}/include") target_link_libraries(ngfx INTERFACE "${NBL_NGFX_INJECTION_IMPORT_LIBRARY}") + target_link_options(ngfx INTERFACE "/DELAYLOAD:NGFX_Injection.dll") + target_compile_definitions(ngfx INTERFACE NGFX_INJECTION_DLL_DIR="${NGFX_SDK_BASE}/lib/x64") + target_compile_definitions(ngfx INTERFACE NGFX_VERSION="${NGFX_SDK_VERSION}") endif() \ No newline at end of file diff --git a/include/nbl/video/IAPIConnection.h b/include/nbl/video/IAPIConnection.h index 1ec862e05b..ccb7965822 100644 --- a/include/nbl/video/IAPIConnection.h +++ b/include/nbl/video/IAPIConnection.h @@ -77,11 +77,17 @@ class NBL_API2 IAPIConnection : public core::IReferenceCounted SDebuggerType m_debuggerType; renderdoc_api_t* m_rdoc_api; - struct SNGFXIntegration { - bool useNGFX; + struct SNGFXIntegration + { + SNGFXIntegration(); + + bool useNGFX = false; bool injectNGFXToProcess(); bool executeNGFXCommand(); + inline bool isAPILoaded() { return m_loaded; } + private: + const bool m_loaded; }; using ngfx_api_t = SNGFXIntegration; ngfx_api_t m_ngfx_api; diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 83845b9c84..75f3dd3bec 100755 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -659,6 +659,21 @@ write_source_definitions("${_NBL_DEFINE_FILE_WRAPPER_}" "${_NBL_SOURCE_DEFINITIO # git version tracking target_link_libraries(Nabla PUBLIC gtml) +# NGFX +if(TARGET ngfx) + if(NBL_STATIC_BUILD) + target_link_libraries(Nabla INTERFACE ngfx) + else() + target_link_libraries(Nabla PRIVATE ngfx) + endif() + + target_include_directories(Nabla PRIVATE $) + target_compile_definitions(Nabla + PRIVATE NBL_BUILD_WITH_NGFX + PRIVATE $ + ) +endif() + #on MSVC it won't compile without this option! 
if (MSVC) target_compile_options(Nabla PUBLIC /bigobj) diff --git a/src/nbl/video/IAPIConnection.cpp b/src/nbl/video/IAPIConnection.cpp index e679363d53..b3d697f3a7 100644 --- a/src/nbl/video/IAPIConnection.cpp +++ b/src/nbl/video/IAPIConnection.cpp @@ -2,10 +2,12 @@ #include "nbl/video/IPhysicalDevice.h" #include "nbl/video/utilities/renderdoc.h" -#include "nbl/video/utilities/ngfx.h" -// TODO: temporary hopefully -#include "C:\Program Files\NVIDIA Corporation\Nsight Graphics 2024.1.0\SDKs\NsightGraphicsSDK\0.8.0\include\NGFX_Injection.h" +#include "nbl/system/CSystemWin32.h" + +#ifdef NBL_BUILD_WITH_NGFX +#include "NGFX_Injection.h" +#endif #if defined(_NBL_POSIX_API_) #include @@ -14,7 +16,6 @@ namespace nbl::video { - std::span IAPIConnection::getPhysicalDevices() const { static_assert(sizeof(std::unique_ptr) == sizeof(void*)); @@ -59,70 +60,134 @@ IAPIConnection::IAPIConnection(const SFeatures& enabledFeatures) bool IAPIConnection::SNGFXIntegration::injectNGFXToProcess() { - uint32_t numInstallations = 0; - auto result = NGFX_Injection_EnumerateInstallations(&numInstallations, nullptr); - if (numInstallations == 0 || NGFX_INJECTION_RESULT_OK != result) + #ifdef NBL_BUILD_WITH_NGFX + if (m_loaded) //! this check is mandatory! { - useNGFX = false; - return false; - } + uint32_t numInstallations = 0; + auto result = NGFX_Injection_EnumerateInstallations(&numInstallations, nullptr); + if (numInstallations == 0 || NGFX_INJECTION_RESULT_OK != result) + { + useNGFX = false; + return false; + } - std::vector installations(numInstallations); - result = NGFX_Injection_EnumerateInstallations(&numInstallations, installations.data()); - if (numInstallations == 0 || NGFX_INJECTION_RESULT_OK != result) - { - useNGFX = false; - return false; - } + std::vector installations(numInstallations); + result = NGFX_Injection_EnumerateInstallations(&numInstallations, installations.data()); + if (numInstallations == 0 || NGFX_INJECTION_RESULT_OK != result) + { + useNGFX = false; + return false; + } - // get latest installation - NGFX_Injection_InstallationInfo versionInfo = installations.back(); + // get latest installation + NGFX_Injection_InstallationInfo versionInfo = installations.back(); - uint32_t numActivities = 0; - result = NGFX_Injection_EnumerateActivities(&versionInfo, &numActivities, nullptr); - if (numActivities == 0 || NGFX_INJECTION_RESULT_OK != result) - { - useNGFX = false; - return false; - } + uint32_t numActivities = 0; + result = NGFX_Injection_EnumerateActivities(&versionInfo, &numActivities, nullptr); + if (numActivities == 0 || NGFX_INJECTION_RESULT_OK != result) + { + useNGFX = false; + return false; + } - std::vector activities(numActivities); - result = NGFX_Injection_EnumerateActivities(&versionInfo, &numActivities, activities.data()); - if (NGFX_INJECTION_RESULT_OK != result) - { - useNGFX = false; - return false; - } + std::vector activities(numActivities); + result = NGFX_Injection_EnumerateActivities(&versionInfo, &numActivities, activities.data()); + if (NGFX_INJECTION_RESULT_OK != result) + { + useNGFX = false; + return false; + } - const NGFX_Injection_Activity* pActivityToInject = nullptr; - for (const NGFX_Injection_Activity& activity : activities) - { - if (activity.type == NGFX_INJECTION_ACTIVITY_FRAME_DEBUGGER) // only want frame debugger + const NGFX_Injection_Activity* pActivityToInject = nullptr; + for (const NGFX_Injection_Activity& activity : activities) { - pActivityToInject = &activity; - break; + if (activity.type == NGFX_INJECTION_ACTIVITY_FRAME_DEBUGGER) // 
only want frame debugger + { + pActivityToInject = &activity; + break; + } } - } - if (!pActivityToInject) { - useNGFX = false; - return false; - } + if (!pActivityToInject) { + useNGFX = false; + return false; + } - result = NGFX_Injection_InjectToProcess(&versionInfo, pActivityToInject); - if (NGFX_INJECTION_RESULT_OK != result) - { - useNGFX = false; - return false; - } + result = NGFX_Injection_InjectToProcess(&versionInfo, pActivityToInject); + if (NGFX_INJECTION_RESULT_OK != result) + { + useNGFX = false; + return false; + } + + useNGFX = true; + + return true; + } // optional TOOD: could log on "else" + #endif // NBL_BUILD_WITH_NGFX - useNGFX = true; - return true; + return false; } bool IAPIConnection::SNGFXIntegration::executeNGFXCommand() { - return NGFX_Injection_ExecuteActivityCommand() == NGFX_INJECTION_RESULT_OK; + #ifdef NBL_BUILD_WITH_NGFX + if(m_loaded) //! this check is mandatory! + return NGFX_Injection_ExecuteActivityCommand() == NGFX_INJECTION_RESULT_OK; // optional TOOD: could log on "else" + #endif // NBL_BUILD_WITH_NGFX + + return false; } +IAPIConnection::SNGFXIntegration::SNGFXIntegration() + : useNGFX(false /*??*/), m_loaded([]() -> bool + { +#ifdef NBL_BUILD_WITH_NGFX + //! absolute path to official install NGFX SDK runtime directory + auto getOfficialRuntimeDirectory = []() + { + const char* sdk = std::getenv("NGFX_SDK"); + const char* version = std::getenv("NGFX_VERSION"); + const bool composed = sdk && version; + + if (composed) + { + const auto directory = system::path(sdk) / system::path(version) / "lib" / "x64"; + + if (std::filesystem::exists(directory)) + return directory; + } + + return system::path(""); + }; + + //! batch request with priority order & custom Nabla runtime search, I'm assuming we are loading the runtime from official SDK not custom location + //! one question is if we should have any constraints for min/max version, maybe force the "version" + //! to match the "NGFX_VERSION" define so to "what we built with", or don't have any - just like now + + #if defined(_NBL_PLATFORM_WINDOWS_) + static constexpr std::string_view NGFXMODULE = "NGFX_Injection.dll"; + HMODULE isAlreadyLoaded = GetModuleHandleA(NGFXMODULE.data()); + + if (!isAlreadyLoaded) + { + const auto dll = getOfficialRuntimeDirectory() / NGFXMODULE.data(); + const HRESULT hook = system::CSystemWin32::delayLoadDLL(NGFXMODULE.data(), { NGFX_INJECTION_DLL_DIR, dll.parent_path() }); + + //! don't be scared if you see "No symbols loaded" - you will not hit "false" in this case, the DLL will get loaded if found, + //! proc addresses will be resolved correctly but status will scream "FAILED" because we don't have any PDB to load + if (FAILED(hook)) + return false; + } + #else + #error "TODO!" 
+ #endif + + return true; +#else + return false; // no NGFX build -> no API to load +#endif + }()) +{} + } \ No newline at end of file From e618e58f284dcf68c86f62173d832732b53e7496 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 22 Nov 2024 21:19:40 -0300 Subject: [PATCH 095/432] Yet more utils, such as bitreversal --- include/nbl/builtin/hlsl/fft/common.hlsl | 61 ++++++++++++++++++++- include/nbl/builtin/hlsl/workgroup/fft.hlsl | 43 ++++++++++++--- 2 files changed, 94 insertions(+), 10 deletions(-) diff --git a/include/nbl/builtin/hlsl/fft/common.hlsl b/include/nbl/builtin/hlsl/fft/common.hlsl index fa54da5a3f..3456123a5f 100644 --- a/include/nbl/builtin/hlsl/fft/common.hlsl +++ b/include/nbl/builtin/hlsl/fft/common.hlsl @@ -1,9 +1,47 @@ #ifndef _NBL_BUILTIN_HLSL_FFT_COMMON_INCLUDED_ #define _NBL_BUILTIN_HLSL_FFT_COMMON_INCLUDED_ -#include "nbl/builtin/hlsl/complex.hlsl" #include "nbl/builtin/hlsl/cpp_compat.hlsl" + +#ifndef __HLSL_VERSION +#include + +namespace nbl +{ +namespace hlsl +{ +namespace fft +{ + +static inline uint32_t3 padDimensions(uint32_t3 dimensions, std::span axes, bool realFFT = false) +{ + uint16_t axisCount = 0; + for (auto i : axes) + { + dimensions[i] = core::roundUpToPoT(dimensions[i]); + if (realFFT && !axisCount++) + dimensions[i] /= 2; + } + return dimensions; +} + +static inline uint64_t getOutputBufferSize(const uint32_t3& inputDimensions, uint32_t numChannels, std::span axes, bool realFFT = false, bool halfFloats = false) +{ + auto paddedDims = padDimensions(inputDimensions, axes); + uint64_t numberOfComplexElements = paddedDims[0] * paddedDims[1] * paddedDims[2] * numChannels; + return 2 * numberOfComplexElements * (halfFloats ? sizeof(float16_t) : sizeof(float32_t)); +} + + +} +} +} + +#else + +#include "nbl/builtin/hlsl/complex.hlsl" #include "nbl/builtin/hlsl/numbers.hlsl" +#include "nbl/builtin/hlsl/concepts.hlsl" namespace nbl { @@ -53,8 +91,29 @@ using DIT = DIX; template using DIF = DIX; + +// ------------------------------------------------- Utils --------------------------------------------------------- +// +// Util to unpack two values from the packed FFT X + iY - get outputs in the same input arguments, storing x to lo and y to hi +template +void unpack(NBL_REF_ARG(complex_t) lo, NBL_REF_ARG(complex_t) hi) +{ + complex_t x = (lo + conj(hi)) * Scalar(0.5); + hi = rotateRight(lo - conj(hi)) * Scalar(0.5); + lo = x; } + +// Bit-reverses T as a binary string of length given by Bits +template && Bits <= sizeof(T) * 8) +T bitReverse(T value) +{ + return glsl::bitfieldReverse(value) >> (sizeof(T) * 8 - Bits); } + } +} +} + +#endif #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/workgroup/fft.hlsl b/include/nbl/builtin/hlsl/workgroup/fft.hlsl index 44a0dfc1d7..dc1aa0576e 100644 --- a/include/nbl/builtin/hlsl/workgroup/fft.hlsl +++ b/include/nbl/builtin/hlsl/workgroup/fft.hlsl @@ -1,6 +1,37 @@ #ifndef _NBL_BUILTIN_HLSL_WORKGROUP_FFT_INCLUDED_ #define _NBL_BUILTIN_HLSL_WORKGROUP_FFT_INCLUDED_ +#include +#include + +#ifndef __HLSL_VERSION +#include + +namespace nbl +{ +namespace hlsl +{ +namespace workgroup +{ +namespace fft +{ + +inline std::pair optimalFFTParameters(const video::ILogicalDevice* device, uint32_t inputArrayLength) +{ + uint32_t maxWorkgroupSize = *device->getPhysicalDevice()->getLimits().maxWorkgroupSize; + // This is the logic found in core::roundUpToPoT to get the log2 + uint16_t workgroupSizeLog2 = 1u + hlsl::findMSB(core::min(inputArrayLength / 2, maxWorkgroupSize) - 1u); + uint16_t elementPerInvocationLog2 = 
1u + hlsl::findMSB(core::max((inputArrayLength >> workgroupSizeLog2) - 1u, 1u)); + return { elementPerInvocationLog2, workgroupSizeLog2 }; +} + +} +} +} +} + +#else + #include "nbl/builtin/hlsl/subgroup/fft.hlsl" #include "nbl/builtin/hlsl/workgroup/basic.hlsl" #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" @@ -91,15 +122,6 @@ namespace impl } } //namespace impl -// Util to unpack two values from the packed FFT X + iY - get outputs in the same input arguments, storing x to lo and y to hi -template -void unpack(NBL_REF_ARG(complex_t) lo, NBL_REF_ARG(complex_t) hi) -{ - complex_t x = (lo + conj(hi)) * Scalar(0.5); - hi = rotateRight(lo - conj(hi)) * Scalar(0.5); - lo = x; -} - template struct FFTIndexingUtils { @@ -425,4 +447,7 @@ struct FFT Date: Tue, 26 Nov 2024 09:58:13 -0800 Subject: [PATCH 096/432] Implemented nbl::hlsl::mul --- examples_tests | 2 +- .../nbl/builtin/hlsl/emulated/float64_t.hlsl | 47 +++++------ .../builtin/hlsl/emulated/float64_t_impl.hlsl | 23 +++--- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 80 ++++++++----------- include/nbl/builtin/hlsl/functional.hlsl | 18 +++++ include/nbl/builtin/hlsl/ieee754/impl.hlsl | 10 +-- .../gauss_legendre/gauss_legendre.hlsl | 1 - .../nbl/builtin/hlsl/portable/float64_t.hlsl | 8 +- .../nbl/builtin/hlsl/portable/matrix_t.hlsl | 68 ++++++++-------- include/nbl/builtin/hlsl/shapes/beziers.hlsl | 7 +- include/nbl/builtin/hlsl/type_traits.hlsl | 3 + 11 files changed, 133 insertions(+), 134 deletions(-) diff --git a/examples_tests b/examples_tests index 049772c9f3..ad0e24f799 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 049772c9f3a37a17a2332ed0d7c3f78375d218ec +Subproject commit ad0e24f799afe4b7bb7f1675b061e8408d95a0dd diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index eec7a27c46..cf2a7e494f 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -38,22 +38,22 @@ namespace hlsl NBL_CONSTEXPR_STATIC_INLINE this_t create(int32_t val) { - return bit_cast(emulated_float64_t_impl::castToUint64WithFloat64BitPattern(int64_t(val))); + return bit_cast(emulated_float64_t_impl::reinterpretAsFloat64BitPattern(int64_t(val))); } NBL_CONSTEXPR_STATIC_INLINE this_t create(int64_t val) { - return bit_cast(emulated_float64_t_impl::castToUint64WithFloat64BitPattern(val)); + return bit_cast(emulated_float64_t_impl::reinterpretAsFloat64BitPattern(val)); } NBL_CONSTEXPR_STATIC_INLINE this_t create(uint32_t val) { - return bit_cast(emulated_float64_t_impl::castToUint64WithFloat64BitPattern(uint64_t(val))); + return bit_cast(emulated_float64_t_impl::reinterpretAsFloat64BitPattern(uint64_t(val))); } NBL_CONSTEXPR_STATIC_INLINE this_t create(uint64_t val) { - return bit_cast(emulated_float64_t_impl::castToUint64WithFloat64BitPattern(val)); + return bit_cast(emulated_float64_t_impl::reinterpretAsFloat64BitPattern(val)); } NBL_CONSTEXPR_STATIC_INLINE this_t create(float32_t val) @@ -92,24 +92,14 @@ namespace hlsl { if(!FastMath) { - if (tgmath::isNaN(data) || tgmath::isNaN(rhs.data)) - return bit_cast(ieee754::traits::quietNaN); - - if (emulated_float64_t_impl::areBothInfinity(data, rhs.data)) - { - uint64_t lhsSign = data & ieee754::traits::signMask; - uint64_t rhsSign = rhs.data & ieee754::traits::signMask; - - if (lhsSign == rhsSign) - return bit_cast(ieee754::traits::inf | lhsSign); - else if (lhsSign || rhsSign) - return bit_cast(ieee754::traits::quietNaN | ieee754::traits::signMask); - } - + 
const bool isRhsInf = tgmath::isInf(rhs.data); if (tgmath::isInf(data)) + { + if (isRhsInf && ((data ^ rhs.data) & ieee754::traits::signMask)) + return bit_cast(ieee754::traits::quietNaN); return bit_cast(data); - - if (tgmath::isInf(rhs.data)) + } + else if (isRhsInf) return bit_cast(rhs.data); } @@ -128,17 +118,18 @@ namespace hlsl return bit_cast(ieee754::traits::inf | ieee754::extractSignPreserveBitPattern(max(lhsData, rhsData))); } - if (emulated_float64_t_impl::areBothZero(lhsData, rhsData)) - { - if (lhsSign == rhsSign) - return bit_cast(lhsSign); - else - return bit_cast(0ull); - } + const bool isRhsZero = emulated_float64_t_impl::isZero(rhsData); if (emulated_float64_t_impl::isZero(lhsData)) + { + if(isRhsZero) + return bit_cast((uint64_t(lhsData == rhsData) << 63) & lhsSign); + return bit_cast(rhsData); - if (emulated_float64_t_impl::isZero(rhsData)) + } + else if (isRhsZero) + { return bit_cast(lhsData); + } uint64_t lhsNormMantissa = ieee754::extractNormalizeMantissa(lhsData); uint64_t rhsNormMantissa = ieee754::extractNormalizeMantissa(rhsData); diff --git a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl index e9f7b098ae..708c7ae4a3 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl @@ -103,7 +103,7 @@ inline int _findMSB(uint64_t val) #endif } -inline uint64_t castToUint64WithFloat64BitPattern(uint64_t val) +inline uint64_t reinterpretAsFloat64BitPattern(uint64_t val) { if (isZero(val)) return val; @@ -149,25 +149,20 @@ inline uint64_t castToUint64WithFloat64BitPattern(uint64_t val) return biasedExp | mantissa; }; -inline uint64_t castToUint64WithFloat64BitPattern(int64_t val) +inline uint64_t reinterpretAsFloat64BitPattern(int64_t val) { const uint64_t sign = val & ieee754::traits::signMask; const uint64_t absVal = uint64_t(abs(val)); - return sign | castToUint64WithFloat64BitPattern(absVal); + return sign | reinterpretAsFloat64BitPattern(absVal); }; -NBL_CONSTEXPR_INLINE_FUNC uint32_t2 umulExtended(uint32_t lhs, uint32_t rhs) +template +NBL_CONSTEXPR_INLINE_FUNC Uint flushDenormToZero(const int extractedBiasedExp, Uint value) { - uint64_t product = uint64_t(lhs) * uint64_t(rhs); - uint32_t2 output; - output.x = uint32_t((product & 0xFFFFFFFF00000000) >> 32); - output.y = uint32_t(product & 0x00000000FFFFFFFFull); - return output; -} - -NBL_CONSTEXPR_INLINE_FUNC uint64_t flushDenormToZero(uint64_t extractedBiasedExponent, uint64_t value) -{ - return extractedBiasedExponent ? value : ieee754::extractSignPreserveBitPattern(value); + // TODO: + // static_assert(is_unsigned::value); + using AsFloat = typename float_of_size::type; + return extractedBiasedExp ? 
value : (value & ieee754::traits::signMask); } NBL_CONSTEXPR_INLINE_FUNC uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 768c6b8c85..89c78936a6 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -2,6 +2,7 @@ #define _NBL_BUILTIN_HLSL_EMULATED_VECTOR_T_HLSL_INCLUDED_ #include +#include namespace nbl { @@ -320,6 +321,10 @@ struct emulated_vector : CRTP return output; } + //DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) + //DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) + //DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) + //DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(float32_t) DEFINE_OPERATORS_FOR_TYPE(float64_t) DEFINE_OPERATORS_FOR_TYPE(uint16_t) @@ -382,15 +387,6 @@ struct is_valid_emulated_vector is_same_v >; }; -template -struct array_get -{ - T operator()(NBL_REF_ARG(U) vec, const I ix) - { - return vec[ix]; - } -}; - template struct array_get, ComponentType, uint32_t> { @@ -400,43 +396,33 @@ struct array_get, ComponentType, uint32_t> } }; -template -struct array_get, ComponentType, uint32_t> -{ - ComponentType operator()(NBL_REF_ARG(emulated_vector_t3) vec, const uint32_t ix) - { - return vec.getComponent(ix); - } -}; - -template -struct array_get, ComponentType, uint32_t> -{ - ComponentType operator()(NBL_REF_ARG(emulated_vector_t4) vec, const uint32_t ix) - { - return vec.getComponent(ix); - } -}; - -#undef DEFINE_EMULATED_VECTOR_ARRAY_GET_SPECIALIZATION - -template -struct array_set -{ - void operator()(NBL_REF_ARG(U) arr, I index, T val) - { - arr[index] = val; - } -}; +//template +//struct array_set, ComponentType, uint32_t> +//{ +// void operator()(NBL_REF_ARG(emulated_vector_t2) vec, uint32_t index, ComponentType value) +// { +// vec.setComponent(index, value); +// } +//}; + +//template +//struct array_get, ComponentType, uint32_t> +//{ +// ComponentType operator()(typename emulated_vector_t vec, const uint32_t ix) +// { +// return vec.getComponent(ix); +// } +//}; + +//template +//struct array_get, ComponentType, uint32_t> +//{ +// ComponentType operator()(NBL_REF_ARG(emulated_vector_t4) vec, const uint32_t ix) +// { +// return vec.getComponent(ix); +// } +//}; -template -struct array_set, ComponentType, uint32_t> -{ - void operator()(NBL_REF_ARG(emulated_vector_t2) vec, uint32_t index, ComponentType value) - { - vec.setComponent(index, value); - } -}; template struct array_set, ComponentType, uint32_t> @@ -476,7 +462,7 @@ struct static_cast_helper, vector, void> { static inline emulated_vector_t3 cast(vector vec) { - emulated_vector_t2 output; + emulated_vector_t3 output; output.x = _static_cast(vec.x); output.y = _static_cast(vec.y); output.z = _static_cast(vec.z); @@ -490,7 +476,7 @@ struct static_cast_helper, vector, void> { static inline emulated_vector_t4 cast(vector vec) { - emulated_vector_t2 output; + emulated_vector_t4 output; output.x = _static_cast(vec.x); output.y = _static_cast(vec.y); output.z = _static_cast(vec.z); diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index c1b347c721..e1d7734bd6 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -195,6 +195,24 @@ struct maximum NBL_CONSTEXPR_STATIC_INLINE T identity = numeric_limits::lowest; // TODO: `all_components` }; +template +struct array_get +{ + ComponentType 
operator()(NBL_CONST_REF_ARG(ArrayType) arr, const I ix) + { + return arr[ix]; + } +}; + +template +struct array_set +{ + void operator()(NBL_REF_ARG(ArrayType) arr, I index, ComponentType val) + { + arr[index] = val; + } +}; + } } diff --git a/include/nbl/builtin/hlsl/ieee754/impl.hlsl b/include/nbl/builtin/hlsl/ieee754/impl.hlsl index e17eb9a8c7..b074d81f7c 100644 --- a/include/nbl/builtin/hlsl/ieee754/impl.hlsl +++ b/include/nbl/builtin/hlsl/ieee754/impl.hlsl @@ -26,15 +26,15 @@ NBL_CONSTEXPR_INLINE_FUNC bool isTypeAllowed() } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type bitCastToUintType(T x) +NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t bitCastToUintType(T x) { - using AsUint = typename unsigned_integer_of_size::type; + using AsUint = unsigned_integer_of_size_t; return bit_cast(x); } // to avoid bit cast from uintN_t to uintN_t -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size<2>::type bitCastToUintType(uint16_t x) { return x; } -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size<4>::type bitCastToUintType(uint32_t x) { return x; } -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size<8>::type bitCastToUintType(uint64_t x) { return x; } +template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<2> bitCastToUintType(uint16_t x) { return x; } +template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<4> bitCastToUintType(uint32_t x) { return x; } +template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<8> bitCastToUintType(uint64_t x) { return x; } template NBL_CONSTEXPR_INLINE_FUNC T castBackToFloatType(T x) diff --git a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl index 1eeba76546..890fcf174a 100644 --- a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl +++ b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl @@ -5,7 +5,6 @@ #define _NBL_BUILTIN_HLSL_MATH_QUADRATURE_GAUSS_LEGENDRE_INCLUDED_ #include -// TODO: portable/float64_t.hlsl instead? 
#include namespace nbl diff --git a/include/nbl/builtin/hlsl/portable/float64_t.hlsl b/include/nbl/builtin/hlsl/portable/float64_t.hlsl index 131d37a87b..e269a4a43b 100644 --- a/include/nbl/builtin/hlsl/portable/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/float64_t.hlsl @@ -9,8 +9,12 @@ namespace hlsl { template #ifdef __HLSL_VERSION -//using portable_float64_t = typename conditional::shaderFloat64, float64_t, emulated_float64_t >::type; -using portable_float64_t = typename conditional::shaderFloat64, emulated_float64_t, emulated_float64_t >::type; +#ifdef NBL_FORCE_EMULATED_FLOAT_64 +using portable_float64_t = emulated_float64_t; +#else +using portable_float64_t = typename conditional::shaderFloat64, float64_t, emulated_float64_t >::type; +#endif + #else using portable_float64_t = float64_t; #endif diff --git a/include/nbl/builtin/hlsl/portable/matrix_t.hlsl b/include/nbl/builtin/hlsl/portable/matrix_t.hlsl index c0b2596cb5..46fc6836af 100644 --- a/include/nbl/builtin/hlsl/portable/matrix_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/matrix_t.hlsl @@ -45,47 +45,51 @@ using portable_float64_t3x3 = portable_matrix_t3x3; namespace impl { -// TODO: move to emulated/matrix.hlsl -// TODO: make one template for all dimensions -template -struct PortableMul64Helper -{ - static inline V multiply(M mat, V vec) + template + struct mul_helper { - V output; + static inline RhsT multiply(LhsT lhs, RhsT rhs) + { + return mul(lhs, rhs); + } + }; - output.x = (mat.rows[0] * vec).calcComponentSum(); - output.y = (mat.rows[1] * vec).calcComponentSum(); - output.z = (mat.rows[2] * vec).calcComponentSum(); + template + struct mul_helper, emulated_vector_t > + { + using LhsT = emulated_matrix; + using RhsT = emulated_vector_t; - return output; - } -}; + static inline RhsT multiply(LhsT mat, RhsT vec) + { + emulated_vector_t output; + output.x = (mat.rows[0] * vec).calcComponentSum(); + output.y = (mat.rows[1] * vec).calcComponentSum(); + output.z = (mat.rows[2] * vec).calcComponentSum(); -template -struct PortableMul64Helper -{ - static inline V multiply(M mat, V vec) - { - return mul(mat, vec); - } -}; + return output; + } + }; } -#ifdef __HLSL_VERSION -template -V portableMul64(M mat, V vec) +// TODO: move to basic.hlsl? 
+// TODO: concepts, to ensure that LhsT is a matrix and RhsT is a vector type +template +RhsT mul(LhsT lhs, RhsT rhs) { - return impl::PortableMul64Helper >::multiply(mat, vec); + return impl::mul_helper::multiply(lhs, rhs); } -#else -template -V portableMul64(M mat, V vec) -{ - return impl::PortableMul64Helper::multiply(mat, vec); -} -#endif +//template +//emulated_vector_t mul(emulated_matrix mat, emulated_vector_t vec) +//{ +// emulated_vector_t output; +// output.x = (mat.rows[0] * vec).calcComponentSum(); +// output.y = (mat.rows[1] * vec).calcComponentSum(); +// output.z = (mat.rows[2] * vec).calcComponentSum(); +// +// return output; +//} } } diff --git a/include/nbl/builtin/hlsl/shapes/beziers.hlsl b/include/nbl/builtin/hlsl/shapes/beziers.hlsl index fa009074e0..d4ebe6d0d8 100644 --- a/include/nbl/builtin/hlsl/shapes/beziers.hlsl +++ b/include/nbl/builtin/hlsl/shapes/beziers.hlsl @@ -512,13 +512,12 @@ struct Quadratic }; // This function returns the analytic quartic equation to solve for lhs bezier's t value for intersection with another bezier curve -template +template static math::equations::Quartic getBezierBezierIntersectionEquation(NBL_CONST_REF_ARG(QuadraticBezier) lhs, NBL_CONST_REF_ARG(QuadraticBezier) rhs) { using float_t2 = portable_vector_t2; - using float64 = portable_float64_t; - using float64_vec2 = portable_float64_t2; - + using float64 = conditional_t || is_same_v, float64_t, emulated_float64_t >; + using float64_vec2 = conditional_t || is_same_v, float64_t2, emulated_vector_t2 > >; // Algorithm based on Computer Aided Geometric Design: // https://scholarsarchive.byu.edu/cgi/viewcontent.cgi?article=1000&context=facpub#page99 diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index d851620763..8fe39c0c40 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -723,6 +723,9 @@ struct float_of_size<8> using type = float64_t; }; +template +using float_of_size_t = typename float_of_size::type; + } } From 4359f91e627cc15dee7a79dec8a2004052f8475b Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 26 Nov 2024 13:55:29 -0800 Subject: [PATCH 097/432] Fixed array_get and array_set --- examples_tests | 2 +- include/nbl/builtin/hlsl/cpp_compat/basic.h | 4 +- .../nbl/builtin/hlsl/emulated/matrix_t.hlsl | 72 ++++++----------- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 78 ++++++------------- .../nbl/builtin/hlsl/portable/matrix_t.hlsl | 57 ++++++-------- 5 files changed, 78 insertions(+), 135 deletions(-) diff --git a/examples_tests b/examples_tests index ad0e24f799..d62f779d68 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit ad0e24f799afe4b7bb7f1675b061e8408d95a0dd +Subproject commit d62f779d6845e15599f10ce0ec90daf8a15c4e67 diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 8788794ccf..08f446809a 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -56,7 +56,9 @@ namespace nbl::hlsl // We need variadic macro in order to handle multi parameter templates because the // preprocessor parses the template parameters as different macro parameters. #define NBL_REF_ARG(...) typename nbl::hlsl::add_reference<__VA_ARGS__ >::type -#define NBL_CONST_REF_ARG(...) typename nbl::hlsl::add_reference>::type +//#define NBL_CONST_REF_ARG(...) typename nbl::hlsl::add_reference>::type + +#define NBL_CONST_REF_ARG(...) 
const __VA_ARGS__& #else diff --git a/include/nbl/builtin/hlsl/emulated/matrix_t.hlsl b/include/nbl/builtin/hlsl/emulated/matrix_t.hlsl index 2dde2bd90c..41219a6b7c 100644 --- a/include/nbl/builtin/hlsl/emulated/matrix_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/matrix_t.hlsl @@ -8,69 +8,47 @@ namespace nbl namespace hlsl { -template -struct emulated_matrix {}; - -template -struct emulated_matrix +template +struct emulated_matrix { - using vec_t = emulated_vector_t2; - using this_t = emulated_matrix; - - vec_t rows[2]; - - this_t getTransposed() NBL_CONST_MEMBER_FUNC - { - this_t output; - - output.rows[0].x = rows[0].x; - output.rows[1].x = rows[0].y; - - output.rows[0].y = rows[1].x; - output.rows[1].y = rows[1].y; - - return output; - } -}; + using vec_t = emulated_vector_t; + using this_t = emulated_matrix; + using transposed_t = emulated_matrix; -template -struct emulated_matrix -{ - using vec_t = emulated_vector_t3; - using this_t = emulated_matrix; + NBL_CONSTEXPR_STATIC_INLINE uint32_t getRowCount() { return RowCount; } + NBL_CONSTEXPR_STATIC_INLINE uint32_t getColumnCount() { return RowCount; } - vec_t rows[3]; + vec_t rows[RowCount]; - this_t getTransposed() NBL_CONST_MEMBER_FUNC + transposed_t getTransposed() { - this_t output; - - output.rows[0].x = rows[0].x; - output.rows[1].x = rows[0].y; - output.rows[2].x = rows[0].z; - - output.rows[0].y = rows[1].x; - output.rows[1].y = rows[1].y; - output.rows[2].y = rows[1].z; + static nbl::hlsl::array_get getter; + static nbl::hlsl::array_set setter; - output.rows[0].z = rows[2].x; - output.rows[1].z = rows[2].y; - output.rows[2].z = rows[2].z; + transposed_t output; + for (int i = 0; i < RowCount; ++i) + { + for (int j = 0; j < ColumnCount; ++j) + setter(output.rows[i], j, getter(rows[j], i)); + } return output; } - vec_t operator[](uint32_t rowIdx) - { - return rows[rowIdx]; - } + //vec_t operator[](uint32_t rowIdx) + //{ + // return rows[rowIdx]; + //} }; template using emulated_matrix_t2x2 = emulated_matrix; template using emulated_matrix_t3x3 = emulated_matrix; - +template +using emulated_matrix_t4x4 = emulated_matrix; +template +using emulated_matrix_t3x4 = emulated_matrix; } } #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 89c78936a6..fdf78f65a7 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -379,6 +379,7 @@ template using emulated_vector_t4 = emulated_vector_impl::emulated_vector::type>; // TODO: better implementation +// TODO: do i even need that? 
template struct is_valid_emulated_vector { @@ -387,60 +388,29 @@ struct is_valid_emulated_vector is_same_v >; }; -template -struct array_get, ComponentType, uint32_t> -{ - ComponentType operator()(NBL_REF_ARG(emulated_vector_t2) vec, const uint32_t ix) - { - return vec.getComponent(ix); - } -}; - -//template -//struct array_set, ComponentType, uint32_t> -//{ -// void operator()(NBL_REF_ARG(emulated_vector_t2) vec, uint32_t index, ComponentType value) -// { -// vec.setComponent(index, value); -// } -//}; - -//template -//struct array_get, ComponentType, uint32_t> -//{ -// ComponentType operator()(typename emulated_vector_t vec, const uint32_t ix) -// { -// return vec.getComponent(ix); -// } -//}; - -//template -//struct array_get, ComponentType, uint32_t> -//{ -// ComponentType operator()(NBL_REF_ARG(emulated_vector_t4) vec, const uint32_t ix) -// { -// return vec.getComponent(ix); -// } -//}; - - -template -struct array_set, ComponentType, uint32_t> -{ - void operator()(NBL_REF_ARG(emulated_vector_t3) vec, uint32_t index, ComponentType value) - { - vec.setComponent(index, value); - } -}; - -template -struct array_set, ComponentType, uint32_t> -{ - void operator()(NBL_REF_ARG(emulated_vector_t4) vec, uint32_t index, ComponentType value) - { - vec.setComponent(index, value); - } -}; +// used this macro, because I can't make it work with templated array dimension +#define DEFINE_ARRAY_GET_SET_SPECIALIZATION(DIMENSION)\ +template\ +struct array_get, ComponentType, uint32_t>\ +{\ + inline ComponentType operator()(NBL_REF_ARG(emulated_vector_t##DIMENSION) vec, const uint32_t ix)\ + {\ + return vec.getComponent(ix);\ + }\ +};\ +template\ +struct array_set, ComponentType, uint32_t>\ +{\ + void operator()(NBL_REF_ARG(emulated_vector_t##DIMENSION) vec, uint32_t index, ComponentType value)\ + {\ + vec.setComponent(index, value);\ + }\ +};\ + +DEFINE_ARRAY_GET_SET_SPECIALIZATION(2) +DEFINE_ARRAY_GET_SET_SPECIALIZATION(3) +DEFINE_ARRAY_GET_SET_SPECIALIZATION(4) +#undef DEFINE_ARRAY_GET_SET_SPECIALIZATION namespace impl { diff --git a/include/nbl/builtin/hlsl/portable/matrix_t.hlsl b/include/nbl/builtin/hlsl/portable/matrix_t.hlsl index 46fc6836af..2a8769244e 100644 --- a/include/nbl/builtin/hlsl/portable/matrix_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/matrix_t.hlsl @@ -43,33 +43,36 @@ template using portable_float64_t3x3 = portable_matrix_t3x3; #endif -namespace impl +namespace portable_matrix_impl { - template - struct mul_helper +template +struct mul_helper +{ + static inline RhsT multiply(LhsT lhs, RhsT rhs) { - static inline RhsT multiply(LhsT lhs, RhsT rhs) - { - return mul(lhs, rhs); - } - }; + return mul(lhs, rhs); + } +}; - template - struct mul_helper, emulated_vector_t > +// TODO: portable instead of emulated? so no need for partial spec? 
+template
+struct mul_helper, emulated_vector_t >
+{
+    using LhsT = emulated_matrix;
+    using RhsT = emulated_vector_t;
+
+    static inline RhsT multiply(LhsT mat, RhsT vec)
     {
-        using LhsT = emulated_matrix;
-        using RhsT = emulated_vector_t;
+        nbl::hlsl::array_get>, hlsl::emulated_float64_t> getter;
+        nbl::hlsl::array_set>, hlsl::emulated_float64_t> setter;
 
-        static inline RhsT multiply(LhsT mat, RhsT vec)
-        {
-            emulated_vector_t output;
-            output.x = (mat.rows[0] * vec).calcComponentSum();
-            output.y = (mat.rows[1] * vec).calcComponentSum();
-            output.z = (mat.rows[2] * vec).calcComponentSum();
+        emulated_vector_t output;
+        for (int i = 0; i < RowCount; ++i)
+            setter(output, i, nbl::hlsl::dot(mat.rows[i], vec));
 
-            return output;
-        }
-    };
+        return output;
+    }
+};
 }
 
 // TODO: move to basic.hlsl?
@@ -77,19 +80,9 @@ namespace impl
 template
 RhsT mul(LhsT lhs, RhsT rhs)
 {
-    return impl::mul_helper::multiply(lhs, rhs);
+    return portable_matrix_impl::mul_helper::multiply(lhs, rhs);
 }
 
-//template
-//emulated_vector_t mul(emulated_matrix mat, emulated_vector_t vec)
-//{
-//    emulated_vector_t output;
-//    output.x = (mat.rows[0] * vec).calcComponentSum();
-//    output.y = (mat.rows[1] * vec).calcComponentSum();
-//    output.z = (mat.rows[2] * vec).calcComponentSum();
-//
-//    return output;
-//}
 }
 }

From adb4262929e3daa74346a61022cfb35603cfbef2 Mon Sep 17 00:00:00 2001
From: Przemog1
Date: Wed, 27 Nov 2024 16:52:22 -0800
Subject: [PATCH 098/432] Cherry picked matrix_traits.hlsl

---
 .../hlsl/matrix_utils/matrix_traits.hlsl | 44 +++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 include/nbl/builtin/hlsl/matrix_utils/matrix_traits.hlsl

diff --git a/include/nbl/builtin/hlsl/matrix_utils/matrix_traits.hlsl b/include/nbl/builtin/hlsl/matrix_utils/matrix_traits.hlsl
new file mode 100644
index 0000000000..28142af3ac
--- /dev/null
+++ b/include/nbl/builtin/hlsl/matrix_utils/matrix_traits.hlsl
@@ -0,0 +1,44 @@
+#ifndef _NBL_BUILTIN_HLSL_MATRIX_UTILS_MATRIX_TRAITS_INCLUDED_
+#define _NBL_BUILTIN_HLSL_MATRIX_UTILS_MATRIX_TRAITS_INCLUDED_
+
+#include
+
+namespace nbl
+{
+namespace hlsl
+{
+
+template
+struct matrix_traits;
+
+// i choose to implement it this way because of this DXC bug: https://github.com/microsoft/DirectXShaderCompiler/issues/7007
+#define DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(ROW_COUNT, COLUMN_COUNT) \
+template \
+struct matrix_traits > \
+{ \
+    using ComponentType = T; \
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t RowCount = ROW_COUNT; \
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t ColumnCount = COLUMN_COUNT; \
+    NBL_CONSTEXPR_STATIC_INLINE bool Square = RowCount == ColumnCount; \
+};
+
+DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(2, 2)
+DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(3, 3)
+DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(4, 4)
+DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(3, 4)
+
+// TODO: when this bug: https://github.com/microsoft/DirectXShaderCompiler/issues/7007 is fixed, uncomment and delete template specializations
+/*template
+struct matrix_traits >
+{
+    using ComponentType = T;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t RowCount = ROW_COUNT;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t ColumnCount = COLUMN_COUNT;
+    NBL_CONSTEXPR_STATIC_INLINE bool Square = RowCount == ColumnCount;
+};
+*/
+
+}
+}
+
+#endif
\ No newline at end of file

From 6bcbe565ed99571ee1fb45259813521cd155b58b Mon Sep 17 00:00:00 2001
From: Przemog1
Date: Thu, 28 Nov 2024 15:18:58 -0800
Subject: [PATCH 099/432] Extended nbl::hlsl::dot function so it can be used with emulated_vectors.h

---
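A minimal sketch of the idea these two patches lean on, written as plain standalone C++ rather than the actual Nabla sources (MyVec3, vector_dim and generic_dot are stand-in names invented for this illustration only): element access is routed through an array_get functor plus a dimension trait, so one dot-product code path can serve both native vectors and emulated_vector_t-style types that cannot be indexed with operator[].

// illustrative sketch only; mirrors the accessor/trait pattern, not the real Nabla code
#include <cstdint>
#include <cstdio>

// stand-in for an "emulated" vector that has no operator[]
struct MyVec3
{
    float x, y, z;
    float get(uint32_t i) const { return i == 0 ? x : (i == 1 ? y : z); }
};

// accessor functor, playing the role of nbl::hlsl::array_get from array_accessors.hlsl
template<typename ArrayType, typename ComponentType>
struct array_get
{
    ComponentType operator()(const ArrayType& arr, const uint32_t ix) { return arr.get(ix); }
};

// dimension trait, playing the role vector_traits.hlsl plays in the patch
template<typename> struct vector_dim;
template<> struct vector_dim<MyVec3> { static const uint32_t value = 3; };

// dot written only against the accessor and the trait, so it does not care
// whether the vector type is native or emulated
template<typename Vec, typename Scalar>
Scalar generic_dot(const Vec& lhs, const Vec& rhs)
{
    array_get<Vec, Scalar> getter;
    Scalar retval = getter(lhs, 0) * getter(rhs, 0);
    for (uint32_t i = 1; i < vector_dim<Vec>::value; ++i)
        retval += getter(lhs, i) * getter(rhs, i);
    return retval;
}

int main()
{
    MyVec3 a{1.f, 2.f, 3.f}, b{4.f, 5.f, 6.f};
    std::printf("%f\n", generic_dot<MyVec3, float>(a, b)); // prints 32.000000
}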
 include/nbl/builtin/hlsl/array_accessors.hlsl | 30 +++++++++++
 .../nbl/builtin/hlsl/cpp_compat/intrinsics.h | 13 ++---
 include/nbl/builtin/hlsl/dot_product.hlsl | 25 ++++++++++
 include/nbl/builtin/hlsl/functional.hlsl | 18 -------
 .../builtin/hlsl/portable/dot_product.hlsl | 35 +++++++++++++
 include/nbl/builtin/hlsl/scalar_of.hlsl | 23 +++++++++
 .../builtin/hlsl/vector_utils/scalar_of.hlsl | 24 +++++++++
 .../hlsl/vector_utils/vector_traits.hlsl | 50 +++++++++++++++++++
 8 files changed, 190 insertions(+), 28 deletions(-)
 create mode 100644 include/nbl/builtin/hlsl/array_accessors.hlsl
 create mode 100644 include/nbl/builtin/hlsl/dot_product.hlsl
 create mode 100644 include/nbl/builtin/hlsl/portable/dot_product.hlsl
 create mode 100644 include/nbl/builtin/hlsl/scalar_of.hlsl
 create mode 100644 include/nbl/builtin/hlsl/vector_utils/scalar_of.hlsl
 create mode 100644 include/nbl/builtin/hlsl/vector_utils/vector_traits.hlsl

diff --git a/include/nbl/builtin/hlsl/array_accessors.hlsl b/include/nbl/builtin/hlsl/array_accessors.hlsl
new file mode 100644
index 0000000000..11d685a150
--- /dev/null
+++ b/include/nbl/builtin/hlsl/array_accessors.hlsl
@@ -0,0 +1,30 @@
+#ifndef _NBL_BUILTIN_HLSL_ARRAY_ACCESSORS_HLSL_INCLUDED_
+#define _NBL_BUILTIN_HLSL_ARRAY_ACCESSORS_HLSL_INCLUDED_
+
+#include
+
+namespace nbl
+{
+namespace hlsl
+{
+template
+struct array_get
+{
+    ComponentType operator()(NBL_CONST_REF_ARG(ArrayType) arr, const I ix)
+    {
+        return arr[ix];
+    }
+};
+
+template
+struct array_set
+{
+    void operator()(NBL_REF_ARG(ArrayType) arr, I index, ComponentType val)
+    {
+        arr[index] = val;
+    }
+};
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.h b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.h
index e83ced0bc5..100e7bf2c3 100644
--- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.h
+++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.h
@@ -10,6 +10,9 @@
 #include
 #include
 #include "nbl/core/util/bitflag.h"
+#include
+
+//#include
 
 namespace nbl::hlsl
 {
@@ -44,16 +47,6 @@ NBL_BIT_OP_GLM_PASSTHROUGH(bitCount,bitCount)
 NBL_SIMPLE_GLM_PASSTHROUGH(cross,cross)
 NBL_SIMPLE_GLM_PASSTHROUGH(clamp,clamp)
 
-template
-inline scalar_type_t dot(const T& lhs, const T& rhs)
-{
-    scalar_type_t retval = lhs[0]*rhs[0];
-    // whatever has a `scalar_type` specialization should be a pure vector
-    for (auto i=1; i