From 995f1702382b34e875303b2707439d4c12199cdb Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 21 Jan 2025 09:15:57 +0700 Subject: [PATCH 01/68] HLSL RayTracing Compilation Signed-off-by: kevyuu --- src/nbl/asset/interchange/CHLSLLoader.cpp | 6 ++++++ src/nbl/asset/utils/CHLSLCompiler.cpp | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/src/nbl/asset/interchange/CHLSLLoader.cpp b/src/nbl/asset/interchange/CHLSLLoader.cpp index a6924b132a..e049f3bdab 100644 --- a/src/nbl/asset/interchange/CHLSLLoader.cpp +++ b/src/nbl/asset/interchange/CHLSLLoader.cpp @@ -38,6 +38,12 @@ SAssetBundle CHLSLLoader::loadAsset(system::IFile* _file, const IAssetLoader::SA {".comp.hlsl",IShader::E_SHADER_STAGE::ESS_COMPUTE}, {".mesh.hlsl",IShader::E_SHADER_STAGE::ESS_MESH}, {".task.hlsl",IShader::E_SHADER_STAGE::ESS_TASK}, + {".rgen.hlsl",IShader::E_SHADER_STAGE::ESS_RAYGEN}, + {".rahit.hlsl",IShader::E_SHADER_STAGE::ESS_ANY_HIT}, + {".rchit.hlsl",IShader::E_SHADER_STAGE::ESS_CLOSEST_HIT}, + {".rmiss.hlsl",IShader::E_SHADER_STAGE::ESS_MISS}, + {".rint.hlsl",IShader::E_SHADER_STAGE::ESS_INTERSECTION}, + {".rcall.hlsl",IShader::E_SHADER_STAGE::ESS_CALLABLE}, }; auto shaderStage = IShader::E_SHADER_STAGE::ESS_UNKNOWN; for (auto& it : typeFromExt) diff --git a/src/nbl/asset/utils/CHLSLCompiler.cpp b/src/nbl/asset/utils/CHLSLCompiler.cpp index 2827361ff4..fb54f4c1f3 100644 --- a/src/nbl/asset/utils/CHLSLCompiler.cpp +++ b/src/nbl/asset/utils/CHLSLCompiler.cpp @@ -44,6 +44,13 @@ static const wchar_t* ShaderStageToString(asset::IShader::E_SHADER_STAGE stage) return L"as"; case asset::IShader::E_SHADER_STAGE::ESS_MESH: return L"ms"; + case asset::IShader::E_SHADER_STAGE::ESS_RAYGEN: [[fallthrough]]; + case asset::IShader::E_SHADER_STAGE::ESS_ANY_HIT: [[fallthrough]]; + case asset::IShader::E_SHADER_STAGE::ESS_CLOSEST_HIT: [[fallthrough]]; + case asset::IShader::E_SHADER_STAGE::ESS_MISS: [[fallthrough]]; + case asset::IShader::E_SHADER_STAGE::ESS_INTERSECTION: [[fallthrough]]; + case 
asset::IShader::E_SHADER_STAGE::ESS_CALLABLE: + return L"lib"; default: return nullptr; }; From 1f3b397fd9f41a3941c454dbf750163311698bcb Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 21 Jan 2025 09:19:22 +0700 Subject: [PATCH 02/68] Ray Tracing Pipeline creation Signed-off-by: kevyuu --- include/nbl/asset/IRayTracingPipeline.h | 152 ++++++++++++++++++ include/nbl/video/CVulkanRayTracingPipeline.h | 42 +++++ include/nbl/video/IGPURayTracingPipeline.h | 92 +++++++++++ include/nbl/video/ILogicalDevice.h | 10 ++ src/nbl/CMakeLists.txt | 1 + src/nbl/video/CVulkanLogicalDevice.cpp | 106 ++++++++++++ src/nbl/video/CVulkanLogicalDevice.h | 8 + src/nbl/video/CVulkanPhysicalDevice.cpp | 1 + src/nbl/video/CVulkanRayTracingPipeline.cpp | 61 +++++++ src/nbl/video/ILogicalDevice.cpp | 33 ++++ .../device_capabilities/device_limits.json | 1 - 11 files changed, 506 insertions(+), 1 deletion(-) create mode 100644 include/nbl/asset/IRayTracingPipeline.h create mode 100644 include/nbl/video/CVulkanRayTracingPipeline.h create mode 100644 include/nbl/video/IGPURayTracingPipeline.h create mode 100644 src/nbl/video/CVulkanRayTracingPipeline.cpp diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h new file mode 100644 index 0000000000..fb055a8dff --- /dev/null +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -0,0 +1,152 @@ +#ifndef _NBL_ASSET_I_RAY_TRACING_PIPELINE_H_INCLUDED_ +#define _NBL_ASSET_I_RAY_TRACING_PIPELINE_H_INCLUDED_ + + +#include "nbl/asset/IShader.h" +#include "nbl/asset/RasterizationStates.h" +#include "nbl/asset/IPipeline.h" +#include "nbl/asset/IRenderpass.h" + +#include + + +namespace nbl::asset +{ + struct SShaderGroupsParams + { + constexpr static inline uint32_t ShaderUnused = 0xffFFffFFu; + + struct SGeneralGroup + { + uint32_t shaderIndex = ShaderUnused; + }; + + struct SHitGroup + { + uint32_t closestHitShaderIndex = ShaderUnused; + uint32_t anyHitShaderIndex = ShaderUnused; + uint32_t intersectionShaderIndex = 
ShaderUnused; + }; + + SGeneralGroup raygenGroup; + core::vector hitGroups; + core::vector missGroups; + core::vector callableShaderGroups; + + inline uint32_t getShaderGroupCount() const + { + return 1 + hitGroups.size() + missGroups.size() + callableShaderGroups.size(); + } + + }; + using SGeneralShaderGroup = SShaderGroupsParams::SGeneralGroup; + using SHitShaderGroup = SShaderGroupsParams::SHitGroup; + ; + + class IRayTracingPipelineBase : public virtual core::IReferenceCounted + { + public: + + struct SCachedCreationParams final + { + SShaderGroupsParams shaderGroups; + uint64_t maxRecursionDepth; + }; + }; + + template + class IRayTracingPipeline : public IPipeline, public IRayTracingPipelineBase + { + public: + struct SCreationParams : IPipeline::SCreationParams + { + protected: + using SpecInfo = ShaderType::SSpecInfo; + template + inline bool impl_valid(ExtraLambda&& extra) const + { + if (!IPipeline::SCreationParams::layout) + return false; + + core::bitflag stagePresence = {}; + for (const auto info : shaders) + if (info.shader) + { + if (!extra(info)) + return false; + const auto stage = info.shader->getStage(); + if (stage > ICPUShader::E_SHADER_STAGE::ESS_CALLABLE || stage < ICPUShader::E_SHADER_STAGE::ESS_RAYGEN) + return false; + if (stage == ICPUShader::E_SHADER_STAGE::ESS_RAYGEN && stagePresence.hasFlags(hlsl::ESS_RAYGEN)) + return false; + stagePresence |= stage; + } + + auto getShaderStage = [this](size_t index) -> ICPUShader::E_SHADER_STAGE + { + return shaders[index].shader->getStage(); + }; + + if (cached.shaderGroups.raygenGroup.shaderIndex >= shaders.size()) + return false; + if (getShaderStage(cached.shaderGroups.raygenGroup.shaderIndex) != ICPUShader::E_SHADER_STAGE::ESS_RAYGEN) + return false; + + for (const auto& shaderGroup : cached.shaderGroups.hitGroups) + { + if (shaderGroup.anyHitShaderIndex != SShaderGroupsParams::ShaderUnused && getShaderStage(shaderGroup.anyHitShaderIndex) != ICPUShader::E_SHADER_STAGE::ESS_ANY_HIT) + return 
false; + + if (shaderGroup.closestHitShaderIndex != SShaderGroupsParams::ShaderUnused && getShaderStage(shaderGroup.closestHitShaderIndex) != ICPUShader::E_SHADER_STAGE::ESS_CLOSEST_HIT) + return false; + + if (shaderGroup.intersectionShaderIndex != SShaderGroupsParams::ShaderUnused && getShaderStage(shaderGroup.intersectionShaderIndex) != ICPUShader::E_SHADER_STAGE::ESS_INTERSECTION) + return false; + } + + for (const auto& shaderGroup : cached.shaderGroups.missGroups) + { + if (getShaderStage(shaderGroup.shaderIndex) != ICPUShader::E_SHADER_STAGE::ESS_MISS) + return false; + } + + for (const auto& shaderGroup : cached.shaderGroups.callableShaderGroups) + { + if (getShaderStage(shaderGroup.shaderIndex) != ICPUShader::E_SHADER_STAGE::ESS_CALLABLE) + return false; + } + return true; + } + + public: + inline bool valid() const + { + return impl_valid([](const SpecInfo& info)->bool + { + if (!info.valid()) + return false; + return false; + }); + } + + std::span shaders = {}; + SCachedCreationParams cached = {}; + }; + + inline const SCachedCreationParams& getCachedCreationParams() const { return m_params; } + size_t getHitGroupCount() const { return m_params.shaderGroups.hitGroups.size(); } + size_t getMissGroupCount() const { return m_params.shaderGroups.missGroups.size(); } + size_t getCallableGroupCount() const { return m_params.shaderGroups.callableShaderGroups.size(); } + + protected: + explicit IRayTracingPipeline(const SCreationParams& _params) : + IPipeline(core::smart_refctd_ptr(_params.layout)), + m_params(_params.cached) { + } + + SCachedCreationParams m_params; + }; + +} + +#endif diff --git a/include/nbl/video/CVulkanRayTracingPipeline.h b/include/nbl/video/CVulkanRayTracingPipeline.h new file mode 100644 index 0000000000..f7b2c88ebf --- /dev/null +++ b/include/nbl/video/CVulkanRayTracingPipeline.h @@ -0,0 +1,42 @@ +#ifndef _NBL_C_VULKAN_RAY_TRACING_PIPELINE_H_INCLUDED_ +#define _NBL_C_VULKAN_RAY_TRACING_PIPELINE_H_INCLUDED_ + + +#include 
"nbl/video/IGPURayTracingPipeline.h" + +#include "nbl/video/CVulkanShader.h" + + +namespace nbl::video +{ + + + class CVulkanRayTracingPipeline final : public IGPURayTracingPipeline + { + using ShaderRef = core::smart_refctd_ptr; + using ShaderContainer = core::smart_refctd_dynamic_array; + using ShaderHandleContainer = core::smart_refctd_dynamic_array; + public: + + CVulkanRayTracingPipeline(const SCreationParams& params, const VkPipeline vk_pipeline); + + inline const void* getNativeHandle() const override { return &m_vkPipeline; } + + inline VkPipeline getInternalObject() const { return m_vkPipeline; } + + std::span getRaygenGroupShaderHandle() const override; + std::span getHitGroupShaderHandle(uint32_t index) const override; + std::span getMissGroupShaderHandle(uint32_t index) const override; + std::span getCallableGroupShaderHandle(uint32_t index) const override; + + private: + ~CVulkanRayTracingPipeline(); + + const VkPipeline m_vkPipeline; + ShaderContainer m_shaders; + ShaderHandleContainer m_shaderGroupHandles; + }; + +} + +#endif diff --git a/include/nbl/video/IGPURayTracingPipeline.h b/include/nbl/video/IGPURayTracingPipeline.h new file mode 100644 index 0000000000..0d6d0f13a0 --- /dev/null +++ b/include/nbl/video/IGPURayTracingPipeline.h @@ -0,0 +1,92 @@ +#ifndef _NBL_I_GPU_RAY_TRACING_PIPELINE_H_INCLUDED_ +#define _NBL_I_GPU_RAY_TRACING_PIPELINE_H_INCLUDED_ + +#include "nbl/asset/IPipeline.h" +#include "nbl/asset/IRayTracingPipeline.h" + +#include "nbl/video/SPipelineCreationParams.h" + + +namespace nbl::video +{ + +class IGPURayTracingPipeline : public IBackendObject, public asset::IRayTracingPipeline +{ + using pipeline_t = asset::IRayTracingPipeline; + + public: + struct SCreationParams final : pipeline_t::SCreationParams, SPipelineCreationParams + { + #define base_flag(F) static_cast(pipeline_t::SCreationParams::FLAGS::F) + enum class FLAGS : uint64_t + { + NONE = base_flag(NONE), + DISABLE_OPTIMIZATIONS = base_flag(DISABLE_OPTIMIZATIONS), + 
ALLOW_DERIVATIVES = base_flag(ALLOW_DERIVATIVES), + CAPTURE_STATISTICS = base_flag(CAPTURE_STATISTICS), + CAPTURE_INTERNAL_REPRESENTATIONS = base_flag(CAPTURE_INTERNAL_REPRESENTATIONS), + FAIL_ON_PIPELINE_COMPILE_REQUIRED = base_flag(FAIL_ON_PIPELINE_COMPILE_REQUIRED), + EARLY_RETURN_ON_FAILURE = base_flag(EARLY_RETURN_ON_FAILURE), + LINK_TIME_OPTIMIZATION = base_flag(LINK_TIME_OPTIMIZATION), + RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT = base_flag(RETAIN_LINK_TIME_OPTIMIZATION_INFO), + }; + #undef base_flag + + inline SSpecializationValidationResult valid() const + { + if (!layout) + return {}; + + SSpecializationValidationResult retval = { + .count=0, + .dataSize=0, + }; + const bool valid = pipeline_t::SCreationParams::impl_valid([&retval](const IGPUShader::SSpecInfo& info)->bool + { + const auto dataSize = info.valid(); + if (dataSize<0) + return false; + else if (dataSize==0) + return true; + + const size_t count = info.entries ? info.entries->size():0x80000000ull; + if (count>0x7fffffff) + return {}; + retval += {.count=dataSize ? 
static_cast(count):0,.dataSize=static_cast(dataSize)}; + return retval; + }); + if (!valid) + return {}; + return retval; + } + + inline std::span getShaders() const { return shaders; } + + // TODO: Could guess the required flags from SPIR-V introspection of declared caps + core::bitflag flags = FLAGS::NONE; + }; + + inline core::bitflag getCreationFlags() const { return m_flags; } + + // Vulkan: const VkPipeline* + virtual const void* getNativeHandle() const = 0; + + virtual std::span getRaygenGroupShaderHandle() const = 0; + virtual std::span getHitGroupShaderHandle(uint32_t index) const = 0; + virtual std::span getMissGroupShaderHandle(uint32_t index) const = 0; + virtual std::span getCallableGroupShaderHandle(uint32_t index) const = 0; + + protected: + IGPURayTracingPipeline(const SCreationParams& params) : IBackendObject(core::smart_refctd_ptr(params.layout->getOriginDevice())), + pipeline_t(params), + m_flags(params.flags) + {} + + virtual ~IGPURayTracingPipeline() = default; + + const core::bitflag m_flags; +}; + +} + +#endif diff --git a/include/nbl/video/ILogicalDevice.h b/include/nbl/video/ILogicalDevice.h index b79a2b053b..2a4a3837ee 100644 --- a/include/nbl/video/ILogicalDevice.h +++ b/include/nbl/video/ILogicalDevice.h @@ -905,6 +905,10 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe core::smart_refctd_ptr* const output ); + bool createRayTracingPipelines(IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output); + // queries inline core::smart_refctd_ptr createQueryPool(const IQueryPool::SCreationParams& params) { @@ -1162,6 +1166,12 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe core::smart_refctd_ptr* const output, const IGPUGraphicsPipeline::SCreationParams::SSpecializationValidationResult& validation ) = 0; + virtual void createRayTracingPipelines_impl( + IGPUPipelineCache* const pipelineCache, + const std::span createInfos, + 
core::smart_refctd_ptr* const output, + const IGPURayTracingPipeline::SCreationParams::SSpecializationValidationResult& validation + ) = 0; virtual core::smart_refctd_ptr createQueryPool_impl(const IQueryPool::SCreationParams& params) = 0; virtual bool getQueryPoolResults_impl(const IQueryPool* const queryPool, const uint32_t firstQuery, const uint32_t queryCount, void* const pData, const size_t stride, const core::bitflag flags) = 0; diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index ecb3a84f9d..ad464bd035 100755 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -275,6 +275,7 @@ set(NBL_VIDEO_SOURCES ${NBL_ROOT_PATH}/src/nbl/video/CVulkanConnection.cpp ${NBL_ROOT_PATH}/src/nbl/video/CVulkanPhysicalDevice.cpp ${NBL_ROOT_PATH}/src/nbl/video/CVulkanGraphicsPipeline.cpp + ${NBL_ROOT_PATH}/src/nbl/video/CVulkanRayTracingPipeline.cpp ${NBL_ROOT_PATH}/src/nbl/video/CVulkanEvent.cpp ${NBL_ROOT_PATH}/src/nbl/video/CSurfaceVulkan.cpp diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index cc96eb6e51..4502c97fba 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1425,6 +1425,112 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( std::fill_n(output,vk_createInfos.size(),nullptr); } +void CVulkanLogicalDevice::createRayTracingPipelines_impl( + IGPUPipelineCache* const pipelineCache, + const std::span createInfos, + core::smart_refctd_ptr* const output, + const IGPURayTracingPipeline::SCreationParams::SSpecializationValidationResult& validation +) +{ + const VkPipelineCache vk_pipelineCache = pipelineCache ? 
static_cast(pipelineCache)->getInternalObject():VK_NULL_HANDLE; + + size_t maxShaderStages = 0; + for (const auto& info : createInfos) + maxShaderStages += info.shaders.size(); + core::vector vk_createInfos(createInfos.size(), { VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR,nullptr }); + core::vector vk_requiredSubgroupSize(maxShaderStages,{ + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO,nullptr + }); + core::vector vk_shaderStage(maxShaderStages, { VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, nullptr }); + core::vector vk_shaderGroup(maxShaderStages, { VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, nullptr}); + core::vector vk_specializationInfos(createInfos.size(), { 0, nullptr, 0, nullptr }); + core::vector vk_specializationMapEntry(validation.count); + core::vector specializationData(validation.dataSize); + + auto outCreateInfo = vk_createInfos.data(); + auto outRequiredSubgroupSize = vk_requiredSubgroupSize.data(); + auto outShaderStage = vk_shaderStage.data(); + auto outShaderGroup = vk_shaderGroup.data(); + auto outSpecInfo = vk_specializationInfos.data(); + auto outSpecMapEntry = vk_specializationMapEntry.data(); + auto outSpecData = specializationData.data(); + auto getVkShaderIndex = [](uint32_t index) { return index == asset::SShaderGroupsParams::ShaderUnused ? 
VK_SHADER_UNUSED_KHR : index; }; + auto getGeneralVkRayTracingShaderGroupCreateInfo = [getVkShaderIndex](asset::SGeneralShaderGroup group) -> VkRayTracingShaderGroupCreateInfoKHR + { + return { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .pNext = nullptr, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, + .generalShader = getVkShaderIndex(group.shaderIndex), + .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = VK_SHADER_UNUSED_KHR, + .intersectionShader = VK_SHADER_UNUSED_KHR, + }; + }; + auto getHitVkRayTracingShaderGroupCreateInfo = [getVkShaderIndex](asset::SHitShaderGroup group) -> VkRayTracingShaderGroupCreateInfoKHR + { + return { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .pNext = nullptr, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, + .generalShader = VK_SHADER_UNUSED_KHR, + .closestHitShader = getVkShaderIndex(group.closestHitShaderIndex), + .anyHitShader = getVkShaderIndex(group.anyHitShaderIndex), + .intersectionShader = getVkShaderIndex(group.intersectionShaderIndex), + }; + }; + for (const auto& info : createInfos) + { + initPipelineCreateInfo(outCreateInfo,info); + outCreateInfo->pStages = outShaderStage; + for (const auto& specInfo : info.shaders) + { + if (specInfo.shader) + { + const auto stage = specInfo.shader->getStage(); + *(outShaderStage++) = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .stage = static_cast(stage), + .module = static_cast(specInfo.shader)->getInternalObject(), + .pName = specInfo.entryPoint.c_str(), + }; + } + } + outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages,outShaderStage); + + const auto& shaderGroups = info.cached.shaderGroups; + outCreateInfo->pGroups = outShaderGroup; + *(outShaderGroup++) = getGeneralVkRayTracingShaderGroupCreateInfo(shaderGroups.raygenGroup); + for (const auto& shaderGroup : shaderGroups.hitGroups) + *(outShaderGroup++) = 
getHitVkRayTracingShaderGroupCreateInfo(shaderGroup); + for (const auto& shaderGroup : shaderGroups.missGroups) + *(outShaderGroup++) = getGeneralVkRayTracingShaderGroupCreateInfo(shaderGroup); + for (const auto& shaderGroup : shaderGroups.callableShaderGroups) + *(outShaderGroup++) = getGeneralVkRayTracingShaderGroupCreateInfo(shaderGroup); + outCreateInfo->groupCount = 1 + shaderGroups.hitGroups.size() + shaderGroups.missGroups.size() + shaderGroups.callableShaderGroups.size(); + outCreateInfo->maxPipelineRayRecursionDepth = info.cached.maxRecursionDepth; + } + + auto vk_pipelines = reinterpret_cast(output); + if (m_devf.vk.vkCreateRayTracingPipelinesKHR(m_vkdev, VK_NULL_HANDLE, vk_pipelineCache,vk_createInfos.size(),vk_createInfos.data(),nullptr,vk_pipelines)==VK_SUCCESS) + { + for (size_t i=0ull; i( + createInfos[i], + vk_pipeline + ); + } + } + else + std::fill_n(output,vk_createInfos.size(),nullptr); +} + core::smart_refctd_ptr CVulkanLogicalDevice::createQueryPool_impl(const IQueryPool::SCreationParams& params) { VkQueryPoolCreateInfo info = {VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, nullptr}; diff --git a/src/nbl/video/CVulkanLogicalDevice.h b/src/nbl/video/CVulkanLogicalDevice.h index 410efe5d80..3ed5e9983a 100644 --- a/src/nbl/video/CVulkanLogicalDevice.h +++ b/src/nbl/video/CVulkanLogicalDevice.h @@ -31,6 +31,7 @@ #include "nbl/video/CVulkanDeferredOperation.h" #include "nbl/video/CVulkanAccelerationStructure.h" #include "nbl/video/CVulkanGraphicsPipeline.h" +#include "nbl/video/CVulkanRayTracingPipeline.h" namespace nbl::video @@ -302,6 +303,13 @@ class CVulkanLogicalDevice final : public ILogicalDevice const IGPUGraphicsPipeline::SCreationParams::SSpecializationValidationResult& validation ) override; + void createRayTracingPipelines_impl( + IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output, + const IGPURayTracingPipeline::SCreationParams::SSpecializationValidationResult& validation + ) override; + 
// queries core::smart_refctd_ptr createQueryPool_impl(const IQueryPool::SCreationParams& params) override; bool getQueryPoolResults_impl(const IQueryPool* const queryPool, const uint32_t firstQuery, const uint32_t queryCount, void* const pData, const size_t stride, const core::bitflag flags) override; diff --git a/src/nbl/video/CVulkanPhysicalDevice.cpp b/src/nbl/video/CVulkanPhysicalDevice.cpp index 79ffc7306d..6d2ea63e08 100644 --- a/src/nbl/video/CVulkanPhysicalDevice.cpp +++ b/src/nbl/video/CVulkanPhysicalDevice.cpp @@ -601,6 +601,7 @@ std::unique_ptr CVulkanPhysicalDevice::create(core::smart logger.log("Not enumerating VkPhysicalDevice %p because it reports limits of exact-type contrary to Vulkan specification!", system::ILogger::ELL_INFO, vk_physicalDevice); return nullptr; } + properties.limits.shaderGroupHandleSize = rayTracingPipelineProperties.shaderGroupHandleSize; properties.limits.maxRayRecursionDepth = rayTracingPipelineProperties.maxRayRecursionDepth; properties.limits.maxShaderGroupStride = rayTracingPipelineProperties.maxShaderGroupStride; properties.limits.shaderGroupBaseAlignment = rayTracingPipelineProperties.shaderGroupBaseAlignment; diff --git a/src/nbl/video/CVulkanRayTracingPipeline.cpp b/src/nbl/video/CVulkanRayTracingPipeline.cpp new file mode 100644 index 0000000000..a18509e036 --- /dev/null +++ b/src/nbl/video/CVulkanRayTracingPipeline.cpp @@ -0,0 +1,61 @@ +#include "nbl/video/CVulkanRayTracingPipeline.h" + +#include "nbl/video/CVulkanLogicalDevice.h" + +namespace nbl::video +{ + + CVulkanRayTracingPipeline::CVulkanRayTracingPipeline(const SCreationParams& params, const VkPipeline vk_pipeline) : + IGPURayTracingPipeline(params), + m_vkPipeline(vk_pipeline), + m_shaders(core::make_refctd_dynamic_array(params.shaders.size())) + { + for (size_t shaderIx = 0; shaderIx < params.shaders.size(); shaderIx++) + m_shaders->operator[](shaderIx) = ShaderRef(static_cast(params.shaders[shaderIx].shader)); + + const auto* vulkanDevice = 
static_cast(getOriginDevice()); + const auto handleCount = params.cached.shaderGroups.getShaderGroupCount(); + const auto handleSize = vulkanDevice->getPhysicalDevice()->getLimits().shaderGroupHandleSize; + const auto dataSize = handleCount * handleSize; + auto* vk = vulkanDevice->getFunctionTable(); + m_shaderGroupHandles = core::make_refctd_dynamic_array(dataSize); + vk->vk.vkGetRayTracingShaderGroupHandlesKHR(vulkanDevice->getInternalObject(), m_vkPipeline, 0, handleCount, dataSize, m_shaderGroupHandles->data()); + } + + CVulkanRayTracingPipeline::~CVulkanRayTracingPipeline() + { + const auto* vulkanDevice = static_cast(getOriginDevice()); + auto* vk = vulkanDevice->getFunctionTable(); + vk->vk.vkDestroyPipeline(vulkanDevice->getInternalObject(), m_vkPipeline, nullptr); + } + + std::span CVulkanRayTracingPipeline::getRaygenGroupShaderHandle() const + { + const auto handleSize = getOriginDevice()->getPhysicalDevice()->getLimits().shaderGroupHandleSize; + return {m_shaderGroupHandles->data(), handleSize}; + } + + std::span CVulkanRayTracingPipeline::getHitGroupShaderHandle(uint32_t index) const + { + const auto handleSize = getOriginDevice()->getPhysicalDevice()->getLimits().shaderGroupHandleSize; + const auto baseOffset = handleSize; // one raygen handle before this group + return {m_shaderGroupHandles->data() + baseOffset + index * handleSize, handleSize}; + } + + std::span CVulkanRayTracingPipeline::getMissGroupShaderHandle(uint32_t index) const + { + const auto handleSize = getOriginDevice()->getPhysicalDevice()->getLimits().shaderGroupHandleSize; + const auto baseOffset = handleSize + getHitGroupCount() * handleSize; // one raygen + hit groups handle before this group + return {m_shaderGroupHandles->data() + baseOffset + index * handleSize, handleSize}; + } + + std::span CVulkanRayTracingPipeline::getCallableGroupShaderHandle(uint32_t index) const + { + const auto handleSize = getOriginDevice()->getPhysicalDevice()->getLimits().shaderGroupHandleSize; + + // 
one raygen + hit groups + miss groups handle before this group + const auto baseOffset = handleSize + getHitGroupCount() * handleSize + getMissGroupCount() * handleSize; + + return {m_shaderGroupHandles->data() + baseOffset + index * handleSize, handleSize}; + } +} diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index 8c6a7752a6..80f51ac5a6 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -942,4 +942,37 @@ bool ILogicalDevice::createGraphicsPipelines( return true; } +bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output) +{ + std::fill_n(output,params.size(),nullptr); + IGPURayTracingPipeline::SCreationParams::SSpecializationValidationResult specConstantValidation = commonCreatePipelines(pipelineCache,params,[this](const IGPUShader::SSpecInfo& info)->bool + { + if (!info.shader->wasCreatedBy(this)) + { + NBL_LOG_ERROR("The shader was not created by this device"); + return false; + } + return true; + }); + if (!specConstantValidation) + { + NBL_LOG_ERROR("Invalid parameters were given"); + return false; + } + + createRayTracingPipelines_impl(pipelineCache,params,output,specConstantValidation); + + bool retval = true; + for (auto i=0u; i Date: Tue, 21 Jan 2025 09:27:58 +0700 Subject: [PATCH 03/68] Implement binding Ray Tracing Pipeline command Signed-off-by: kevyuu --- include/nbl/asset/ECommonEnums.h | 2 +- include/nbl/builtin/hlsl/enums.hlsl | 1 + include/nbl/video/IGPUCommandBuffer.h | 2 ++ include/nbl/video/IGPUCommandPool.h | 12 ++++++++++++ src/nbl/video/CVulkanCommandBuffer.cpp | 26 +++++++++++++++++++++++++- src/nbl/video/CVulkanCommandBuffer.h | 1 + src/nbl/video/IGPUCommandBuffer.cpp | 24 ++++++++++++++++++++++++ 7 files changed, 66 insertions(+), 2 deletions(-) diff --git a/include/nbl/asset/ECommonEnums.h b/include/nbl/asset/ECommonEnums.h index 4f49d89a06..7db562cc6a 100644 --- 
a/include/nbl/asset/ECommonEnums.h +++ b/include/nbl/asset/ECommonEnums.h @@ -11,7 +11,7 @@ enum E_PIPELINE_BIND_POINT : uint8_t { EPBP_GRAPHICS = 0, EPBP_COMPUTE, - + EPBP_RAY_TRACING, EPBP_COUNT }; diff --git a/include/nbl/builtin/hlsl/enums.hlsl b/include/nbl/builtin/hlsl/enums.hlsl index 990b6273ad..d672ab85af 100644 --- a/include/nbl/builtin/hlsl/enums.hlsl +++ b/include/nbl/builtin/hlsl/enums.hlsl @@ -29,6 +29,7 @@ enum ShaderStage : uint32_t ESS_INTERSECTION = 1 << 12, ESS_CALLABLE = 1 << 13, ESS_ALL_GRAPHICS = 0x0000001F, + ESS_ALL_RAY_TRACING = ESS_RAYGEN | ESS_ANY_HIT | ESS_CLOSEST_HIT | ESS_MISS | ESS_INTERSECTION | ESS_CALLABLE, ESS_ALL = 0x7fffffff }; diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index 8fc7cdc737..008ac39426 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ b/include/nbl/video/IGPUCommandBuffer.h @@ -316,6 +316,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject //! state setup bool bindComputePipeline(const IGPUComputePipeline* const pipeline); bool bindGraphicsPipeline(const IGPUGraphicsPipeline* const pipeline); + bool bindRayTracingPipeline(const IGPURayTracingPipeline* const pipeline); bool bindDescriptorSets( const asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const IGPUPipelineLayout* const layout, const uint32_t firstSet, const uint32_t descriptorSetCount, const IGPUDescriptorSet* const* const pDescriptorSets, @@ -618,6 +619,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject virtual bool bindComputePipeline_impl(const IGPUComputePipeline* const pipeline) = 0; virtual bool bindGraphicsPipeline_impl(const IGPUGraphicsPipeline* const pipeline) = 0; + virtual bool bindRayTracingPipeline_impl(const IGPURayTracingPipeline* const pipeline) = 0; virtual bool bindDescriptorSets_impl( const asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const IGPUPipelineLayout* const layout, const uint32_t firstSet, const uint32_t descriptorSetCount, const IGPUDescriptorSet* 
const* const pDescriptorSets, diff --git a/include/nbl/video/IGPUCommandPool.h b/include/nbl/video/IGPUCommandPool.h index ad9b110e25..55966ba10f 100644 --- a/include/nbl/video/IGPUCommandPool.h +++ b/include/nbl/video/IGPUCommandPool.h @@ -10,6 +10,7 @@ #include "nbl/video/IGPUDescriptorSet.h" #include "nbl/video/IGPUComputePipeline.h" #include "nbl/video/IGPUGraphicsPipeline.h" +#include "nbl/video/IGPURayTracingPipeline.h" #include "nbl/video/IGPUFramebuffer.h" #include "nbl/video/IQueryPool.h" @@ -151,6 +152,7 @@ class IGPUCommandPool : public IBackendObject class CBuildAccelerationStructuresCmd; // for both vkCmdBuildAccelerationStructuresKHR and vkCmdBuildAccelerationStructuresIndirectKHR class CCopyAccelerationStructureCmd; class CCopyAccelerationStructureToOrFromMemoryCmd; // for both vkCmdCopyAccelerationStructureToMemoryKHR and vkCmdCopyMemoryToAccelerationStructureKHR + class CBindRayTracingPipelineCmd; protected: IGPUCommandPool(core::smart_refctd_ptr&& dev, const core::bitflag _flags, const uint8_t _familyIx) @@ -822,6 +824,16 @@ class IGPUCommandPool::CCopyAccelerationStructureToOrFromMemoryCmd final : publi core::smart_refctd_ptr m_buffer; }; + + +class IGPUCommandPool::CBindRayTracingPipelineCmd final : public IFixedSizeCommand +{ + public: + CBindRayTracingPipelineCmd(core::smart_refctd_ptr&& pipeline) : m_pipeline(std::move(pipeline)) {} + + private: + core::smart_refctd_ptr m_pipeline; +}; NBL_ENUM_ADD_BITWISE_OPERATORS(IGPUCommandPool::CREATE_FLAGS) } diff --git a/src/nbl/video/CVulkanCommandBuffer.cpp b/src/nbl/video/CVulkanCommandBuffer.cpp index 75a24a8a61..f9a62d6788 100644 --- a/src/nbl/video/CVulkanCommandBuffer.cpp +++ b/src/nbl/video/CVulkanCommandBuffer.cpp @@ -8,6 +8,24 @@ using namespace nbl; using namespace nbl::video; +namespace +{ + VkPipelineBindPoint vkCast(asset::E_PIPELINE_BIND_POINT bindPoint) + { + switch (bindPoint) + { + case asset::EPBP_GRAPHICS: + return VK_PIPELINE_BIND_POINT_GRAPHICS; + case asset::EPBP_COMPUTE: + 
return VK_PIPELINE_BIND_POINT_COMPUTE; + case asset::EPBP_RAY_TRACING: + return VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR; + default: + // unreachable() macro + return static_cast(bindPoint); + } + } +} const VolkDeviceTable& CVulkanCommandBuffer::getFunctionTable() const { @@ -397,6 +415,12 @@ bool CVulkanCommandBuffer::bindGraphicsPipeline_impl(const IGPUGraphicsPipeline* return true; } +bool CVulkanCommandBuffer::bindRayTracingPipeline_impl(const IGPURayTracingPipeline* const pipeline) +{ + getFunctionTable().vkCmdBindPipeline(m_cmdbuf, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, static_cast(pipeline)->getInternalObject()); + return true; +} + bool CVulkanCommandBuffer::bindDescriptorSets_impl(const asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const IGPUPipelineLayout* const layout, const uint32_t firstSet, const uint32_t descriptorSetCount, const IGPUDescriptorSet* const* const pDescriptorSets, const uint32_t dynamicOffsetCount, const uint32_t* const dynamicOffsets) { VkDescriptorSet vk_descriptorSets[IGPUPipelineLayout::DESCRIPTOR_SET_COUNT] = {}; @@ -437,7 +461,7 @@ bool CVulkanCommandBuffer::bindDescriptorSets_impl(const asset::E_PIPELINE_BIND_ dynamicOffsetCount += dynamicOffsetCountPerSet[setIndex]; getFunctionTable().vkCmdBindDescriptorSets( - m_cmdbuf,static_cast(pipelineBindPoint),vk_pipelineLayout, + m_cmdbuf,vkCast(pipelineBindPoint),vk_pipelineLayout, firstSet+first, last-first, vk_descriptorSets+first, dynamicOffsetCount, dynamicOffsets+dynamicOffsetsBindOffset ); diff --git a/src/nbl/video/CVulkanCommandBuffer.h b/src/nbl/video/CVulkanCommandBuffer.h index 8f83b28850..93875002f6 100644 --- a/src/nbl/video/CVulkanCommandBuffer.h +++ b/src/nbl/video/CVulkanCommandBuffer.h @@ -183,6 +183,7 @@ class CVulkanCommandBuffer final : public IGPUCommandBuffer bool bindComputePipeline_impl(const IGPUComputePipeline* const pipeline) override; bool bindGraphicsPipeline_impl(const IGPUGraphicsPipeline* const pipeline) override; + bool 
bindRayTracingPipeline_impl(const IGPURayTracingPipeline* const pipeline) override; bool bindDescriptorSets_impl(const asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const IGPUPipelineLayout* const layout, const uint32_t firstSet, const uint32_t descriptorSetCount, const IGPUDescriptorSet* const* const pDescriptorSets, const uint32_t dynamicOffsetCount = 0u, const uint32_t* const dynamicOffsets = nullptr) override; bool pushConstants_impl(const IGPUPipelineLayout* const layout, const core::bitflag stageFlags, const uint32_t offset, const uint32_t size, const void* const pValues) override; bool bindVertexBuffers_impl(const uint32_t firstBinding, const uint32_t bindingCount, const asset::SBufferBinding* const pBindings) override; diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index 43fc709b77..ec4bc67396 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -884,6 +884,30 @@ bool IGPUCommandBuffer::bindGraphicsPipeline(const IGPUGraphicsPipeline* const p return bindGraphicsPipeline_impl(pipeline); } +bool IGPUCommandBuffer::bindRayTracingPipeline(const IGPURayTracingPipeline* const pipeline) +{ + // Because binding of the Gfx pipeline can happen outside of a Renderpass Scope, + // we cannot check renderpass-pipeline compatibility here. + // And checking before every drawcall would be performance suicide. 
+ if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT)) + return false; + + if (!pipeline || !this->isCompatibleDevicewise(pipeline)) + { + NBL_LOG_ERROR("incompatible pipeline device!"); + return false; + } + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(pipeline))) + { + NBL_LOG_ERROR("out of host memory!"); + return false; + } + + m_noCommands = false; + return bindRayTracingPipeline_impl(pipeline); +} + bool IGPUCommandBuffer::bindDescriptorSets( const asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const IGPUPipelineLayout* const layout, const uint32_t firstSet, const uint32_t descriptorSetCount, const IGPUDescriptorSet* const* const pDescriptorSets, From e44b8d5fc6bd9686fb07cf810b5eb007443430b7 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 21 Jan 2025 09:32:14 +0700 Subject: [PATCH 04/68] Implement trace rays command Signed-off-by: kevyuu --- include/nbl/asset/IBuffer.h | 31 ++++++++++++++++++++++++ include/nbl/video/IGPUCommandBuffer.h | 13 ++++++++++ include/nbl/video/IGPUCommandPool.h | 21 ++++++++++++++++ src/nbl/video/CVulkanCommandBuffer.cpp | 33 ++++++++++++++++++++++++++ src/nbl/video/CVulkanCommandBuffer.h | 7 ++++++ src/nbl/video/IGPUCommandBuffer.cpp | 33 ++++++++++++++++++++++++++ 6 files changed, 138 insertions(+) diff --git a/include/nbl/asset/IBuffer.h b/include/nbl/asset/IBuffer.h index 27888d96a1..177b730b78 100644 --- a/include/nbl/asset/IBuffer.h +++ b/include/nbl/asset/IBuffer.h @@ -117,6 +117,37 @@ struct SBufferRange inline bool operator!=(const SBufferRange& rhs) const { return !operator==(rhs); } }; +template +struct SStridedBufferRegion +{ + static constexpr inline size_t WholeBuffer = ~0ull; + + size_t offset = 0ull; + size_t stride = 0; + size_t size = WholeBuffer; + core::smart_refctd_ptr buffer = nullptr; + + + inline operator SStridedBufferRegion&() {return *reinterpret_cast*>(this);} + inline operator const SStridedBufferRegion&() const {return *reinterpret_cast*>(this);} + + explicit 
inline operator bool() const {return isValid();} + + inline bool isValid() const + { + if (!buffer || offset>=buffer->getSize() || size==0ull || stride>buffer->getSize()) + return false; + return actualSize()<=buffer->getSize()-offset; + } + + inline size_t actualSize() const + { + return size!=WholeBuffer ? size:buffer->getSize(); + } + inline bool operator==(const SStridedBufferRegion& rhs) const { return buffer==rhs.buffer && offset==rhs.offset && actualSize()==rhs.actualSize() && stride==rhs.stride; } + inline bool operator!=(const SStridedBufferRegion& rhs) const { return !operator==(rhs); } +}; + } namespace std diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index 008ac39426..566bdbb7be 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ b/include/nbl/video/IGPUCommandBuffer.h @@ -524,6 +524,12 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject }; bool resolveImage(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const uint32_t regionCount, const SImageResolve* const pRegions); + bool IGPUCommandBuffer::traceRays(const asset::SStridedBufferRegion& raygenGroupRegion, + const asset::SStridedBufferRegion& missGroupsRegion, + const asset::SStridedBufferRegion& hitGroupsRegion, + const asset::SStridedBufferRegion& callableGroupsRegion, + uint32_t width, uint32_t height, uint32_t depth); + //! 
Secondary CommandBuffer execute bool executeCommands(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs); @@ -665,6 +671,13 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject virtual bool blitImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const std::span regions, const IGPUSampler::E_TEXTURE_FILTER filter) = 0; virtual bool resolveImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const uint32_t regionCount, const SImageResolve* pRegions) = 0; + virtual bool traceRays_impl( + const asset::SStridedBufferRegion& raygenGroupRegion, + const asset::SStridedBufferRegion& missGroupsRegion, + const asset::SStridedBufferRegion& hitGroupsRegion, + const asset::SStridedBufferRegion& callableGroupsRegion, + uint32_t width, uint32_t height, uint32_t depth) = 0; + virtual bool executeCommands_impl(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs) = 0; virtual void releaseResourcesBackToPool_impl() {} diff --git a/include/nbl/video/IGPUCommandPool.h b/include/nbl/video/IGPUCommandPool.h index 55966ba10f..95c3315808 100644 --- a/include/nbl/video/IGPUCommandPool.h +++ b/include/nbl/video/IGPUCommandPool.h @@ -152,6 +152,7 @@ class IGPUCommandPool : public IBackendObject class CBuildAccelerationStructuresCmd; // for both vkCmdBuildAccelerationStructuresKHR and vkCmdBuildAccelerationStructuresIndirectKHR class CCopyAccelerationStructureCmd; class CCopyAccelerationStructureToOrFromMemoryCmd; // for both vkCmdCopyAccelerationStructureToMemoryKHR and vkCmdCopyMemoryToAccelerationStructureKHR + class CTraceRaysCmd; class CBindRayTracingPipelineCmd; protected: @@ -824,6 +825,26 @@ class IGPUCommandPool::CCopyAccelerationStructureToOrFromMemoryCmd final : publi core::smart_refctd_ptr m_buffer; }; +class IGPUCommandPool::CTraceRaysCmd final : public 
IFixedSizeCommand +{ + public: + CTraceRaysCmd( + core::smart_refctd_ptr&& raygenGroupBuffer, + core::smart_refctd_ptr&& hitGroupsBuffer, + core::smart_refctd_ptr&& missGroupsBuffer, + core::smart_refctd_ptr&& callableGroupsBuffer) : + m_raygenGroupBuffer(raygenGroupBuffer), + m_hitGroupsBuffer(hitGroupsBuffer), + m_missGroupsBuffer(missGroupsBuffer), + m_callableGroupsBuffer(callableGroupsBuffer) {} + + + private: + core::smart_refctd_ptr m_raygenGroupBuffer; + core::smart_refctd_ptr m_hitGroupsBuffer; + core::smart_refctd_ptr m_missGroupsBuffer; + core::smart_refctd_ptr m_callableGroupsBuffer; +}; class IGPUCommandPool::CBindRayTracingPipelineCmd final : public IFixedSizeCommand diff --git a/src/nbl/video/CVulkanCommandBuffer.cpp b/src/nbl/video/CVulkanCommandBuffer.cpp index f9a62d6788..0b09743c51 100644 --- a/src/nbl/video/CVulkanCommandBuffer.cpp +++ b/src/nbl/video/CVulkanCommandBuffer.cpp @@ -835,6 +835,39 @@ bool CVulkanCommandBuffer::resolveImage_impl(const IGPUImage* const srcImage, co return true; } +bool CVulkanCommandBuffer::traceRays_impl( + const asset::SStridedBufferRegion& raygenGroupRegion, + const asset::SStridedBufferRegion& missGroupsRegion, + const asset::SStridedBufferRegion& hitGroupsRegion, + const asset::SStridedBufferRegion& callableGroupsRegion, + uint32_t width, uint32_t height, uint32_t depth) +{ + auto toVkRegion = [](const asset::SStridedBufferRegion& region) -> VkStridedDeviceAddressRegionKHR + { + if (region.buffer.get() == nullptr) + return {}; + + return { + .deviceAddress = region.buffer->getDeviceAddress() + region.offset, + .stride = region.stride, + .size = region.size, + }; + }; + + const auto vk_raygenGroupRegion = toVkRegion(raygenGroupRegion); + const auto vk_missGroupsRegion = toVkRegion(missGroupsRegion); + const auto vk_hitGroupsRegion = toVkRegion(hitGroupsRegion); + const auto vk_callableGroupsRegion = toVkRegion(callableGroupsRegion); + + getFunctionTable().vkCmdTraceRaysKHR(m_cmdbuf, + &vk_raygenGroupRegion, + 
&vk_missGroupsRegion, + &vk_hitGroupsRegion, + &vk_callableGroupsRegion, + width, height, depth); + return true; +} + bool CVulkanCommandBuffer::executeCommands_impl(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs) { IGPUCommandPool::StackAllocation vk_commandBuffers(m_cmdpool,count); diff --git a/src/nbl/video/CVulkanCommandBuffer.h b/src/nbl/video/CVulkanCommandBuffer.h index 93875002f6..87dc32c42e 100644 --- a/src/nbl/video/CVulkanCommandBuffer.h +++ b/src/nbl/video/CVulkanCommandBuffer.h @@ -225,6 +225,13 @@ class CVulkanCommandBuffer final : public IGPUCommandBuffer bool blitImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const std::span regions, const IGPUSampler::E_TEXTURE_FILTER filter) override; bool resolveImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const uint32_t regionCount, const SImageResolve* pRegions) override; + bool traceRays_impl( + const asset::SStridedBufferRegion& raygenGroupRegion, + const asset::SStridedBufferRegion& missGroupsRegion, + const asset::SStridedBufferRegion& hitGroupsRegion, + const asset::SStridedBufferRegion& callableGroupsRegion, + uint32_t width, uint32_t height, uint32_t depth) override; + bool executeCommands_impl(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs) override; bool insertDebugMarker(const char* name, const core::vector4df_SIMD& color) override final diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index ec4bc67396..638a53b612 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -1798,6 +1798,39 @@ bool IGPUCommandBuffer::resolveImage(const IGPUImage* const srcImage, const IGPU return resolveImage_impl(srcImage, srcImageLayout, dstImage, dstImageLayout, regionCount, pRegions); } +bool 
IGPUCommandBuffer::traceRays(const asset::SStridedBufferRegion& raygenGroupRegion, + const asset::SStridedBufferRegion& missGroupsRegion, + const asset::SStridedBufferRegion& hitGroupsRegion, + const asset::SStridedBufferRegion& callableGroupsRegion, + uint32_t width, uint32_t height, uint32_t depth) +{ + if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT,RENDERPASS_SCOPE::OUTSIDE)) + return false; + + if (width == 0 || height == 0 || depth == 0) + { + NBL_LOG_ERROR("invalid work counts (%d, %d, %d)!", width, height, depth); + return false; + } + + // TODO(kevinyu) : add more validation + + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, + core::smart_refctd_ptr(raygenGroupRegion.buffer), + core::smart_refctd_ptr(missGroupsRegion.buffer), + core::smart_refctd_ptr(hitGroupsRegion.buffer), + core::smart_refctd_ptr(callableGroupsRegion.buffer))) + { + NBL_LOG_ERROR("out of host memory!"); + return false; + } + + return traceRays_impl(raygenGroupRegion, missGroupsRegion, hitGroupsRegion, callableGroupsRegion, + width, height, depth); +} + + bool IGPUCommandBuffer::executeCommands(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs) { if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT|queue_flags_t::GRAPHICS_BIT|queue_flags_t::TRANSFER_BIT)) From aa314215799cbcab90544ecbccb7a3f40b8d48e2 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 21 Jan 2025 11:13:26 +0700 Subject: [PATCH 05/68] Fix specializatio for ray tracing shaders Signed-off-by: kevyuu --- src/nbl/video/CVulkanLogicalDevice.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index 4502c97fba..22ea156f1b 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1443,7 +1443,7 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( }); core::vector vk_shaderStage(maxShaderStages, { 
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, nullptr }); core::vector vk_shaderGroup(maxShaderStages, { VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, nullptr}); - core::vector vk_specializationInfos(createInfos.size(), { 0, nullptr, 0, nullptr }); + core::vector vk_specializationInfos(maxShaderStages, { 0, nullptr, 0, nullptr }); core::vector vk_specializationMapEntry(validation.count); core::vector specializationData(validation.dataSize); @@ -1487,14 +1487,7 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( { if (specInfo.shader) { - const auto stage = specInfo.shader->getStage(); - *(outShaderStage++) = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .stage = static_cast(stage), - .module = static_cast(specInfo.shader)->getInternalObject(), - .pName = specInfo.entryPoint.c_str(), - }; + *(outShaderStage++) = getVkShaderStageCreateInfoFrom(specInfo,outRequiredSubgroupSize,outSpecInfo,outSpecMapEntry,outSpecData); } } outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages,outShaderStage); From 3592cb5decd3f5b10ddb1e0f8722a20c624f106c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 21 Jan 2025 12:45:57 +0700 Subject: [PATCH 06/68] Add Validation to ray tracing pipeline creation Signed-off-by: kevyuu --- include/nbl/asset/IRayTracingPipeline.h | 23 +++++++++++++++++------ src/nbl/video/ILogicalDevice.cpp | 10 ++++++++++ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index fb055a8dff..c84709cbba 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -50,7 +50,7 @@ namespace nbl::asset struct SCachedCreationParams final { SShaderGroupsParams shaderGroups; - uint64_t maxRecursionDepth; + uint32_t maxRecursionDepth; }; }; @@ -92,27 +92,38 @@ namespace nbl::asset if (getShaderStage(cached.shaderGroups.raygenGroup.shaderIndex) != 
ICPUShader::E_SHADER_STAGE::ESS_RAYGEN) return false; + auto isValidShaderIndex = [this, getShaderStage](size_t index, ICPUShader::E_SHADER_STAGE expectedStage) -> bool + { + if (index == SShaderGroupsParams::ShaderUnused) + return true; + if (index >= shaders.size()) + return false; + if (getShaderStage(index) != expectedStage) + return false; + return true; + }; + for (const auto& shaderGroup : cached.shaderGroups.hitGroups) { - if (shaderGroup.anyHitShaderIndex != SShaderGroupsParams::ShaderUnused && getShaderStage(shaderGroup.anyHitShaderIndex) != ICPUShader::E_SHADER_STAGE::ESS_ANY_HIT) + if (!isValidShaderIndex(shaderGroup.anyHitShaderIndex, ICPUShader::E_SHADER_STAGE::ESS_ANY_HIT)) return false; - if (shaderGroup.closestHitShaderIndex != SShaderGroupsParams::ShaderUnused && getShaderStage(shaderGroup.closestHitShaderIndex) != ICPUShader::E_SHADER_STAGE::ESS_CLOSEST_HIT) + if (!isValidShaderIndex(shaderGroup.closestHitShaderIndex, ICPUShader::E_SHADER_STAGE::ESS_CLOSEST_HIT)) return false; - if (shaderGroup.intersectionShaderIndex != SShaderGroupsParams::ShaderUnused && getShaderStage(shaderGroup.intersectionShaderIndex) != ICPUShader::E_SHADER_STAGE::ESS_INTERSECTION) + if (!isValidShaderIndex(shaderGroup.intersectionShaderIndex, ICPUShader::E_SHADER_STAGE::ESS_INTERSECTION)) return false; } for (const auto& shaderGroup : cached.shaderGroups.missGroups) { - if (getShaderStage(shaderGroup.shaderIndex) != ICPUShader::E_SHADER_STAGE::ESS_MISS) + if (!isValidShaderIndex(shaderGroup.shaderIndex, ICPUShader::E_SHADER_STAGE::ESS_MISS)) return false; } for (const auto& shaderGroup : cached.shaderGroups.callableShaderGroups) { - if (getShaderStage(shaderGroup.shaderIndex) != ICPUShader::E_SHADER_STAGE::ESS_CALLABLE) + if (!isValidShaderIndex(shaderGroup.shaderIndex, ICPUShader::E_SHADER_STAGE::ESS_CALLABLE)) return false; } return true; diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index 80f51ac5a6..995f85ea80 100644 --- 
a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -962,6 +962,16 @@ bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipeline return false; } + const auto& limits = getPhysicalDeviceLimits(); + for (const auto& param : params) + { + if (param.cached.maxRecursionDepth > limits.maxRayRecursionDepth) + { + NBL_LOG_ERROR("Invalid maxRecursionDepth. maxRecursionDepth(%zu) exceed the limits(%zu)", param.cached.maxRecursionDepth, limits.maxRayRecursionDepth); + return false; + } + } + createRayTracingPipelines_impl(pipelineCache,params,output,specConstantValidation); bool retval = true; From 7757888b8aaa15c68cf1041fc3863dd08ccec958 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 21 Jan 2025 13:37:09 +0700 Subject: [PATCH 07/68] Implement Intersection Shader Signed-off-by: kevyuu --- src/nbl/video/CVulkanLogicalDevice.cpp | 3 ++- src/nbl/video/IGPUCommandBuffer.cpp | 3 --- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index 22ea156f1b..a6b1b61770 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1472,7 +1472,8 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( return { .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, .pNext = nullptr, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, + .type = group.intersectionShaderIndex == asset::SShaderGroupsParams::ShaderUnused ? 
+ VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR : VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR, .generalShader = VK_SHADER_UNUSED_KHR, .closestHitShader = getVkShaderIndex(group.closestHitShaderIndex), .anyHitShader = getVkShaderIndex(group.anyHitShaderIndex), diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index 638a53b612..08744f50d5 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -1813,9 +1813,6 @@ bool IGPUCommandBuffer::traceRays(const asset::SStridedBufferRegion& return false; } - // TODO(kevinyu) : add more validation - - if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(raygenGroupRegion.buffer), core::smart_refctd_ptr(missGroupsRegion.buffer), From d207addcaa9dfd12307fe267c3a83f888aeeb94b Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 24 Jan 2025 19:11:53 +0700 Subject: [PATCH 08/68] Change order of shader groups in params to be consistent with TraceRays Signed-off-by: kevyuu --- include/nbl/asset/IRayTracingPipeline.h | 10 +++++----- src/nbl/video/CVulkanLogicalDevice.cpp | 8 ++++---- src/nbl/video/CVulkanRayTracingPipeline.cpp | 10 +++++----- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index c84709cbba..2e3741c3d1 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -29,13 +29,13 @@ namespace nbl::asset }; SGeneralGroup raygenGroup; - core::vector hitGroups; core::vector missGroups; - core::vector callableShaderGroups; + core::vector hitGroups; + core::vector callableGroups; inline uint32_t getShaderGroupCount() const { - return 1 + hitGroups.size() + missGroups.size() + callableShaderGroups.size(); + return 1 + hitGroups.size() + missGroups.size() + callableGroups.size(); } }; @@ -121,7 +121,7 @@ namespace nbl::asset return false; } - for (const auto& shaderGroup : 
cached.shaderGroups.callableShaderGroups) + for (const auto& shaderGroup : cached.shaderGroups.callableGroups) { if (!isValidShaderIndex(shaderGroup.shaderIndex, ICPUShader::E_SHADER_STAGE::ESS_CALLABLE)) return false; @@ -147,7 +147,7 @@ namespace nbl::asset inline const SCachedCreationParams& getCachedCreationParams() const { return m_params; } size_t getHitGroupCount() const { return m_params.shaderGroups.hitGroups.size(); } size_t getMissGroupCount() const { return m_params.shaderGroups.missGroups.size(); } - size_t getCallableGroupCount() const { return m_params.shaderGroups.callableShaderGroups.size(); } + size_t getCallableGroupCount() const { return m_params.shaderGroups.callableGroups.size(); } protected: explicit IRayTracingPipeline(const SCreationParams& _params) : diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index a6b1b61770..7cac486ebb 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1496,13 +1496,13 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( const auto& shaderGroups = info.cached.shaderGroups; outCreateInfo->pGroups = outShaderGroup; *(outShaderGroup++) = getGeneralVkRayTracingShaderGroupCreateInfo(shaderGroups.raygenGroup); - for (const auto& shaderGroup : shaderGroups.hitGroups) - *(outShaderGroup++) = getHitVkRayTracingShaderGroupCreateInfo(shaderGroup); for (const auto& shaderGroup : shaderGroups.missGroups) *(outShaderGroup++) = getGeneralVkRayTracingShaderGroupCreateInfo(shaderGroup); - for (const auto& shaderGroup : shaderGroups.callableShaderGroups) + for (const auto& shaderGroup : shaderGroups.hitGroups) + *(outShaderGroup++) = getHitVkRayTracingShaderGroupCreateInfo(shaderGroup); + for (const auto& shaderGroup : shaderGroups.callableGroups) *(outShaderGroup++) = getGeneralVkRayTracingShaderGroupCreateInfo(shaderGroup); - outCreateInfo->groupCount = 1 + shaderGroups.hitGroups.size() + shaderGroups.missGroups.size() + 
shaderGroups.callableShaderGroups.size(); + outCreateInfo->groupCount = 1 + shaderGroups.hitGroups.size() + shaderGroups.missGroups.size() + shaderGroups.callableGroups.size(); outCreateInfo->maxPipelineRayRecursionDepth = info.cached.maxRecursionDepth; } diff --git a/src/nbl/video/CVulkanRayTracingPipeline.cpp b/src/nbl/video/CVulkanRayTracingPipeline.cpp index a18509e036..fcc46ce86f 100644 --- a/src/nbl/video/CVulkanRayTracingPipeline.cpp +++ b/src/nbl/video/CVulkanRayTracingPipeline.cpp @@ -35,17 +35,17 @@ namespace nbl::video return {m_shaderGroupHandles->data(), handleSize}; } - std::span CVulkanRayTracingPipeline::getHitGroupShaderHandle(uint32_t index) const + std::span CVulkanRayTracingPipeline::getMissGroupShaderHandle(uint32_t index) const { const auto handleSize = getOriginDevice()->getPhysicalDevice()->getLimits().shaderGroupHandleSize; - const auto baseOffset = handleSize; // one raygen handle before this group + const auto baseOffset = handleSize; // one raygen this group return {m_shaderGroupHandles->data() + baseOffset + index * handleSize, handleSize}; } - std::span CVulkanRayTracingPipeline::getMissGroupShaderHandle(uint32_t index) const + std::span CVulkanRayTracingPipeline::getHitGroupShaderHandle(uint32_t index) const { const auto handleSize = getOriginDevice()->getPhysicalDevice()->getLimits().shaderGroupHandleSize; - const auto baseOffset = handleSize + getHitGroupCount() * handleSize; // one raygen + hit groups handle before this group + const auto baseOffset = handleSize + getMissGroupCount() * handleSize; // one raygen + miss groups handle before this group return {m_shaderGroupHandles->data() + baseOffset + index * handleSize, handleSize}; } @@ -54,7 +54,7 @@ namespace nbl::video const auto handleSize = getOriginDevice()->getPhysicalDevice()->getLimits().shaderGroupHandleSize; // one raygen + hit groups + miss groups handle before this group - const auto baseOffset = handleSize + getHitGroupCount() * handleSize + getMissGroupCount() * 
handleSize; + const auto baseOffset = handleSize + getMissGroupCount() * handleSize + getHitGroupCount() * handleSize; return {m_shaderGroupHandles->data() + baseOffset + index * handleSize, handleSize}; } From 26f4c3f93ee87b09d4763c0a0cfb8a5104683805 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 24 Jan 2025 20:20:55 +0700 Subject: [PATCH 09/68] Fix buffer creation log Signed-off-by: kevyuu --- include/nbl/video/ILogicalDevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/video/ILogicalDevice.h b/include/nbl/video/ILogicalDevice.h index 2a4a3837ee..525fa98db9 100644 --- a/include/nbl/video/ILogicalDevice.h +++ b/include/nbl/video/ILogicalDevice.h @@ -326,7 +326,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe const auto maxSize = getPhysicalDeviceLimits().maxBufferSize; if (creationParams.size>maxSize) { - m_logger.log("Failed to create Buffer, size %d larger than Device %p's limit!",system::ILogger::ELL_ERROR,creationParams.size,this,maxSize); + m_logger.log("Failed to create Buffer, size %d larger than Device %p's limit (%d)!",system::ILogger::ELL_ERROR,creationParams.size,this,maxSize); return nullptr; } return createBuffer_impl(std::move(creationParams)); From 8595c88dd31ed2e53a03338c742978938f27025e Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 25 Jan 2025 09:19:32 +0700 Subject: [PATCH 10/68] Add more validation to ray tracing pipeline creation Signed-off-by: kevyuu --- include/nbl/video/ILogicalDevice.h | 2 +- src/nbl/video/ILogicalDevice.cpp | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/include/nbl/video/ILogicalDevice.h b/include/nbl/video/ILogicalDevice.h index 525fa98db9..157bb40415 100644 --- a/include/nbl/video/ILogicalDevice.h +++ b/include/nbl/video/ILogicalDevice.h @@ -326,7 +326,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe const auto maxSize = getPhysicalDeviceLimits().maxBufferSize; if 
(creationParams.size>maxSize) { - m_logger.log("Failed to create Buffer, size %d larger than Device %p's limit (%d)!",system::ILogger::ELL_ERROR,creationParams.size,this,maxSize); + m_logger.log("Failed to create Buffer, size %d larger than Device %p's limit (%u)!",system::ILogger::ELL_ERROR,creationParams.size,this,maxSize); return nullptr; } return createBuffer_impl(std::move(creationParams)); diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index 995f85ea80..26515be548 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -962,12 +962,19 @@ bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipeline return false; } + const auto& features = getEnabledFeatures(); + if (!features.rayTracingPipeline) + { + NBL_LOG_ERROR("Feature `ray tracing pipeline` is not enabled"); + return false; + } + const auto& limits = getPhysicalDeviceLimits(); for (const auto& param : params) { if (param.cached.maxRecursionDepth > limits.maxRayRecursionDepth) { - NBL_LOG_ERROR("Invalid maxRecursionDepth. maxRecursionDepth(%zu) exceed the limits(%zu)", param.cached.maxRecursionDepth, limits.maxRayRecursionDepth); + NBL_LOG_ERROR("Invalid maxRecursionDepth. 
maxRecursionDepth(%u) exceed the limits(%u)", param.cached.maxRecursionDepth, limits.maxRayRecursionDepth); return false; } } From b027bdeb9fa0e8f653c8bb769be29056d05c7433 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 5 Feb 2025 17:19:51 +0700 Subject: [PATCH 11/68] Remove unnecessarry command --- src/nbl/video/IGPUCommandBuffer.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index 08744f50d5..4e870f1877 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -886,9 +886,6 @@ bool IGPUCommandBuffer::bindGraphicsPipeline(const IGPUGraphicsPipeline* const p bool IGPUCommandBuffer::bindRayTracingPipeline(const IGPURayTracingPipeline* const pipeline) { - // Because binding of the Gfx pipeline can happen outside of a Renderpass Scope, - // we cannot check renderpass-pipeline compatibility here. - // And checking before every drawcall would be performance suicide. if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT)) return false; From 53f27f0b7a5f92555bd319fd3b54bd9796db73bf Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 5 Feb 2025 17:20:32 +0700 Subject: [PATCH 12/68] Refactor casting pipeline bind point --- include/nbl/video/CVulkanCommon.h | 16 +++++++++++++++- src/nbl/video/CVulkanCommandBuffer.cpp | 21 +-------------------- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/include/nbl/video/CVulkanCommon.h b/include/nbl/video/CVulkanCommon.h index 1a0f53fdbb..ba708659a4 100644 --- a/include/nbl/video/CVulkanCommon.h +++ b/include/nbl/video/CVulkanCommon.h @@ -1082,7 +1082,21 @@ inline asset::IRenderpass::STORE_OP getAttachmentStoreOpFrom(const VkAttachmentS return static_cast(op); } - +inline VkPipelineBindPoint getVkPipelineBindPointFrom(asset::E_PIPELINE_BIND_POINT bindPoint) +{ + switch (bindPoint) + { + case asset::EPBP_GRAPHICS: + return VK_PIPELINE_BIND_POINT_GRAPHICS; + case asset::EPBP_COMPUTE: + return 
VK_PIPELINE_BIND_POINT_COMPUTE; + case asset::EPBP_RAY_TRACING: + return VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR; + default: + assert(!"Invalid code path."); + return VK_PIPELINE_BIND_POINT_MAX_ENUM; + } +} } #endif diff --git a/src/nbl/video/CVulkanCommandBuffer.cpp b/src/nbl/video/CVulkanCommandBuffer.cpp index 0b09743c51..24c3182484 100644 --- a/src/nbl/video/CVulkanCommandBuffer.cpp +++ b/src/nbl/video/CVulkanCommandBuffer.cpp @@ -8,25 +8,6 @@ using namespace nbl; using namespace nbl::video; -namespace -{ - VkPipelineBindPoint vkCast(asset::E_PIPELINE_BIND_POINT bindPoint) - { - switch (bindPoint) - { - case asset::EPBP_GRAPHICS: - return VK_PIPELINE_BIND_POINT_GRAPHICS; - case asset::EPBP_COMPUTE: - return VK_PIPELINE_BIND_POINT_COMPUTE; - case asset::EPBP_RAY_TRACING: - return VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR; - default: - // unreachable() macro - return static_cast(bindPoint); - } - } -} - const VolkDeviceTable& CVulkanCommandBuffer::getFunctionTable() const { return static_cast(getOriginDevice())->getFunctionTable()->vk; @@ -461,7 +442,7 @@ bool CVulkanCommandBuffer::bindDescriptorSets_impl(const asset::E_PIPELINE_BIND_ dynamicOffsetCount += dynamicOffsetCountPerSet[setIndex]; getFunctionTable().vkCmdBindDescriptorSets( - m_cmdbuf,vkCast(pipelineBindPoint),vk_pipelineLayout, + m_cmdbuf,getVkPipelineBindPointFrom(pipelineBindPoint),vk_pipelineLayout, firstSet+first, last-first, vk_descriptorSets+first, dynamicOffsetCount, dynamicOffsets+dynamicOffsetsBindOffset ); From ee5bcf9aef1397eaaa91282bc0caaf63f3646394 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 5 Feb 2025 17:52:50 +0700 Subject: [PATCH 13/68] Use static constexpr for shaderGroupHandleSize --- include/nbl/video/SPhysicalDeviceLimits.h | 2 ++ src/nbl/video/CVulkanPhysicalDevice.cpp | 1 - src/nbl/video/CVulkanRayTracingPipeline.cpp | 10 +++++----- src/nbl/video/device_capabilities/device_limits.json | 1 + 4 files changed, 8 insertions(+), 6 deletions(-) diff --git 
a/include/nbl/video/SPhysicalDeviceLimits.h b/include/nbl/video/SPhysicalDeviceLimits.h index 10ddf60d3d..6714829924 100644 --- a/include/nbl/video/SPhysicalDeviceLimits.h +++ b/include/nbl/video/SPhysicalDeviceLimits.h @@ -25,6 +25,8 @@ struct SPhysicalDeviceLimits }; using RESOLVE_MODE_FLAGS = asset::IRenderpass::SCreationParams::SSubpassDescription::SDepthStencilAttachmentsRef::RESOLVE_MODE; + constexpr static inline uint32_t ShaderGroupHandleSize = 32; + #include "nbl/video/SPhysicalDeviceLimits_members.h" // utility functions diff --git a/src/nbl/video/CVulkanPhysicalDevice.cpp b/src/nbl/video/CVulkanPhysicalDevice.cpp index 6d2ea63e08..79ffc7306d 100644 --- a/src/nbl/video/CVulkanPhysicalDevice.cpp +++ b/src/nbl/video/CVulkanPhysicalDevice.cpp @@ -601,7 +601,6 @@ std::unique_ptr CVulkanPhysicalDevice::create(core::smart logger.log("Not enumerating VkPhysicalDevice %p because it reports limits of exact-type contrary to Vulkan specification!", system::ILogger::ELL_INFO, vk_physicalDevice); return nullptr; } - properties.limits.shaderGroupHandleSize = rayTracingPipelineProperties.shaderGroupHandleSize; properties.limits.maxRayRecursionDepth = rayTracingPipelineProperties.maxRayRecursionDepth; properties.limits.maxShaderGroupStride = rayTracingPipelineProperties.maxShaderGroupStride; properties.limits.shaderGroupBaseAlignment = rayTracingPipelineProperties.shaderGroupBaseAlignment; diff --git a/src/nbl/video/CVulkanRayTracingPipeline.cpp b/src/nbl/video/CVulkanRayTracingPipeline.cpp index fcc46ce86f..6f989d3c71 100644 --- a/src/nbl/video/CVulkanRayTracingPipeline.cpp +++ b/src/nbl/video/CVulkanRayTracingPipeline.cpp @@ -15,7 +15,7 @@ namespace nbl::video const auto* vulkanDevice = static_cast(getOriginDevice()); const auto handleCount = params.cached.shaderGroups.getShaderGroupCount(); - const auto handleSize = vulkanDevice->getPhysicalDevice()->getLimits().shaderGroupHandleSize; + const auto handleSize = SPhysicalDeviceLimits::ShaderGroupHandleSize; const auto 
dataSize = handleCount * handleSize; auto* vk = vulkanDevice->getFunctionTable(); m_shaderGroupHandles = core::make_refctd_dynamic_array(dataSize); @@ -31,27 +31,27 @@ namespace nbl::video std::span CVulkanRayTracingPipeline::getRaygenGroupShaderHandle() const { - const auto handleSize = getOriginDevice()->getPhysicalDevice()->getLimits().shaderGroupHandleSize; + const auto handleSize = SPhysicalDeviceLimits::ShaderGroupHandleSize; return {m_shaderGroupHandles->data(), handleSize}; } std::span CVulkanRayTracingPipeline::getMissGroupShaderHandle(uint32_t index) const { - const auto handleSize = getOriginDevice()->getPhysicalDevice()->getLimits().shaderGroupHandleSize; + const auto handleSize = SPhysicalDeviceLimits::ShaderGroupHandleSize; const auto baseOffset = handleSize; // one raygen this group return {m_shaderGroupHandles->data() + baseOffset + index * handleSize, handleSize}; } std::span CVulkanRayTracingPipeline::getHitGroupShaderHandle(uint32_t index) const { - const auto handleSize = getOriginDevice()->getPhysicalDevice()->getLimits().shaderGroupHandleSize; + const auto handleSize = SPhysicalDeviceLimits::ShaderGroupHandleSize; const auto baseOffset = handleSize + getMissGroupCount() * handleSize; // one raygen + miss groups handle before this group return {m_shaderGroupHandles->data() + baseOffset + index * handleSize, handleSize}; } std::span CVulkanRayTracingPipeline::getCallableGroupShaderHandle(uint32_t index) const { - const auto handleSize = getOriginDevice()->getPhysicalDevice()->getLimits().shaderGroupHandleSize; + const auto handleSize = SPhysicalDeviceLimits::ShaderGroupHandleSize; // one raygen + hit groups + miss groups handle before this group const auto baseOffset = handleSize + getMissGroupCount() * handleSize + getHitGroupCount() * handleSize; diff --git a/src/nbl/video/device_capabilities/device_limits.json b/src/nbl/video/device_capabilities/device_limits.json index af15e57791..7ff920e121 100644 --- 
a/src/nbl/video/device_capabilities/device_limits.json +++ b/src/nbl/video/device_capabilities/device_limits.json @@ -1760,6 +1760,7 @@ "type": "uint32_t", "name": "shaderGroupHandleSize", "value": 32, + "expose": "DISABLE", "comment": ["`exact` limit type"] }, { From 64792a848e4596acf3a1b85f98f525934dc0d6fb Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 5 Feb 2025 17:53:46 +0700 Subject: [PATCH 14/68] Fix indentation. --- include/nbl/video/CVulkanRayTracingPipeline.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/include/nbl/video/CVulkanRayTracingPipeline.h b/include/nbl/video/CVulkanRayTracingPipeline.h index f7b2c88ebf..944115ae70 100644 --- a/include/nbl/video/CVulkanRayTracingPipeline.h +++ b/include/nbl/video/CVulkanRayTracingPipeline.h @@ -10,9 +10,8 @@ namespace nbl::video { - - class CVulkanRayTracingPipeline final : public IGPURayTracingPipeline - { +class CVulkanRayTracingPipeline final : public IGPURayTracingPipeline +{ using ShaderRef = core::smart_refctd_ptr; using ShaderContainer = core::smart_refctd_dynamic_array; using ShaderHandleContainer = core::smart_refctd_dynamic_array; @@ -35,7 +34,7 @@ namespace nbl::video const VkPipeline m_vkPipeline; ShaderContainer m_shaders; ShaderHandleContainer m_shaderGroupHandles; - }; +}; } From 19dfc8490e4acbae434e996761315325eb87824c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 5 Feb 2025 17:56:16 +0700 Subject: [PATCH 15/68] Remove unnecessary colon --- include/nbl/asset/IRayTracingPipeline.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index 2e3741c3d1..5dd54648b7 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -3,7 +3,6 @@ #include "nbl/asset/IShader.h" -#include "nbl/asset/RasterizationStates.h" #include "nbl/asset/IPipeline.h" #include "nbl/asset/IRenderpass.h" @@ -41,7 +40,6 @@ namespace nbl::asset }; using SGeneralShaderGroup = 
SShaderGroupsParams::SGeneralGroup; using SHitShaderGroup = SShaderGroupsParams::SHitGroup; - ; class IRayTracingPipelineBase : public virtual core::IReferenceCounted { From c9bd800c156bc8753e4cd675928948aa353c25e7 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 5 Feb 2025 23:52:36 +0700 Subject: [PATCH 16/68] Refactor SShaderGroupParams to use span and remove it from SCachedParams --- include/nbl/asset/IRayTracingPipeline.h | 100 ++++++++++-------- include/nbl/video/CVulkanRayTracingPipeline.h | 2 +- src/nbl/video/CVulkanLogicalDevice.cpp | 2 +- src/nbl/video/CVulkanRayTracingPipeline.cpp | 2 +- 4 files changed, 60 insertions(+), 46 deletions(-) diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index 5dd54648b7..03d2f83b49 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -11,51 +11,55 @@ namespace nbl::asset { - struct SShaderGroupsParams - { - constexpr static inline uint32_t ShaderUnused = 0xffFFffFFu; - - struct SGeneralGroup - { - uint32_t shaderIndex = ShaderUnused; - }; - - struct SHitGroup - { - uint32_t closestHitShaderIndex = ShaderUnused; - uint32_t anyHitShaderIndex = ShaderUnused; - uint32_t intersectionShaderIndex = ShaderUnused; - }; - SGeneralGroup raygenGroup; - core::vector missGroups; - core::vector hitGroups; - core::vector callableGroups; +struct SShaderGroupsParams +{ + constexpr static inline uint32_t ShaderUnused = 0xffFFffFFu; - inline uint32_t getShaderGroupCount() const - { - return 1 + hitGroups.size() + missGroups.size() + callableGroups.size(); - } + struct SGeneralShaderGroup + { + uint32_t shaderIndex = ShaderUnused; + }; + struct SHitShaderGroup + { + uint32_t closestHitShaderIndex = ShaderUnused; + uint32_t anyHitShaderIndex = ShaderUnused; + uint32_t intersectionShaderIndex = ShaderUnused; }; - using SGeneralShaderGroup = SShaderGroupsParams::SGeneralGroup; - using SHitShaderGroup = SShaderGroupsParams::SHitGroup; - class 
IRayTracingPipelineBase : public virtual core::IReferenceCounted + SGeneralShaderGroup raygenGroup; + std::span missGroups; + std::span hitGroups; + std::span callableGroups; + + inline uint32_t getShaderGroupCount() const { - public: + return 1 + hitGroups.size() + missGroups.size() + callableGroups.size(); + } +}; +using SGeneralShaderGroup = SShaderGroupsParams::SGeneralShaderGroup; +using SHitShaderGroup = SShaderGroupsParams::SHitShaderGroup; + +class IRayTracingPipelineBase : public virtual core::IReferenceCounted +{ + public: struct SCachedCreationParams final { - SShaderGroupsParams shaderGroups; uint32_t maxRecursionDepth; }; - }; +}; - template - class IRayTracingPipeline : public IPipeline, public IRayTracingPipelineBase - { +template +class IRayTracingPipeline : public IPipeline, public IRayTracingPipelineBase +{ public: + + using SGeneralShaderGroupContainer = core::smart_refctd_dynamic_array; + using SHitShaderGroupContainer = core::smart_refctd_dynamic_array; + + struct SCreationParams : IPipeline::SCreationParams { protected: @@ -85,9 +89,9 @@ namespace nbl::asset return shaders[index].shader->getStage(); }; - if (cached.shaderGroups.raygenGroup.shaderIndex >= shaders.size()) + if (shaderGroups.raygenGroup.shaderIndex >= shaders.size()) return false; - if (getShaderStage(cached.shaderGroups.raygenGroup.shaderIndex) != ICPUShader::E_SHADER_STAGE::ESS_RAYGEN) + if (getShaderStage(shaderGroups.raygenGroup.shaderIndex) != ICPUShader::E_SHADER_STAGE::ESS_RAYGEN) return false; auto isValidShaderIndex = [this, getShaderStage](size_t index, ICPUShader::E_SHADER_STAGE expectedStage) -> bool @@ -101,7 +105,7 @@ namespace nbl::asset return true; }; - for (const auto& shaderGroup : cached.shaderGroups.hitGroups) + for (const auto& shaderGroup : shaderGroups.hitGroups) { if (!isValidShaderIndex(shaderGroup.anyHitShaderIndex, ICPUShader::E_SHADER_STAGE::ESS_ANY_HIT)) return false; @@ -113,13 +117,13 @@ namespace nbl::asset return false; } - for (const auto& 
shaderGroup : cached.shaderGroups.missGroups) + for (const auto& shaderGroup : shaderGroups.missGroups) { if (!isValidShaderIndex(shaderGroup.shaderIndex, ICPUShader::E_SHADER_STAGE::ESS_MISS)) return false; } - for (const auto& shaderGroup : cached.shaderGroups.callableGroups) + for (const auto& shaderGroup : shaderGroups.callableGroups) { if (!isValidShaderIndex(shaderGroup.shaderIndex, ICPUShader::E_SHADER_STAGE::ESS_CALLABLE)) return false; @@ -139,22 +143,32 @@ namespace nbl::asset } std::span shaders = {}; + SShaderGroupsParams shaderGroups; SCachedCreationParams cached = {}; }; inline const SCachedCreationParams& getCachedCreationParams() const { return m_params; } - size_t getHitGroupCount() const { return m_params.shaderGroups.hitGroups.size(); } - size_t getMissGroupCount() const { return m_params.shaderGroups.missGroups.size(); } - size_t getCallableGroupCount() const { return m_params.shaderGroups.callableGroups.size(); } + size_t getHitGroupCount() const { return m_hitShaderGroups->size(); } + size_t getMissGroupCount() const { return m_missShaderGroups->size(); } + size_t getCallableGroupCount() const { return m_callableShaderGroups->size(); } protected: explicit IRayTracingPipeline(const SCreationParams& _params) : IPipeline(core::smart_refctd_ptr(_params.layout)), - m_params(_params.cached) { - } + m_params(_params.cached), + m_raygenShaderGroup(_params.shaderGroups.raygenGroup), + m_missShaderGroups(core::make_refctd_dynamic_array(_params.shaderGroups.missGroups)), + m_hitShaderGroups(core::make_refctd_dynamic_array(_params.shaderGroups.hitGroups)), + m_callableShaderGroups(core::make_refctd_dynamic_array(_params.shaderGroups.callableGroups)) + {} SCachedCreationParams m_params; - }; + SGeneralShaderGroup m_raygenShaderGroup; + SGeneralShaderGroupContainer m_missShaderGroups; + SHitShaderGroupContainer m_hitShaderGroups; + SGeneralShaderGroupContainer m_callableShaderGroups; + +}; } diff --git a/include/nbl/video/CVulkanRayTracingPipeline.h 
b/include/nbl/video/CVulkanRayTracingPipeline.h index 944115ae70..20d5cb7ded 100644 --- a/include/nbl/video/CVulkanRayTracingPipeline.h +++ b/include/nbl/video/CVulkanRayTracingPipeline.h @@ -29,7 +29,7 @@ class CVulkanRayTracingPipeline final : public IGPURayTracingPipeline std::span getCallableGroupShaderHandle(uint32_t index) const override; private: - ~CVulkanRayTracingPipeline(); + ~CVulkanRayTracingPipeline() override; const VkPipeline m_vkPipeline; ShaderContainer m_shaders; diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index 7cac486ebb..9572f20822 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1493,7 +1493,7 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( } outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages,outShaderStage); - const auto& shaderGroups = info.cached.shaderGroups; + const auto& shaderGroups = info.shaderGroups; outCreateInfo->pGroups = outShaderGroup; *(outShaderGroup++) = getGeneralVkRayTracingShaderGroupCreateInfo(shaderGroups.raygenGroup); for (const auto& shaderGroup : shaderGroups.missGroups) diff --git a/src/nbl/video/CVulkanRayTracingPipeline.cpp b/src/nbl/video/CVulkanRayTracingPipeline.cpp index 6f989d3c71..7bd9166342 100644 --- a/src/nbl/video/CVulkanRayTracingPipeline.cpp +++ b/src/nbl/video/CVulkanRayTracingPipeline.cpp @@ -14,7 +14,7 @@ namespace nbl::video m_shaders->operator[](shaderIx) = ShaderRef(static_cast(params.shaders[shaderIx].shader)); const auto* vulkanDevice = static_cast(getOriginDevice()); - const auto handleCount = params.cached.shaderGroups.getShaderGroupCount(); + const auto handleCount = params.shaderGroups.getShaderGroupCount(); const auto handleSize = SPhysicalDeviceLimits::ShaderGroupHandleSize; const auto dataSize = handleCount * handleSize; auto* vk = vulkanDevice->getFunctionTable(); From 98c29b5bd3470f7cd05a3bff1a5154d8d584d781 Mon Sep 17 00:00:00 2001 From: kevyuu 
Date: Thu, 6 Feb 2025 13:05:02 +0700 Subject: [PATCH 17/68] Remove SStridedBufferRegion. Use SBufferRange + stride instead. --- include/nbl/asset/IBuffer.h | 31 -------------------------- include/nbl/video/IGPUCommandBuffer.h | 17 +++++++------- src/nbl/video/CVulkanCommandBuffer.cpp | 26 ++++++++++----------- src/nbl/video/CVulkanCommandBuffer.h | 8 +++---- src/nbl/video/IGPUCommandBuffer.cpp | 23 +++++++++++-------- 5 files changed, 40 insertions(+), 65 deletions(-) diff --git a/include/nbl/asset/IBuffer.h b/include/nbl/asset/IBuffer.h index 177b730b78..27888d96a1 100644 --- a/include/nbl/asset/IBuffer.h +++ b/include/nbl/asset/IBuffer.h @@ -117,37 +117,6 @@ struct SBufferRange inline bool operator!=(const SBufferRange& rhs) const { return !operator==(rhs); } }; -template -struct SStridedBufferRegion -{ - static constexpr inline size_t WholeBuffer = ~0ull; - - size_t offset = 0ull; - size_t stride = 0; - size_t size = WholeBuffer; - core::smart_refctd_ptr buffer = nullptr; - - - inline operator SStridedBufferRegion&() {return *reinterpret_cast*>(this);} - inline operator const SStridedBufferRegion&() const {return *reinterpret_cast*>(this);} - - explicit inline operator bool() const {return isValid();} - - inline bool isValid() const - { - if (!buffer || offset>=buffer->getSize() || size==0ull || stride>buffer->getSize()) - return false; - return actualSize()<=buffer->getSize()-offset; - } - - inline size_t actualSize() const - { - return size!=WholeBuffer ? 
size:buffer->getSize(); - } - inline bool operator==(const SStridedBufferRegion& rhs) const { return buffer==rhs.buffer && offset==rhs.offset && actualSize()==rhs.actualSize() && stride==rhs.stride; } - inline bool operator!=(const SStridedBufferRegion& rhs) const { return !operator==(rhs); } -}; - } namespace std diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index 566bdbb7be..416af16fa0 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ b/include/nbl/video/IGPUCommandBuffer.h @@ -524,10 +524,11 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject }; bool resolveImage(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const uint32_t regionCount, const SImageResolve* const pRegions); - bool IGPUCommandBuffer::traceRays(const asset::SStridedBufferRegion& raygenGroupRegion, - const asset::SStridedBufferRegion& missGroupsRegion, - const asset::SStridedBufferRegion& hitGroupsRegion, - const asset::SStridedBufferRegion& callableGroupsRegion, + bool IGPUCommandBuffer::traceRays( + const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, + const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, + const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, uint32_t width, uint32_t height, uint32_t depth); //! 
Secondary CommandBuffer execute @@ -672,10 +673,10 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject virtual bool resolveImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const uint32_t regionCount, const SImageResolve* pRegions) = 0; virtual bool traceRays_impl( - const asset::SStridedBufferRegion& raygenGroupRegion, - const asset::SStridedBufferRegion& missGroupsRegion, - const asset::SStridedBufferRegion& hitGroupsRegion, - const asset::SStridedBufferRegion& callableGroupsRegion, + const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, + const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, + const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, uint32_t width, uint32_t height, uint32_t depth) = 0; virtual bool executeCommands_impl(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs) = 0; diff --git a/src/nbl/video/CVulkanCommandBuffer.cpp b/src/nbl/video/CVulkanCommandBuffer.cpp index 24c3182484..1fc761351e 100644 --- a/src/nbl/video/CVulkanCommandBuffer.cpp +++ b/src/nbl/video/CVulkanCommandBuffer.cpp @@ -817,28 +817,28 @@ bool CVulkanCommandBuffer::resolveImage_impl(const IGPUImage* const srcImage, co } bool CVulkanCommandBuffer::traceRays_impl( - const asset::SStridedBufferRegion& raygenGroupRegion, - const asset::SStridedBufferRegion& missGroupsRegion, - const asset::SStridedBufferRegion& hitGroupsRegion, - const asset::SStridedBufferRegion& callableGroupsRegion, + const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, + const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, + const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, uint32_t width, uint32_t height, uint32_t depth) { - auto 
toVkRegion = [](const asset::SStridedBufferRegion& region) -> VkStridedDeviceAddressRegionKHR + auto toVkRegion = [](const asset::SBufferRange& range, uint32_t stride) -> VkStridedDeviceAddressRegionKHR { - if (region.buffer.get() == nullptr) + if (range.buffer.get() == nullptr) return {}; return { - .deviceAddress = region.buffer->getDeviceAddress() + region.offset, - .stride = region.stride, - .size = region.size, + .deviceAddress = range.buffer->getDeviceAddress() + range.offset, + .stride = stride, + .size = range.size, }; }; - const auto vk_raygenGroupRegion = toVkRegion(raygenGroupRegion); - const auto vk_missGroupsRegion = toVkRegion(missGroupsRegion); - const auto vk_hitGroupsRegion = toVkRegion(hitGroupsRegion); - const auto vk_callableGroupsRegion = toVkRegion(callableGroupsRegion); + const auto vk_raygenGroupRegion = toVkRegion(raygenGroupRange, raygenGroupStride); + const auto vk_missGroupsRegion = toVkRegion(missGroupsRange, missGroupStride); + const auto vk_hitGroupsRegion = toVkRegion(hitGroupsRange, hitGroupStride); + const auto vk_callableGroupsRegion = toVkRegion(callableGroupsRange, callableGroupStride); getFunctionTable().vkCmdTraceRaysKHR(m_cmdbuf, &vk_raygenGroupRegion, diff --git a/src/nbl/video/CVulkanCommandBuffer.h b/src/nbl/video/CVulkanCommandBuffer.h index 87dc32c42e..8acfc6a315 100644 --- a/src/nbl/video/CVulkanCommandBuffer.h +++ b/src/nbl/video/CVulkanCommandBuffer.h @@ -226,10 +226,10 @@ class CVulkanCommandBuffer final : public IGPUCommandBuffer bool resolveImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const uint32_t regionCount, const SImageResolve* pRegions) override; bool traceRays_impl( - const asset::SStridedBufferRegion& raygenGroupRegion, - const asset::SStridedBufferRegion& missGroupsRegion, - const asset::SStridedBufferRegion& hitGroupsRegion, - const asset::SStridedBufferRegion& callableGroupsRegion, + const 
asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, + const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, + const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, uint32_t width, uint32_t height, uint32_t depth) override; bool executeCommands_impl(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs) override; diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index 4e870f1877..7734a691f5 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -1795,10 +1795,11 @@ bool IGPUCommandBuffer::resolveImage(const IGPUImage* const srcImage, const IGPU return resolveImage_impl(srcImage, srcImageLayout, dstImage, dstImageLayout, regionCount, pRegions); } -bool IGPUCommandBuffer::traceRays(const asset::SStridedBufferRegion& raygenGroupRegion, - const asset::SStridedBufferRegion& missGroupsRegion, - const asset::SStridedBufferRegion& hitGroupsRegion, - const asset::SStridedBufferRegion& callableGroupsRegion, +bool IGPUCommandBuffer::traceRays( + const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, + const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, + const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, uint32_t width, uint32_t height, uint32_t depth) { if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT,RENDERPASS_SCOPE::OUTSIDE)) @@ -1811,16 +1812,20 @@ bool IGPUCommandBuffer::traceRays(const asset::SStridedBufferRegion& } if (!m_cmdpool->m_commandListPool.emplace(m_commandList, - core::smart_refctd_ptr(raygenGroupRegion.buffer), - core::smart_refctd_ptr(missGroupsRegion.buffer), - core::smart_refctd_ptr(hitGroupsRegion.buffer), - core::smart_refctd_ptr(callableGroupsRegion.buffer))) + 
core::smart_refctd_ptr(raygenGroupRange.buffer), + core::smart_refctd_ptr(missGroupsRange.buffer), + core::smart_refctd_ptr(hitGroupsRange.buffer), + core::smart_refctd_ptr(callableGroupsRange.buffer))) { NBL_LOG_ERROR("out of host memory!"); return false; } - return traceRays_impl(raygenGroupRegion, missGroupsRegion, hitGroupsRegion, callableGroupsRegion, + return traceRays_impl( + raygenGroupRange, raygenGroupStride, + missGroupsRange, missGroupStride, + hitGroupsRange, hitGroupStride, + callableGroupsRange, callableGroupStride, width, height, depth); } From 98025d3c3e491482765e3526ae381ea740f000ab Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 6 Feb 2025 23:07:58 +0700 Subject: [PATCH 18/68] Add more validation to TraceRays --- include/nbl/asset/ICPUComputePipeline.h | 4 +- include/nbl/asset/IGraphicsPipeline.h | 2 +- include/nbl/asset/IPipeline.h | 5 +- include/nbl/asset/IRayTracingPipeline.h | 2 +- include/nbl/video/IGPUCommandBuffer.h | 5 ++ include/nbl/video/IGPUComputePipeline.h | 4 +- include/nbl/video/IGPURayTracingPipeline.h | 8 +++ src/nbl/video/IGPUCommandBuffer.cpp | 79 ++++++++++++++++++++++ 8 files changed, 103 insertions(+), 6 deletions(-) diff --git a/include/nbl/asset/ICPUComputePipeline.h b/include/nbl/asset/ICPUComputePipeline.h index 14b0277152..6d9734c1ae 100644 --- a/include/nbl/asset/ICPUComputePipeline.h +++ b/include/nbl/asset/ICPUComputePipeline.h @@ -25,7 +25,7 @@ class ICPUComputePipeline : public ICPUPipeline,1> { if (!params.layout) return nullptr; - auto retval = new ICPUComputePipeline(core::smart_refctd_ptr(params.layout)); + auto retval = new ICPUComputePipeline(core::smart_refctd_ptr(params.layout), EPBP_COMPUTE); if (!retval->setSpecInfo(params.shader)) { retval->drop(); @@ -50,7 +50,7 @@ class ICPUComputePipeline : public ICPUPipeline,1> base_t* clone_impl(core::smart_refctd_ptr&& layout) const override { - return new ICPUComputePipeline(std::move(layout)); + return new ICPUComputePipeline(std::move(layout), EPBP_COMPUTE); } 
inline IAsset* getDependant_impl(const size_t ix) override diff --git a/include/nbl/asset/IGraphicsPipeline.h b/include/nbl/asset/IGraphicsPipeline.h index 62861fdc9d..ec45f17cb2 100644 --- a/include/nbl/asset/IGraphicsPipeline.h +++ b/include/nbl/asset/IGraphicsPipeline.h @@ -155,7 +155,7 @@ class IGraphicsPipeline : public IPipeline, public IGraphics protected: explicit IGraphicsPipeline(const SCreationParams& _params) : - IPipeline(core::smart_refctd_ptr(_params.layout)), + IPipeline(core::smart_refctd_ptr(_params.layout), EPBP_GRAPHICS), m_params(_params.cached), m_renderpass(core::smart_refctd_ptr(_params.renderpass)) {} SCachedCreationParams m_params; diff --git a/include/nbl/asset/IPipeline.h b/include/nbl/asset/IPipeline.h index 40623876fe..7598e0f617 100644 --- a/include/nbl/asset/IPipeline.h +++ b/include/nbl/asset/IPipeline.h @@ -105,11 +105,14 @@ class IPipeline }; inline const PipelineLayout* getLayout() const {return m_layout.get();} + inline const asset::E_PIPELINE_BIND_POINT getBindPoint() const { return m_bindPoint; } protected: - inline IPipeline(core::smart_refctd_ptr&& _layout) : m_layout(std::move(_layout)) {} + inline IPipeline(core::smart_refctd_ptr&& _layout, asset::E_PIPELINE_BIND_POINT bindPoint) + : m_layout(std::move(_layout)), m_bindPoint(bindPoint) {} core::smart_refctd_ptr m_layout; + asset::E_PIPELINE_BIND_POINT m_bindPoint; }; } diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index 03d2f83b49..e819e906dd 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -154,7 +154,7 @@ class IRayTracingPipeline : public IPipeline, public IRayTra protected: explicit IRayTracingPipeline(const SCreationParams& _params) : - IPipeline(core::smart_refctd_ptr(_params.layout)), + IPipeline(core::smart_refctd_ptr(_params.layout), EPBP_RAY_TRACING), m_params(_params.cached), m_raygenShaderGroup(_params.shaderGroups.raygenGroup), 
m_missShaderGroups(core::make_refctd_dynamic_array(_params.shaderGroups.missGroups)), diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index 416af16fa0..1f1e991f98 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ b/include/nbl/video/IGPUCommandBuffer.h @@ -542,6 +542,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject virtual const void* getNativeHandle() const = 0; inline const core::unordered_map& getBoundDescriptorSetsRecord() const { return m_boundDescriptorSetsRecord; } + inline const asset::IPipeline* getBoundPipeline() const { return m_boundPipeline; } protected: friend class IQueue; @@ -700,6 +701,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject m_state = STATE::INITIAL; m_boundDescriptorSetsRecord.clear(); + m_boundPipeline = nullptr; m_commandList.head = nullptr; m_commandList.tail = nullptr; @@ -713,6 +715,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject { deleteCommandList(); m_boundDescriptorSetsRecord.clear(); + m_boundPipeline = nullptr; releaseResourcesBackToPool_impl(); } @@ -839,6 +842,8 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject // created with IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT // or IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT. 
core::unordered_map m_boundDescriptorSetsRecord; + const asset::IPipeline* m_boundPipeline; + asset::E_PIPELINE_BIND_POINT m_boundPipelineBindPoint; IGPUCommandPool::CCommandSegmentListPool::SCommandSegmentList m_commandList = {}; diff --git a/include/nbl/video/IGPUComputePipeline.h b/include/nbl/video/IGPUComputePipeline.h index 1ecf7b668b..8653fde7fd 100644 --- a/include/nbl/video/IGPUComputePipeline.h +++ b/include/nbl/video/IGPUComputePipeline.h @@ -81,7 +81,9 @@ class IGPUComputePipeline : public IBackendObject, public asset::IPipeline&& _layout, const core::bitflag _flags) : - IBackendObject(core::smart_refctd_ptr(_layout->getOriginDevice())), pipeline_t(std::move(_layout)), m_flags(_flags) {} + IBackendObject(core::smart_refctd_ptr(_layout->getOriginDevice())), + pipeline_t(std::move(_layout), asset::EPBP_COMPUTE), + m_flags(_flags) {} virtual ~IGPUComputePipeline() = default; const core::bitflag m_flags; diff --git a/include/nbl/video/IGPURayTracingPipeline.h b/include/nbl/video/IGPURayTracingPipeline.h index 0d6d0f13a0..72eb7c76e5 100644 --- a/include/nbl/video/IGPURayTracingPipeline.h +++ b/include/nbl/video/IGPURayTracingPipeline.h @@ -29,6 +29,14 @@ class IGPURayTracingPipeline : public IBackendObject, public asset::IRayTracingP EARLY_RETURN_ON_FAILURE = base_flag(EARLY_RETURN_ON_FAILURE), LINK_TIME_OPTIMIZATION = base_flag(LINK_TIME_OPTIMIZATION), RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT = base_flag(RETAIN_LINK_TIME_OPTIMIZATION_INFO), + RAY_TRACING_SKIP_TRIANGLES_BIT_KHR = 1<<12, + RAY_TRACING_SKIP_AABBS_BIT_KHR = 1<<13, + RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR = 1<<14, + RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR = 1<<15, + RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR = 1<<16, + RAY_TRACING_NO_NULL_INTERSECTION_SHADERS_BIT_KHR = 1<<17, + RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR = 1<<19, + RAY_TRACING_ALLOW_MOTION_BIT_NV = 1<<20, }; #undef base_flag diff --git a/src/nbl/video/IGPUCommandBuffer.cpp 
b/src/nbl/video/IGPUCommandBuffer.cpp index 7734a691f5..4e4cdaf0bd 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -854,6 +854,8 @@ bool IGPUCommandBuffer::bindComputePipeline(const IGPUComputePipeline* const pip return false; } + m_boundPipeline = pipeline; + m_noCommands = false; bindComputePipeline_impl(pipeline); @@ -880,6 +882,8 @@ bool IGPUCommandBuffer::bindGraphicsPipeline(const IGPUGraphicsPipeline* const p return false; } + m_boundPipeline = pipeline; + m_noCommands = false; return bindGraphicsPipeline_impl(pipeline); } @@ -901,6 +905,8 @@ bool IGPUCommandBuffer::bindRayTracingPipeline(const IGPURayTracingPipeline* con return false; } + m_boundPipeline = pipeline; + m_noCommands = false; return bindRayTracingPipeline_impl(pipeline); } @@ -1811,6 +1817,79 @@ bool IGPUCommandBuffer::traceRays( return false; } + if (m_boundPipeline == nullptr || m_boundPipeline->getBindPoint() != asset::EPBP_RAY_TRACING) + { + NBL_LOG_ERROR("invalid bound pipeline for traceRays command!"); + return false; + } + const IGPURayTracingPipeline* rayTracingPipeline = static_cast(m_boundPipeline); + const auto flags = rayTracingPipeline->getCreationFlags(); + + using PipelineFlag = IGPURayTracingPipeline::SCreationParams::FLAGS; + using PipelineFlags = core::bitflag; + + const auto shouldHaveHitGroup = flags & + (PipelineFlags(PipelineFlag::RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR) | + PipelineFlag::RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR | + PipelineFlag::RAY_TRACING_NO_NULL_INTERSECTION_SHADERS_BIT_KHR); + if (shouldHaveHitGroup && !hitGroupsRange.buffer) + { + NBL_LOG_ERROR("bound pipeline indicates that traceRays command should have hit group, but hitGroupsRange.buffer is null!"); + return false; + } + + const auto shouldHaveMissGroup = flags & PipelineFlag::RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR; + if (shouldHaveMissGroup && !missGroupsRange.buffer) + { + NBL_LOG_ERROR("bound pipeline indicates that traceRays command 
should have miss group, but missGroupsRange.buffer is null!"); + return false; + } + + const auto& limits = getOriginDevice()->getPhysicalDevice()->getLimits(); + auto checkBufferRegion = [this, &limits](const asset::SBufferRange& range, uint32_t stride, const char* groupName) -> bool + { + const IGPUBuffer* const buffer = range.buffer.get(); + + if (!buffer) return true; + + if (!range.isValid()) + { + NBL_LOG_ERROR("%s buffer range is not valid!", groupName); + return false; + } + + if (!(buffer->getCreationParams().usage & IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT)) + { + NBL_LOG_ERROR("%s buffer must have EUF_SHADER_DEVICE_ADDRESS_BIT usage!", groupName); + return false; + } + + if (range.offset % limits.shaderGroupBaseAlignment != 0) + { + NBL_LOG_ERROR("%s buffer offset must be multiple of %u!", groupName, limits.shaderGroupBaseAlignment); + return false; + } + + if (stride % limits.shaderGroupHandleAlignment) + { + NBL_LOG_ERROR("%s buffer stride must be multiple of %u!", groupName, limits.shaderGroupHandleAlignment); + return false; + } + + if (!(buffer->getCreationParams().usage & IGPUBuffer::EUF_SHADER_BINDING_TABLE_BIT)) + { + NBL_LOG_ERROR("%s buffer must have EUF_SHADER_BINDING_TABLE_BIT usage!", groupName); + return false; + } + + return true; + }; + + if (!checkBufferRegion(raygenGroupRange, raygenGroupStride, "Raygen Group")) return false; + if (!checkBufferRegion(missGroupsRange, missGroupStride, "Miss groups")) return false; + if (!checkBufferRegion(hitGroupsRange, hitGroupStride, "Hit groups")) return false; + if (!checkBufferRegion(callableGroupsRange, callableGroupStride, "Callable groups")) return false; + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(raygenGroupRange.buffer), core::smart_refctd_ptr(missGroupsRange.buffer), From e9ed9e202463682af3144f46b015beb9671326f8 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 11 Feb 2025 20:19:34 +0700 Subject: [PATCH 19/68] Implement set ray tracing pipeline stack size --- 
include/nbl/video/IGPUCommandBuffer.h | 2 ++ src/nbl/video/CVulkanCommandBuffer.cpp | 6 ++++++ src/nbl/video/CVulkanCommandBuffer.h | 1 + src/nbl/video/IGPUCommandBuffer.cpp | 5 +++++ 4 files changed, 14 insertions(+) diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index 1f1e991f98..da976a24af 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ b/include/nbl/video/IGPUCommandBuffer.h @@ -524,6 +524,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject }; bool resolveImage(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const uint32_t regionCount, const SImageResolve* const pRegions); + bool IGPUCommandBuffer::setRayTracingPipelineStackSize(uint32_t pipelineStackSize); bool IGPUCommandBuffer::traceRays( const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, @@ -673,6 +674,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject virtual bool blitImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const std::span regions, const IGPUSampler::E_TEXTURE_FILTER filter) = 0; virtual bool resolveImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const uint32_t regionCount, const SImageResolve* pRegions) = 0; + virtual bool setRayTracingPipelineStackSize_impl(uint32_t pipelineStackSize) = 0; virtual bool traceRays_impl( const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, diff --git a/src/nbl/video/CVulkanCommandBuffer.cpp b/src/nbl/video/CVulkanCommandBuffer.cpp index 1fc761351e..c4063837fd 100644 --- a/src/nbl/video/CVulkanCommandBuffer.cpp +++ 
b/src/nbl/video/CVulkanCommandBuffer.cpp @@ -816,6 +816,12 @@ bool CVulkanCommandBuffer::resolveImage_impl(const IGPUImage* const srcImage, co return true; } +bool CVulkanCommandBuffer::setRayTracingPipelineStackSize_impl(uint32_t pipelineStackSize) +{ + getFunctionTable().vkCmdSetRayTracingPipelineStackSizeKHR(m_cmdbuf, pipelineStackSize); + return true; +} + bool CVulkanCommandBuffer::traceRays_impl( const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, diff --git a/src/nbl/video/CVulkanCommandBuffer.h b/src/nbl/video/CVulkanCommandBuffer.h index 8acfc6a315..147dbb953b 100644 --- a/src/nbl/video/CVulkanCommandBuffer.h +++ b/src/nbl/video/CVulkanCommandBuffer.h @@ -225,6 +225,7 @@ class CVulkanCommandBuffer final : public IGPUCommandBuffer bool blitImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const std::span regions, const IGPUSampler::E_TEXTURE_FILTER filter) override; bool resolveImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const uint32_t regionCount, const SImageResolve* pRegions) override; + bool setRayTracingPipelineStackSize_impl(uint32_t pipelineStackSize) override; bool traceRays_impl( const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index 4e4cdaf0bd..faf83d0a97 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -1801,6 +1801,11 @@ bool IGPUCommandBuffer::resolveImage(const IGPUImage* const srcImage, const IGPU return resolveImage_impl(srcImage, srcImageLayout, dstImage, dstImageLayout, regionCount, pRegions); } +bool 
IGPUCommandBuffer::setRayTracingPipelineStackSize(uint32_t pipelineStackSize) +{ + return setRayTracingPipelineStackSize_impl(pipelineStackSize); +} + bool IGPUCommandBuffer::traceRays( const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, From 64a8760a1fceb67e9be2efd1a8984c34c8d3d9f3 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 11 Feb 2025 22:38:25 +0700 Subject: [PATCH 20/68] Implement TraceRaysIndirect --- .../nbl/builtin/hlsl/indirect_commands.hlsl | 6 + include/nbl/video/CVulkanCommon.h | 12 ++ include/nbl/video/IGPUCommandBuffer.h | 19 ++ include/nbl/video/IGPUCommandPool.h | 24 +++ src/nbl/video/CVulkanCommandBuffer.cpp | 40 ++-- src/nbl/video/CVulkanCommandBuffer.h | 6 + src/nbl/video/IGPUCommandBuffer.cpp | 197 ++++++++++++------ 7 files changed, 230 insertions(+), 74 deletions(-) diff --git a/include/nbl/builtin/hlsl/indirect_commands.hlsl b/include/nbl/builtin/hlsl/indirect_commands.hlsl index efc5f8a3c3..ae8924562f 100644 --- a/include/nbl/builtin/hlsl/indirect_commands.hlsl +++ b/include/nbl/builtin/hlsl/indirect_commands.hlsl @@ -37,6 +37,12 @@ struct DispatchIndirectCommand_t uint32_t num_groups_z; }; +struct TraceRaysIndirectCommand_t +{ + uint32_t width; + uint32_t height; + uint32_t depth; +}; } } diff --git a/include/nbl/video/CVulkanCommon.h b/include/nbl/video/CVulkanCommon.h index ba708659a4..697c6e528e 100644 --- a/include/nbl/video/CVulkanCommon.h +++ b/include/nbl/video/CVulkanCommon.h @@ -1097,6 +1097,18 @@ inline VkPipelineBindPoint getVkPipelineBindPointFrom(asset::E_PIPELINE_BIND_POI return VK_PIPELINE_BIND_POINT_MAX_ENUM; } } + +inline VkStridedDeviceAddressRegionKHR getVkStridedDeviceAddressRegion(const asset::SBufferRange& range, uint32_t stride) +{ + if (range.buffer.get() == nullptr) + return {}; + + return { + .deviceAddress = range.buffer->getDeviceAddress() + range.offset, + .stride = stride, + .size = range.size, + }; +} } #endif diff 
--git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index da976a24af..08f74d4b50 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ b/include/nbl/video/IGPUCommandBuffer.h @@ -531,6 +531,12 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, uint32_t width, uint32_t height, uint32_t depth); + bool IGPUCommandBuffer::traceRaysIndirect( + const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, + const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, + const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, + const asset::SBufferBinding& binding); //! Secondary CommandBuffer execute bool executeCommands(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs); @@ -681,6 +687,12 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, uint32_t width, uint32_t height, uint32_t depth) = 0; + virtual bool IGPUCommandBuffer::traceRaysIndirect_impl( + const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, + const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, + const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, + const asset::SBufferBinding& binding) = 0; virtual bool executeCommands_impl(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs) = 0; @@ -827,6 +839,13 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject } return invalidImage(image,IGPUImage::EUF_TRANSFER_SRC_BIT); } + + bool invalidShaderGroups( + const asset::SBufferRange& raygenGroupRange, uint32_t 
raygenGroupStride, + const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, + const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, + const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, + core::bitflag flags) const; // returns total number of Geometries across all AS build infos template diff --git a/include/nbl/video/IGPUCommandPool.h b/include/nbl/video/IGPUCommandPool.h index 95c3315808..baede33259 100644 --- a/include/nbl/video/IGPUCommandPool.h +++ b/include/nbl/video/IGPUCommandPool.h @@ -153,6 +153,7 @@ class IGPUCommandPool : public IBackendObject class CCopyAccelerationStructureCmd; class CCopyAccelerationStructureToOrFromMemoryCmd; // for both vkCmdCopyAccelerationStructureToMemoryKHR and vkCmdCopyMemoryToAccelerationStructureKHR class CTraceRaysCmd; + class CTraceRaysIndirectCmd; class CBindRayTracingPipelineCmd; protected: @@ -846,6 +847,29 @@ class IGPUCommandPool::CTraceRaysCmd final : public IFixedSizeCommand m_callableGroupsBuffer; }; +class IGPUCommandPool::CTraceRaysIndirectCmd final : public IFixedSizeCommand +{ + public: + CTraceRaysIndirectCmd( + core::smart_refctd_ptr&& raygenGroupBuffer, + core::smart_refctd_ptr&& hitGroupsBuffer, + core::smart_refctd_ptr&& missGroupsBuffer, + core::smart_refctd_ptr&& callableGroupsBuffer, + core::smart_refctd_ptr&& bindingBuffer) : + m_raygenGroupBuffer(raygenGroupBuffer), + m_hitGroupsBuffer(hitGroupsBuffer), + m_missGroupsBuffer(missGroupsBuffer), + m_callableGroupsBuffer(callableGroupsBuffer), + m_bindingBuffer(bindingBuffer) {} + + + private: + core::smart_refctd_ptr m_raygenGroupBuffer; + core::smart_refctd_ptr m_hitGroupsBuffer; + core::smart_refctd_ptr m_missGroupsBuffer; + core::smart_refctd_ptr m_callableGroupsBuffer; + core::smart_refctd_ptr m_bindingBuffer; +}; class IGPUCommandPool::CBindRayTracingPipelineCmd final : public IFixedSizeCommand { diff --git a/src/nbl/video/CVulkanCommandBuffer.cpp b/src/nbl/video/CVulkanCommandBuffer.cpp index 
c4063837fd..4e5bf18878 100644 --- a/src/nbl/video/CVulkanCommandBuffer.cpp +++ b/src/nbl/video/CVulkanCommandBuffer.cpp @@ -829,22 +829,10 @@ bool CVulkanCommandBuffer::traceRays_impl( const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, uint32_t width, uint32_t height, uint32_t depth) { - auto toVkRegion = [](const asset::SBufferRange& range, uint32_t stride) -> VkStridedDeviceAddressRegionKHR - { - if (range.buffer.get() == nullptr) - return {}; - - return { - .deviceAddress = range.buffer->getDeviceAddress() + range.offset, - .stride = stride, - .size = range.size, - }; - }; - - const auto vk_raygenGroupRegion = toVkRegion(raygenGroupRange, raygenGroupStride); - const auto vk_missGroupsRegion = toVkRegion(missGroupsRange, missGroupStride); - const auto vk_hitGroupsRegion = toVkRegion(hitGroupsRange, hitGroupStride); - const auto vk_callableGroupsRegion = toVkRegion(callableGroupsRange, callableGroupStride); + const auto vk_raygenGroupRegion = getVkStridedDeviceAddressRegion(raygenGroupRange, raygenGroupStride); + const auto vk_missGroupsRegion = getVkStridedDeviceAddressRegion(missGroupsRange, missGroupStride); + const auto vk_hitGroupsRegion = getVkStridedDeviceAddressRegion(hitGroupsRange, hitGroupStride); + const auto vk_callableGroupsRegion = getVkStridedDeviceAddressRegion(callableGroupsRange, callableGroupStride); getFunctionTable().vkCmdTraceRaysKHR(m_cmdbuf, &vk_raygenGroupRegion, @@ -855,6 +843,26 @@ bool CVulkanCommandBuffer::traceRays_impl( return true; } +bool CVulkanCommandBuffer::traceRaysIndirect_impl( + const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, + const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, + const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, + const asset::SBufferBinding& binding) +{ + const auto vk_raygenGroupRegion = getVkStridedDeviceAddressRegion(raygenGroupRange, 
raygenGroupStride); + const auto vk_missGroupsRegion = getVkStridedDeviceAddressRegion(missGroupsRange, missGroupStride); + const auto vk_hitGroupsRegion = getVkStridedDeviceAddressRegion(hitGroupsRange, hitGroupStride); + const auto vk_callableGroupsRegion = getVkStridedDeviceAddressRegion(callableGroupsRange, callableGroupStride); + + getFunctionTable().vkCmdTraceRaysIndirectKHR(m_cmdbuf, + &vk_raygenGroupRegion, + &vk_missGroupsRegion, + &vk_hitGroupsRegion, + &vk_callableGroupsRegion, + binding.buffer->getDeviceAddress() + binding.offset); + return true; +} bool CVulkanCommandBuffer::executeCommands_impl(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs) { IGPUCommandPool::StackAllocation vk_commandBuffers(m_cmdpool,count); diff --git a/src/nbl/video/CVulkanCommandBuffer.h b/src/nbl/video/CVulkanCommandBuffer.h index 147dbb953b..1d49027f12 100644 --- a/src/nbl/video/CVulkanCommandBuffer.h +++ b/src/nbl/video/CVulkanCommandBuffer.h @@ -232,6 +232,12 @@ class CVulkanCommandBuffer final : public IGPUCommandBuffer const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, uint32_t width, uint32_t height, uint32_t depth) override; + bool traceRaysIndirect_impl( + const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, + const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, + const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, + const asset::SBufferBinding& binding) override; bool executeCommands_impl(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs) override; diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index faf83d0a97..8629fc7ec5 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -682,6 +682,87 @@ bool IGPUCommandBuffer::copyImage(const 
IGPUImage* const srcImage, const IGPUIma return copyImage_impl(srcImage, srcImageLayout, dstImage, dstImageLayout, regionCount, pRegions); } +bool IGPUCommandBuffer::invalidShaderGroups( + const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, + const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, + const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, + core::bitflag flags) const +{ + + using PipelineFlag = IGPURayTracingPipeline::SCreationParams::FLAGS; + using PipelineFlags = core::bitflag; + + // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-flags-03696 + // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-flags-03697 + // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-flags-03512 + const auto shouldHaveHitGroup = flags & + (PipelineFlags(PipelineFlag::RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR) | + PipelineFlag::RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR | + PipelineFlag::RAY_TRACING_NO_NULL_INTERSECTION_SHADERS_BIT_KHR); + if (shouldHaveHitGroup && !hitGroupsRange.buffer) + { + NBL_LOG_ERROR("bound pipeline indicates that traceRays command should have hit group, but hitGroupsRange.buffer is null!"); + return true; + } + + // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-flags-03511 + const auto shouldHaveMissGroup = flags & PipelineFlag::RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR; + if (shouldHaveMissGroup && !missGroupsRange.buffer) + { + NBL_LOG_ERROR("bound pipeline indicates that traceRays command should have hit group, but hitGroupsRange.buffer is null!"); + return true; + } + + const auto& limits = getOriginDevice()->getPhysicalDevice()->getLimits(); + auto invalidBufferRegion = [this, 
&limits](const asset::SBufferRange& range, uint32_t stride, const char* groupName) -> bool + { + const IGPUBuffer* const buffer = range.buffer.get(); + + if (!buffer) return false; + + if (!range.isValid()) + { + NBL_LOG_ERROR("%s buffer range is not valid!", groupName); + return true; + } + + if (!(buffer->getCreationParams().usage & IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT)) + { + NBL_LOG_ERROR("%s buffer must have EUF_SHADER_DEVICE_ADDRESS_BIT usage!", groupName); + return true; + } + + // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-pRayGenShaderBindingTable-03689 + if (range.offset % limits.shaderGroupBaseAlignment != 0) + { + NBL_LOG_ERROR("%s buffer offset must be multiple of %u!", limits.shaderGroupBaseAlignment); + return true; + } + + // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-pHitShaderBindingTable-03690 + if (stride % limits.shaderGroupHandleAlignment) + { + NBL_LOG_ERROR("%s buffer offset must be multiple of %u!", limits.shaderGroupHandleAlignment); + return true; + } + + // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-pRayGenShaderBindingTable-03681 + if (!(buffer->getCreationParams().usage & IGPUBuffer::EUF_SHADER_BINDING_TABLE_BIT)) + { + NBL_LOG_ERROR("%s buffer must have EUF_SHADER_BINDING_TABLE_BIT usage!", groupName); + return true; + } + + return false; + }; + + if (invalidBufferRegion(raygenGroupRange, raygenGroupStride, "Raygen Group")) return true; + if (invalidBufferRegion(missGroupsRange, missGroupStride, "Miss groups")) return true; + if (invalidBufferRegion(hitGroupsRange, hitGroupStride, "Hit groups")) return true; + if (invalidBufferRegion(callableGroupsRange, callableGroupStride, "Callable groups")) return true; + return false; +} template uint32_t IGPUCommandBuffer::buildAccelerationStructures_common(const std::span infos, BuildRangeInfos ranges, const 
IGPUBuffer* const indirectBuffer) @@ -1830,90 +1911,90 @@ bool IGPUCommandBuffer::traceRays( const IGPURayTracingPipeline* rayTracingPipeline = static_cast(m_boundPipeline); const auto flags = rayTracingPipeline->getCreationFlags(); - using PipelineFlag = IGPURayTracingPipeline::SCreationParams::FLAGS; - using PipelineFlags = core::bitflag; - - const auto shouldHaveHitGroup = flags & - (PipelineFlags(PipelineFlag::RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR) | - PipelineFlag::RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR | - PipelineFlag::RAY_TRACING_NO_NULL_INTERSECTION_SHADERS_BIT_KHR); - if (shouldHaveHitGroup && !hitGroupsRange.buffer) + if (invalidShaderGroups(raygenGroupRange, raygenGroupStride, + missGroupsRange, missGroupStride, + hitGroupsRange, hitGroupStride, + callableGroupsRange, callableGroupStride, + flags)) { - NBL_LOG_ERROR("bound pipeline indicates that traceRays command should have hit group, but hitGroupsRange.buffer is null!"); + NBL_LOG_ERROR("invalid shader groups for traceRays command!"); return false; } - const auto shouldHaveMissGroup = flags & PipelineFlag::RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR; - if (shouldHaveMissGroup && !missGroupsRange.buffer) + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, + core::smart_refctd_ptr(raygenGroupRange.buffer), + core::smart_refctd_ptr(missGroupsRange.buffer), + core::smart_refctd_ptr(hitGroupsRange.buffer), + core::smart_refctd_ptr(callableGroupsRange.buffer))) { - NBL_LOG_ERROR("bound pipeline indicates that traceRays command should have hit group, but hitGroupsRange.buffer is null!"); + NBL_LOG_ERROR("out of host memory!"); return false; } - const auto& limits = getOriginDevice()->getPhysicalDevice()->getLimits(); - auto checkBufferRegion = [this, &limits](const asset::SBufferRange& range, uint32_t stride, const char* groupName) -> bool - { - const IGPUBuffer* const buffer = range.buffer.get(); - - if (!buffer) return true; - - if (!range.isValid()) - { - NBL_LOG_ERROR("%s buffer range 
is not valid!", groupName); - return false; - } - - if (!(buffer->getCreationParams().usage & IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT)) - { - NBL_LOG_ERROR("%s buffer must have EUF_SHADER_DEVICE_ADDRESS_BIT usage!", groupName); - return false; - } - - if (range.offset % limits.shaderGroupBaseAlignment != 0) - { - NBL_LOG_ERROR("%s buffer offset must be multiple of %u!", limits.shaderGroupBaseAlignment); - return false; - } + m_noCommands = false; - if (stride % limits.shaderGroupHandleAlignment) - { - NBL_LOG_ERROR("%s buffer offset must be multiple of %u!", limits.shaderGroupHandleAlignment); - return false; - } + return traceRays_impl( + raygenGroupRange, raygenGroupStride, + missGroupsRange, missGroupStride, + hitGroupsRange, hitGroupStride, + callableGroupsRange, callableGroupStride, + width, height, depth); +} - if (!(buffer->getCreationParams().usage & IGPUBuffer::EUF_SHADER_BINDING_TABLE_BIT)) - { - NBL_LOG_ERROR("%s buffer must have EUF_SHADER_BINDING_TABLE_BIT usage!", groupName); - return false; - } +bool IGPUCommandBuffer::traceRaysIndirect( + const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, + const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, + const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, + const asset::SBufferBinding& binding) +{ + if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT,RENDERPASS_SCOPE::OUTSIDE)) + return false; - return true; - }; + if (m_boundPipeline == nullptr || m_boundPipeline->getBindPoint() != asset::EPBP_RAY_TRACING) + { + NBL_LOG_ERROR("invalid bound pipeline for traceRays command!"); + return false; + } + const IGPURayTracingPipeline* rayTracingPipeline = static_cast(m_boundPipeline); + const auto flags = rayTracingPipeline->getCreationFlags(); - if (!checkBufferRegion(raygenGroupRange, raygenGroupStride, "Raygen Group")) return false; - if (!checkBufferRegion(missGroupsRange, 
missGroupStride, "Miss groups")) return false; - if (!checkBufferRegion(hitGroupsRange, hitGroupStride, "Hit groups")) return false; - if (!checkBufferRegion(callableGroupsRange, callableGroupStride, "Callable groups")) return false; + if (invalidShaderGroups(raygenGroupRange, raygenGroupStride, + missGroupsRange, missGroupStride, + hitGroupsRange, hitGroupStride, + callableGroupsRange, callableGroupStride, + flags)) + { + NBL_LOG_ERROR("invalid shader groups for traceRays command!"); + return false; + } - if (!m_cmdpool->m_commandListPool.emplace(m_commandList, + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(raygenGroupRange.buffer), core::smart_refctd_ptr(missGroupsRange.buffer), core::smart_refctd_ptr(hitGroupsRange.buffer), - core::smart_refctd_ptr(callableGroupsRange.buffer))) + core::smart_refctd_ptr(callableGroupsRange.buffer), + core::smart_refctd_ptr(binding.buffer))) { NBL_LOG_ERROR("out of host memory!"); return false; } - return traceRays_impl( - raygenGroupRange, raygenGroupStride, + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/vkCmdTraceRaysIndirectKHR.html#VUID-vkCmdTraceRaysIndirectKHR-indirectDeviceAddress-03634 + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/vkCmdTraceRaysIndirectKHR.html#VUID-vkCmdTraceRaysIndirectKHR-indirectDeviceAddress-03633 + if (invalidBufferBinding(binding, 4u,IGPUBuffer::EUF_INDIRECT_BUFFER_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT)) + return false; + + m_noCommands = false; + + return traceRaysIndirect_impl( + raygenGroupRange, raygenGroupStride, missGroupsRange, missGroupStride, hitGroupsRange, hitGroupStride, callableGroupsRange, callableGroupStride, - width, height, depth); + binding); } - bool IGPUCommandBuffer::executeCommands(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs) { if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT|queue_flags_t::GRAPHICS_BIT|queue_flags_t::TRANSFER_BIT)) From 
889ebe3c4bc861dba02626bce6bdd2cedb9195d5 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 11 Feb 2025 22:45:04 +0700 Subject: [PATCH 21/68] Fix TraceRays command parameter --- include/nbl/video/IGPUCommandBuffer.h | 16 ++++++++-------- src/nbl/video/CVulkanCommandBuffer.cpp | 8 ++++---- src/nbl/video/CVulkanCommandBuffer.h | 8 ++++---- src/nbl/video/IGPUCommandBuffer.cpp | 8 ++++---- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index 08f74d4b50..39b81870f8 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ b/include/nbl/video/IGPUCommandBuffer.h @@ -526,10 +526,10 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject bool IGPUCommandBuffer::setRayTracingPipelineStackSize(uint32_t pipelineStackSize); bool IGPUCommandBuffer::traceRays( - const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, - const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, - const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, - const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, + const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, + const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, + const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, uint32_t width, uint32_t height, uint32_t depth); bool IGPUCommandBuffer::traceRaysIndirect( const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, @@ -682,10 +682,10 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject virtual bool setRayTracingPipelineStackSize_impl(uint32_t pipelineStackSize) = 0; virtual bool traceRays_impl( - const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, - const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, - const asset::SBufferRange& hitGroupsRange, 
uint32_t hitGroupStride, - const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, + const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, + const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, + const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, uint32_t width, uint32_t height, uint32_t depth) = 0; virtual bool IGPUCommandBuffer::traceRaysIndirect_impl( const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, diff --git a/src/nbl/video/CVulkanCommandBuffer.cpp b/src/nbl/video/CVulkanCommandBuffer.cpp index 4e5bf18878..e3aef505ef 100644 --- a/src/nbl/video/CVulkanCommandBuffer.cpp +++ b/src/nbl/video/CVulkanCommandBuffer.cpp @@ -823,10 +823,10 @@ bool CVulkanCommandBuffer::setRayTracingPipelineStackSize_impl(uint32_t pipeline } bool CVulkanCommandBuffer::traceRays_impl( - const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, - const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, - const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, - const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, + const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, + const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, + const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, uint32_t width, uint32_t height, uint32_t depth) { const auto vk_raygenGroupRegion = getVkStridedDeviceAddressRegion(raygenGroupRange, raygenGroupStride); diff --git a/src/nbl/video/CVulkanCommandBuffer.h b/src/nbl/video/CVulkanCommandBuffer.h index 1d49027f12..e079f3c918 100644 --- a/src/nbl/video/CVulkanCommandBuffer.h +++ b/src/nbl/video/CVulkanCommandBuffer.h @@ -227,10 +227,10 @@ class CVulkanCommandBuffer final : public IGPUCommandBuffer bool 
setRayTracingPipelineStackSize_impl(uint32_t pipelineStackSize) override; bool traceRays_impl( - const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, - const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, - const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, - const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, + const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, + const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, + const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, uint32_t width, uint32_t height, uint32_t depth) override; bool traceRaysIndirect_impl( const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index 8629fc7ec5..c4bc2db773 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -1888,10 +1888,10 @@ bool IGPUCommandBuffer::setRayTracingPipelineStackSize(uint32_t pipelineStackSiz } bool IGPUCommandBuffer::traceRays( - const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, - const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, - const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, - const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, + const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, + const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, + const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, uint32_t width, uint32_t height, uint32_t depth) { if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT,RENDERPASS_SCOPE::OUTSIDE)) From 32b6af7b2e03369711b9be0df14f7375bd4f72c1 Mon Sep 17 00:00:00 
2001 From: kevyuu Date: Wed, 12 Feb 2025 20:12:17 +0700 Subject: [PATCH 22/68] Remove unnecessary include --- include/nbl/asset/IRayTracingPipeline.h | 2 -- src/nbl/video/CVulkanCommandBuffer.cpp | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index e819e906dd..f527062eaf 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -1,10 +1,8 @@ #ifndef _NBL_ASSET_I_RAY_TRACING_PIPELINE_H_INCLUDED_ #define _NBL_ASSET_I_RAY_TRACING_PIPELINE_H_INCLUDED_ - #include "nbl/asset/IShader.h" #include "nbl/asset/IPipeline.h" -#include "nbl/asset/IRenderpass.h" #include diff --git a/src/nbl/video/CVulkanCommandBuffer.cpp b/src/nbl/video/CVulkanCommandBuffer.cpp index e3aef505ef..5bfb0be861 100644 --- a/src/nbl/video/CVulkanCommandBuffer.cpp +++ b/src/nbl/video/CVulkanCommandBuffer.cpp @@ -8,6 +8,7 @@ using namespace nbl; using namespace nbl::video; + const VolkDeviceTable& CVulkanCommandBuffer::getFunctionTable() const { return static_cast(getOriginDevice())->getFunctionTable()->vk; From 5effecd4573ce5b78287057170e85c32b6aeb075 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 25 Feb 2025 11:18:27 +0700 Subject: [PATCH 23/68] Fix indentation --- include/nbl/asset/IRayTracingPipeline.h | 128 ++++++++++++------------ 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index f527062eaf..88a1e9bf0b 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -60,89 +60,89 @@ class IRayTracingPipeline : public IPipeline, public IRayTra struct SCreationParams : IPipeline::SCreationParams { - protected: - using SpecInfo = ShaderType::SSpecInfo; - template - inline bool impl_valid(ExtraLambda&& extra) const - { - if (!IPipeline::SCreationParams::layout) - return false; - - core::bitflag stagePresence = {}; - for 
(const auto info : shaders) - if (info.shader) + protected: + using SpecInfo = ShaderType::SSpecInfo; + template + inline bool impl_valid(ExtraLambda&& extra) const + { + if (!IPipeline::SCreationParams::layout) + return false; + + core::bitflag stagePresence = {}; + for (const auto info : shaders) + if (info.shader) + { + if (!extra(info)) + return false; + const auto stage = info.shader->getStage(); + if (stage > ICPUShader::E_SHADER_STAGE::ESS_CALLABLE || stage < ICPUShader::E_SHADER_STAGE::ESS_RAYGEN) + return false; + if (stage == ICPUShader::E_SHADER_STAGE::ESS_RAYGEN && stagePresence.hasFlags(hlsl::ESS_RAYGEN)) + return false; + stagePresence |= stage; + } + + auto getShaderStage = [this](size_t index) -> ICPUShader::E_SHADER_STAGE + { + return shaders[index].shader->getStage(); + }; + + if (shaderGroups.raygenGroup.shaderIndex >= shaders.size()) + return false; + if (getShaderStage(shaderGroups.raygenGroup.shaderIndex) != ICPUShader::E_SHADER_STAGE::ESS_RAYGEN) + return false; + + auto isValidShaderIndex = [this, getShaderStage](size_t index, ICPUShader::E_SHADER_STAGE expectedStage) -> bool + { + if (index == SShaderGroupsParams::ShaderUnused) + return true; + if (index >= shaders.size()) + return false; + if (getShaderStage(index) != expectedStage) + return false; + return true; + }; + + for (const auto& shaderGroup : shaderGroups.hitGroups) { - if (!extra(info)) + if (!isValidShaderIndex(shaderGroup.anyHitShaderIndex, ICPUShader::E_SHADER_STAGE::ESS_ANY_HIT)) return false; - const auto stage = info.shader->getStage(); - if (stage > ICPUShader::E_SHADER_STAGE::ESS_CALLABLE || stage < ICPUShader::E_SHADER_STAGE::ESS_RAYGEN) + + if (!isValidShaderIndex(shaderGroup.closestHitShaderIndex, ICPUShader::E_SHADER_STAGE::ESS_CLOSEST_HIT)) return false; - if (stage == ICPUShader::E_SHADER_STAGE::ESS_RAYGEN && stagePresence.hasFlags(hlsl::ESS_RAYGEN)) + + if (!isValidShaderIndex(shaderGroup.intersectionShaderIndex, ICPUShader::E_SHADER_STAGE::ESS_INTERSECTION)) 
return false; - stagePresence |= stage; } - auto getShaderStage = [this](size_t index) -> ICPUShader::E_SHADER_STAGE + for (const auto& shaderGroup : shaderGroups.missGroups) { - return shaders[index].shader->getStage(); - }; - - if (shaderGroups.raygenGroup.shaderIndex >= shaders.size()) - return false; - if (getShaderStage(shaderGroups.raygenGroup.shaderIndex) != ICPUShader::E_SHADER_STAGE::ESS_RAYGEN) - return false; + if (!isValidShaderIndex(shaderGroup.shaderIndex, ICPUShader::E_SHADER_STAGE::ESS_MISS)) + return false; + } - auto isValidShaderIndex = [this, getShaderStage](size_t index, ICPUShader::E_SHADER_STAGE expectedStage) -> bool + for (const auto& shaderGroup : shaderGroups.callableGroups) { - if (index == SShaderGroupsParams::ShaderUnused) - return true; - if (index >= shaders.size()) + if (!isValidShaderIndex(shaderGroup.shaderIndex, ICPUShader::E_SHADER_STAGE::ESS_CALLABLE)) return false; - if (getShaderStage(index) != expectedStage) - return false; - return true; - }; - - for (const auto& shaderGroup : shaderGroups.hitGroups) - { - if (!isValidShaderIndex(shaderGroup.anyHitShaderIndex, ICPUShader::E_SHADER_STAGE::ESS_ANY_HIT)) - return false; - - if (!isValidShaderIndex(shaderGroup.closestHitShaderIndex, ICPUShader::E_SHADER_STAGE::ESS_CLOSEST_HIT)) - return false; - - if (!isValidShaderIndex(shaderGroup.intersectionShaderIndex, ICPUShader::E_SHADER_STAGE::ESS_INTERSECTION)) - return false; - } - - for (const auto& shaderGroup : shaderGroups.missGroups) - { - if (!isValidShaderIndex(shaderGroup.shaderIndex, ICPUShader::E_SHADER_STAGE::ESS_MISS)) - return false; + } + return true; } - for (const auto& shaderGroup : shaderGroups.callableGroups) + public: + inline bool valid() const { - if (!isValidShaderIndex(shaderGroup.shaderIndex, ICPUShader::E_SHADER_STAGE::ESS_CALLABLE)) - return false; - } - return true; - } - - public: - inline bool valid() const - { - return impl_valid([](const SpecInfo& info)->bool + return impl_valid([](const SpecInfo& 
info)->bool { if (!info.valid()) return false; return false; }); - } + } - std::span shaders = {}; - SShaderGroupsParams shaderGroups; - SCachedCreationParams cached = {}; + std::span shaders = {}; + SShaderGroupsParams shaderGroups; + SCachedCreationParams cached = {}; }; inline const SCachedCreationParams& getCachedCreationParams() const { return m_params; } From b5b9a7b5017442711d439683be1c00d2bec67499 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 25 Feb 2025 11:19:14 +0700 Subject: [PATCH 24/68] Fix bound pipeline don't disturb each other. --- include/nbl/video/IGPUCommandBuffer.h | 17 ++++++++++++----- src/nbl/video/IGPUCommandBuffer.cpp | 16 +++++++--------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index 39b81870f8..7b44d321e3 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ b/include/nbl/video/IGPUCommandBuffer.h @@ -549,7 +549,9 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject virtual const void* getNativeHandle() const = 0; inline const core::unordered_map& getBoundDescriptorSetsRecord() const { return m_boundDescriptorSetsRecord; } - inline const asset::IPipeline* getBoundPipeline() const { return m_boundPipeline; } + const IGPUGraphicsPipeline* getBoundGraphicsPipeline() const { return m_boundGraphicsPipeline; } + const IGPUComputePipeline* getBoundComputePipeline() const { return m_boundComputePipeline; } + const IGPURayTracingPipeline* getBoundRayTracingPipeline() const { return m_boundRayTracingPipeline; } protected: friend class IQueue; @@ -715,7 +717,9 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject m_state = STATE::INITIAL; m_boundDescriptorSetsRecord.clear(); - m_boundPipeline = nullptr; + m_boundGraphicsPipeline= nullptr; + m_boundComputePipeline= nullptr; + m_boundRayTracingPipeline= nullptr; m_commandList.head = nullptr; m_commandList.tail = nullptr; @@ -729,7 +733,9 @@ class NBL_API2 IGPUCommandBuffer : public 
IBackendObject { deleteCommandList(); m_boundDescriptorSetsRecord.clear(); - m_boundPipeline = nullptr; + m_boundGraphicsPipeline= nullptr; + m_boundComputePipeline= nullptr; + m_boundRayTracingPipeline= nullptr; releaseResourcesBackToPool_impl(); } @@ -863,8 +869,9 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject // created with IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT // or IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT. core::unordered_map m_boundDescriptorSetsRecord; - const asset::IPipeline* m_boundPipeline; - asset::E_PIPELINE_BIND_POINT m_boundPipelineBindPoint; + const IGPUGraphicsPipeline* m_boundGraphicsPipeline; + const IGPUComputePipeline* m_boundComputePipeline; + const IGPURayTracingPipeline* m_boundRayTracingPipeline; IGPUCommandPool::CCommandSegmentListPool::SCommandSegmentList m_commandList = {}; diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index c4bc2db773..d627f392fc 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -935,7 +935,7 @@ bool IGPUCommandBuffer::bindComputePipeline(const IGPUComputePipeline* const pip return false; } - m_boundPipeline = pipeline; + m_boundComputePipeline = pipeline; m_noCommands = false; bindComputePipeline_impl(pipeline); @@ -963,7 +963,7 @@ bool IGPUCommandBuffer::bindGraphicsPipeline(const IGPUGraphicsPipeline* const p return false; } - m_boundPipeline = pipeline; + m_boundGraphicsPipeline = pipeline; m_noCommands = false; return bindGraphicsPipeline_impl(pipeline); @@ -986,7 +986,7 @@ bool IGPUCommandBuffer::bindRayTracingPipeline(const IGPURayTracingPipeline* con return false; } - m_boundPipeline = pipeline; + m_boundRayTracingPipeline = pipeline; m_noCommands = false; return bindRayTracingPipeline_impl(pipeline); @@ -1903,13 +1903,12 @@ bool IGPUCommandBuffer::traceRays( return false; } - if (m_boundPipeline == nullptr || 
m_boundPipeline->getBindPoint() != asset::EPBP_RAY_TRACING) + if (m_boundRayTracingPipeline == nullptr) { NBL_LOG_ERROR("invalid bound pipeline for traceRays command!"); return false; } - const IGPURayTracingPipeline* rayTracingPipeline = static_cast(m_boundPipeline); - const auto flags = rayTracingPipeline->getCreationFlags(); + const auto flags = m_boundRayTracingPipeline->getCreationFlags(); if (invalidShaderGroups(raygenGroupRange, raygenGroupStride, missGroupsRange, missGroupStride, @@ -1951,13 +1950,12 @@ bool IGPUCommandBuffer::traceRaysIndirect( if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT,RENDERPASS_SCOPE::OUTSIDE)) return false; - if (m_boundPipeline == nullptr || m_boundPipeline->getBindPoint() != asset::EPBP_RAY_TRACING) + if (m_boundRayTracingPipeline == nullptr) { NBL_LOG_ERROR("invalid bound pipeline for traceRays command!"); return false; } - const IGPURayTracingPipeline* rayTracingPipeline = static_cast(m_boundPipeline); - const auto flags = rayTracingPipeline->getCreationFlags(); + const auto flags = m_boundRayTracingPipeline->getCreationFlags(); if (invalidShaderGroups(raygenGroupRange, raygenGroupStride, missGroupsRange, missGroupStride, From fc9917d28c2eded52972a02ec85aa2c734993ff0 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 25 Feb 2025 17:05:54 +0700 Subject: [PATCH 25/68] Remove pipeline binding point from Pipeline class --- include/nbl/asset/ICPUComputePipeline.h | 4 ++-- include/nbl/asset/IGraphicsPipeline.h | 2 +- include/nbl/asset/IPipeline.h | 6 ++---- include/nbl/asset/IRayTracingPipeline.h | 2 +- include/nbl/video/IGPUComputePipeline.h | 2 +- 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/include/nbl/asset/ICPUComputePipeline.h b/include/nbl/asset/ICPUComputePipeline.h index 6d9734c1ae..14b0277152 100644 --- a/include/nbl/asset/ICPUComputePipeline.h +++ b/include/nbl/asset/ICPUComputePipeline.h @@ -25,7 +25,7 @@ class ICPUComputePipeline : public ICPUPipeline,1> { if (!params.layout) return nullptr; - 
auto retval = new ICPUComputePipeline(core::smart_refctd_ptr(params.layout), EPBP_COMPUTE); + auto retval = new ICPUComputePipeline(core::smart_refctd_ptr(params.layout)); if (!retval->setSpecInfo(params.shader)) { retval->drop(); @@ -50,7 +50,7 @@ class ICPUComputePipeline : public ICPUPipeline,1> base_t* clone_impl(core::smart_refctd_ptr&& layout) const override { - return new ICPUComputePipeline(std::move(layout), EPBP_COMPUTE); + return new ICPUComputePipeline(std::move(layout)); } inline IAsset* getDependant_impl(const size_t ix) override diff --git a/include/nbl/asset/IGraphicsPipeline.h b/include/nbl/asset/IGraphicsPipeline.h index ec45f17cb2..62861fdc9d 100644 --- a/include/nbl/asset/IGraphicsPipeline.h +++ b/include/nbl/asset/IGraphicsPipeline.h @@ -155,7 +155,7 @@ class IGraphicsPipeline : public IPipeline, public IGraphics protected: explicit IGraphicsPipeline(const SCreationParams& _params) : - IPipeline(core::smart_refctd_ptr(_params.layout), EPBP_GRAPHICS), + IPipeline(core::smart_refctd_ptr(_params.layout)), m_params(_params.cached), m_renderpass(core::smart_refctd_ptr(_params.renderpass)) {} SCachedCreationParams m_params; diff --git a/include/nbl/asset/IPipeline.h b/include/nbl/asset/IPipeline.h index 7598e0f617..f2e7cb19e4 100644 --- a/include/nbl/asset/IPipeline.h +++ b/include/nbl/asset/IPipeline.h @@ -105,14 +105,12 @@ class IPipeline }; inline const PipelineLayout* getLayout() const {return m_layout.get();} - inline const asset::E_PIPELINE_BIND_POINT getBindPoint() const { return m_bindPoint; } protected: - inline IPipeline(core::smart_refctd_ptr&& _layout, asset::E_PIPELINE_BIND_POINT bindPoint) - : m_layout(std::move(_layout)), m_bindPoint(bindPoint) {} + inline IPipeline(core::smart_refctd_ptr&& _layout) + : m_layout(std::move(_layout)) {} core::smart_refctd_ptr m_layout; - asset::E_PIPELINE_BIND_POINT m_bindPoint; }; } diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index 
88a1e9bf0b..55a436096a 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -152,7 +152,7 @@ class IRayTracingPipeline : public IPipeline, public IRayTra protected: explicit IRayTracingPipeline(const SCreationParams& _params) : - IPipeline(core::smart_refctd_ptr(_params.layout), EPBP_RAY_TRACING), + IPipeline(core::smart_refctd_ptr(_params.layout)), m_params(_params.cached), m_raygenShaderGroup(_params.shaderGroups.raygenGroup), m_missShaderGroups(core::make_refctd_dynamic_array(_params.shaderGroups.missGroups)), diff --git a/include/nbl/video/IGPUComputePipeline.h b/include/nbl/video/IGPUComputePipeline.h index 8653fde7fd..1d5da7644c 100644 --- a/include/nbl/video/IGPUComputePipeline.h +++ b/include/nbl/video/IGPUComputePipeline.h @@ -82,7 +82,7 @@ class IGPUComputePipeline : public IBackendObject, public asset::IPipeline&& _layout, const core::bitflag _flags) : IBackendObject(core::smart_refctd_ptr(_layout->getOriginDevice())), - pipeline_t(std::move(_layout), asset::EPBP_COMPUTE), + pipeline_t(std::move(_layout)), m_flags(_flags) {} virtual ~IGPUComputePipeline() = default; From 98e1ef483f14ecf883567f69b3f6e346d836b250 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 25 Feb 2025 17:08:41 +0700 Subject: [PATCH 26/68] Move SShaderGroupParams to IRayTracingPipelineBase --- include/nbl/asset/IRayTracingPipeline.h | 59 ++++++++++++------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index 55a436096a..19973cd761 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -10,39 +10,38 @@ namespace nbl::asset { -struct SShaderGroupsParams -{ - constexpr static inline uint32_t ShaderUnused = 0xffFFffFFu; - - struct SGeneralShaderGroup - { - uint32_t shaderIndex = ShaderUnused; - }; - - struct SHitShaderGroup - { - uint32_t closestHitShaderIndex = ShaderUnused; - uint32_t 
anyHitShaderIndex = ShaderUnused; - uint32_t intersectionShaderIndex = ShaderUnused; - }; - - SGeneralShaderGroup raygenGroup; - std::span missGroups; - std::span hitGroups; - std::span callableGroups; - - inline uint32_t getShaderGroupCount() const - { - return 1 + hitGroups.size() + missGroups.size() + callableGroups.size(); - } - -}; -using SGeneralShaderGroup = SShaderGroupsParams::SGeneralShaderGroup; -using SHitShaderGroup = SShaderGroupsParams::SHitShaderGroup; - class IRayTracingPipelineBase : public virtual core::IReferenceCounted { public: + struct SShaderGroupsParams + { + constexpr static inline uint32_t ShaderUnused = 0xffFFffFFu; + + struct SGeneralShaderGroup + { + uint32_t shaderIndex = ShaderUnused; + }; + + struct SHitShaderGroup + { + uint32_t closestHitShaderIndex = ShaderUnused; + uint32_t anyHitShaderIndex = ShaderUnused; + uint32_t intersectionShaderIndex = ShaderUnused; + }; + + SGeneralShaderGroup raygenGroup; + std::span missGroups; + std::span hitGroups; + std::span callableGroups; + + inline uint32_t getShaderGroupCount() const + { + return 1 + hitGroups.size() + missGroups.size() + callableGroups.size(); + } + + }; + using SGeneralShaderGroup = SShaderGroupsParams::SGeneralShaderGroup; + using SHitShaderGroup = SShaderGroupsParams::SHitShaderGroup; struct SCachedCreationParams final { uint32_t maxRecursionDepth; From 4bed175b3459bf08363d884c6aab98915df085c7 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 25 Feb 2025 17:09:32 +0700 Subject: [PATCH 27/68] Rename parameter named binding to indirectBinding --- include/nbl/video/IGPUCommandBuffer.h | 4 ++-- src/nbl/video/CVulkanCommandBuffer.cpp | 4 ++-- src/nbl/video/CVulkanCommandBuffer.h | 2 +- src/nbl/video/IGPUCommandBuffer.cpp | 8 ++++---- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index 7b44d321e3..088191fba4 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ 
b/include/nbl/video/IGPUCommandBuffer.h @@ -536,7 +536,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, - const asset::SBufferBinding& binding); + const asset::SBufferBinding& indirectBinding); //! Secondary CommandBuffer execute bool executeCommands(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs); @@ -694,7 +694,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, - const asset::SBufferBinding& binding) = 0; + const asset::SBufferBinding& indirectBinding) = 0; virtual bool executeCommands_impl(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs) = 0; diff --git a/src/nbl/video/CVulkanCommandBuffer.cpp b/src/nbl/video/CVulkanCommandBuffer.cpp index 5bfb0be861..26d6036278 100644 --- a/src/nbl/video/CVulkanCommandBuffer.cpp +++ b/src/nbl/video/CVulkanCommandBuffer.cpp @@ -849,7 +849,7 @@ bool CVulkanCommandBuffer::traceRaysIndirect_impl( const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, - const asset::SBufferBinding& binding) + const asset::SBufferBinding& indirectBinding) { const auto vk_raygenGroupRegion = getVkStridedDeviceAddressRegion(raygenGroupRange, raygenGroupStride); const auto vk_missGroupsRegion = getVkStridedDeviceAddressRegion(missGroupsRange, missGroupStride); @@ -861,7 +861,7 @@ bool CVulkanCommandBuffer::traceRaysIndirect_impl( &vk_missGroupsRegion, &vk_hitGroupsRegion, &vk_callableGroupsRegion, - 
binding.buffer->getDeviceAddress() + binding.offset); + indirectBinding.buffer->getDeviceAddress() + indirectBinding.offset); return true; } bool CVulkanCommandBuffer::executeCommands_impl(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs) diff --git a/src/nbl/video/CVulkanCommandBuffer.h b/src/nbl/video/CVulkanCommandBuffer.h index e079f3c918..eb2c27c548 100644 --- a/src/nbl/video/CVulkanCommandBuffer.h +++ b/src/nbl/video/CVulkanCommandBuffer.h @@ -237,7 +237,7 @@ class CVulkanCommandBuffer final : public IGPUCommandBuffer const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, - const asset::SBufferBinding& binding) override; + const asset::SBufferBinding& indirectBinding) override; bool executeCommands_impl(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs) override; diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index d627f392fc..1127e48d69 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -1945,7 +1945,7 @@ bool IGPUCommandBuffer::traceRaysIndirect( const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, - const asset::SBufferBinding& binding) + const asset::SBufferBinding& indirectBinding) { if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT,RENDERPASS_SCOPE::OUTSIDE)) return false; @@ -1972,7 +1972,7 @@ bool IGPUCommandBuffer::traceRaysIndirect( core::smart_refctd_ptr(missGroupsRange.buffer), core::smart_refctd_ptr(hitGroupsRange.buffer), core::smart_refctd_ptr(callableGroupsRange.buffer), - core::smart_refctd_ptr(binding.buffer))) + core::smart_refctd_ptr(indirectBinding.buffer))) { NBL_LOG_ERROR("out of host memory!"); return 
false; @@ -1980,7 +1980,7 @@ bool IGPUCommandBuffer::traceRaysIndirect( // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/vkCmdTraceRaysIndirectKHR.html#VUID-vkCmdTraceRaysIndirectKHR-indirectDeviceAddress-03634 // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/vkCmdTraceRaysIndirectKHR.html#VUID-vkCmdTraceRaysIndirectKHR-indirectDeviceAddress-03633 - if (invalidBufferBinding(binding, 4u,IGPUBuffer::EUF_INDIRECT_BUFFER_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT)) + if (invalidBufferBinding(indirectBinding, 4u,IGPUBuffer::EUF_INDIRECT_BUFFER_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT)) return false; m_noCommands = false; @@ -1990,7 +1990,7 @@ bool IGPUCommandBuffer::traceRaysIndirect( missGroupsRange, missGroupStride, hitGroupsRange, hitGroupStride, callableGroupsRange, callableGroupStride, - binding); + indirectBinding); } bool IGPUCommandBuffer::executeCommands(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs) From bc968dced4cd7171bfcb1d83759b9ee8f4c6aff7 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 25 Feb 2025 18:21:27 +0700 Subject: [PATCH 28/68] Use ShaderGroupHandle instead of span of uint_t --- include/nbl/asset/IRayTracingPipeline.h | 7 +++ include/nbl/video/CVulkanRayTracingPipeline.h | 19 +++++--- include/nbl/video/IGPURayTracingPipeline.h | 8 ++-- src/nbl/video/CVulkanLogicalDevice.cpp | 22 +++++++--- src/nbl/video/CVulkanRayTracingPipeline.cpp | 44 +++++++++---------- 5 files changed, 61 insertions(+), 39 deletions(-) diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index 19973cd761..dee8f74cea 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -46,6 +46,13 @@ class IRayTracingPipelineBase : public virtual core::IReferenceCounted { uint32_t maxRecursionDepth; }; + + struct SShaderGroupHandle + { + private: + uint8_t data[video::SPhysicalDeviceLimits::ShaderGroupHandleSize]; + }; + 
static_assert(sizeof(SShaderGroupHandle) == video::SPhysicalDeviceLimits::ShaderGroupHandleSize); }; template diff --git a/include/nbl/video/CVulkanRayTracingPipeline.h b/include/nbl/video/CVulkanRayTracingPipeline.h index 20d5cb7ded..4be7f3bf91 100644 --- a/include/nbl/video/CVulkanRayTracingPipeline.h +++ b/include/nbl/video/CVulkanRayTracingPipeline.h @@ -14,26 +14,31 @@ class CVulkanRayTracingPipeline final : public IGPURayTracingPipeline { using ShaderRef = core::smart_refctd_ptr; using ShaderContainer = core::smart_refctd_dynamic_array; - using ShaderHandleContainer = core::smart_refctd_dynamic_array; + public: - CVulkanRayTracingPipeline(const SCreationParams& params, const VkPipeline vk_pipeline); + using ShaderGroupHandleContainer = core::smart_refctd_dynamic_array; + + CVulkanRayTracingPipeline( + const SCreationParams& params, + const VkPipeline vk_pipeline, + ShaderGroupHandleContainer&& shaderGroupHandles); inline const void* getNativeHandle() const override { return &m_vkPipeline; } inline VkPipeline getInternalObject() const { return m_vkPipeline; } - std::span getRaygenGroupShaderHandle() const override; - std::span getHitGroupShaderHandle(uint32_t index) const override; - std::span getMissGroupShaderHandle(uint32_t index) const override; - std::span getCallableGroupShaderHandle(uint32_t index) const override; + virtual const SShaderGroupHandle& getRaygen() const override; + virtual const SShaderGroupHandle& getMiss(uint32_t index) const override; + virtual const SShaderGroupHandle& getHit(uint32_t index) const override; + virtual const SShaderGroupHandle& getCallable(uint32_t index) const override; private: ~CVulkanRayTracingPipeline() override; const VkPipeline m_vkPipeline; ShaderContainer m_shaders; - ShaderHandleContainer m_shaderGroupHandles; + ShaderGroupHandleContainer m_shaderGroupHandles; }; } diff --git a/include/nbl/video/IGPURayTracingPipeline.h b/include/nbl/video/IGPURayTracingPipeline.h index 72eb7c76e5..91ced83a27 100644 --- 
a/include/nbl/video/IGPURayTracingPipeline.h +++ b/include/nbl/video/IGPURayTracingPipeline.h @@ -79,10 +79,10 @@ class IGPURayTracingPipeline : public IBackendObject, public asset::IRayTracingP // Vulkan: const VkPipeline* virtual const void* getNativeHandle() const = 0; - virtual std::span getRaygenGroupShaderHandle() const = 0; - virtual std::span getHitGroupShaderHandle(uint32_t index) const = 0; - virtual std::span getMissGroupShaderHandle(uint32_t index) const = 0; - virtual std::span getCallableGroupShaderHandle(uint32_t index) const = 0; + virtual const SShaderGroupHandle& getRaygen() const = 0; + virtual const SShaderGroupHandle& getMiss(uint32_t index) const = 0; + virtual const SShaderGroupHandle& getHit(uint32_t index) const = 0; + virtual const SShaderGroupHandle& getCallable(uint32_t index) const = 0; protected: IGPURayTracingPipeline(const SCreationParams& params) : IBackendObject(core::smart_refctd_ptr(params.layout->getOriginDevice())), diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index 9572f20822..95719c3e39 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1432,6 +1432,10 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( const IGPURayTracingPipeline::SCreationParams::SSpecializationValidationResult& validation ) { + using SShaderGroupParams = asset::IRayTracingPipelineBase::SShaderGroupsParams; + using SGeneralShaderGroup = asset::IRayTracingPipelineBase::SGeneralShaderGroup; + using SHitShaderGroup = asset::IRayTracingPipelineBase::SHitShaderGroup; + const VkPipelineCache vk_pipelineCache = pipelineCache ? 
static_cast(pipelineCache)->getInternalObject():VK_NULL_HANDLE; size_t maxShaderStages = 0; @@ -1454,8 +1458,8 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( auto outSpecInfo = vk_specializationInfos.data(); auto outSpecMapEntry = vk_specializationMapEntry.data(); auto outSpecData = specializationData.data(); - auto getVkShaderIndex = [](uint32_t index) { return index == asset::SShaderGroupsParams::ShaderUnused ? VK_SHADER_UNUSED_KHR : index; }; - auto getGeneralVkRayTracingShaderGroupCreateInfo = [getVkShaderIndex](asset::SGeneralShaderGroup group) -> VkRayTracingShaderGroupCreateInfoKHR + auto getVkShaderIndex = [](uint32_t index) { return index == SShaderGroupParams::ShaderUnused ? VK_SHADER_UNUSED_KHR : index; }; + auto getGeneralVkRayTracingShaderGroupCreateInfo = [getVkShaderIndex](SGeneralShaderGroup group) -> VkRayTracingShaderGroupCreateInfoKHR { return { .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, @@ -1467,12 +1471,12 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( .intersectionShader = VK_SHADER_UNUSED_KHR, }; }; - auto getHitVkRayTracingShaderGroupCreateInfo = [getVkShaderIndex](asset::SHitShaderGroup group) -> VkRayTracingShaderGroupCreateInfoKHR + auto getHitVkRayTracingShaderGroupCreateInfo = [getVkShaderIndex](SHitShaderGroup group) -> VkRayTracingShaderGroupCreateInfoKHR { return { .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, .pNext = nullptr, - .type = group.intersectionShaderIndex == asset::SShaderGroupsParams::ShaderUnused ? + .type = group.intersectionShaderIndex == SShaderGroupParams::ShaderUnused ? 
VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR : VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR, .generalShader = VK_SHADER_UNUSED_KHR, .closestHitShader = getVkShaderIndex(group.closestHitShaderIndex), @@ -1515,9 +1519,17 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( const VkPipeline vk_pipeline = vk_pipelines[i]; // break the lifetime cause of the aliasing std::uninitialized_default_construct_n(output+i,1); + + const auto handleCount = info.shaderGroups.getShaderGroupCount(); + const auto dataSize = handleCount * sizeof(asset::IRayTracingPipelineBase::SShaderGroupHandle); + auto shaderGroupHandles = core::make_refctd_dynamic_array(handleCount); + const auto success = m_devf.vk.vkGetRayTracingShaderGroupHandlesKHR(m_vkdev, vk_pipeline, 0, handleCount, dataSize, shaderGroupHandles->data()) == VK_SUCCESS; + assert(success); + output[i] = core::make_smart_refctd_ptr( createInfos[i], - vk_pipeline + vk_pipeline, + std::move(shaderGroupHandles) ); } } diff --git a/src/nbl/video/CVulkanRayTracingPipeline.cpp b/src/nbl/video/CVulkanRayTracingPipeline.cpp index 7bd9166342..be3c15a709 100644 --- a/src/nbl/video/CVulkanRayTracingPipeline.cpp +++ b/src/nbl/video/CVulkanRayTracingPipeline.cpp @@ -1,24 +1,28 @@ #include "nbl/video/CVulkanRayTracingPipeline.h" +#include "nbl/asset/IRayTracingPipeline.h" #include "nbl/video/CVulkanLogicalDevice.h" namespace nbl::video { - CVulkanRayTracingPipeline::CVulkanRayTracingPipeline(const SCreationParams& params, const VkPipeline vk_pipeline) : + CVulkanRayTracingPipeline::CVulkanRayTracingPipeline( + const SCreationParams& params, + const VkPipeline vk_pipeline, + ShaderGroupHandleContainer&& shaderGroupHandles) : IGPURayTracingPipeline(params), m_vkPipeline(vk_pipeline), - m_shaders(core::make_refctd_dynamic_array(params.shaders.size())) + m_shaders(core::make_refctd_dynamic_array(params.shaders.size())), + m_shaderGroupHandles(std::move(shaderGroupHandles)) { for (size_t shaderIx = 0; shaderIx < 
params.shaders.size(); shaderIx++) m_shaders->operator[](shaderIx) = ShaderRef(static_cast(params.shaders[shaderIx].shader)); const auto* vulkanDevice = static_cast(getOriginDevice()); const auto handleCount = params.shaderGroups.getShaderGroupCount(); - const auto handleSize = SPhysicalDeviceLimits::ShaderGroupHandleSize; - const auto dataSize = handleCount * handleSize; + const auto dataSize = handleCount * sizeof(SShaderGroupHandle); auto* vk = vulkanDevice->getFunctionTable(); - m_shaderGroupHandles = core::make_refctd_dynamic_array(dataSize); + m_shaderGroupHandles = core::make_refctd_dynamic_array(handleCount); vk->vk.vkGetRayTracingShaderGroupHandlesKHR(vulkanDevice->getInternalObject(), m_vkPipeline, 0, handleCount, dataSize, m_shaderGroupHandles->data()); } @@ -29,33 +33,27 @@ namespace nbl::video vk->vk.vkDestroyPipeline(vulkanDevice->getInternalObject(), m_vkPipeline, nullptr); } - std::span CVulkanRayTracingPipeline::getRaygenGroupShaderHandle() const + + const asset::IRayTracingPipelineBase::SShaderGroupHandle& CVulkanRayTracingPipeline::getRaygen() const { - const auto handleSize = SPhysicalDeviceLimits::ShaderGroupHandleSize; - return {m_shaderGroupHandles->data(), handleSize}; + return m_shaderGroupHandles->operator[](0); } - std::span CVulkanRayTracingPipeline::getMissGroupShaderHandle(uint32_t index) const + const asset::IRayTracingPipelineBase::SShaderGroupHandle& CVulkanRayTracingPipeline::getMiss(uint32_t index) const { - const auto handleSize = SPhysicalDeviceLimits::ShaderGroupHandleSize; - const auto baseOffset = handleSize; // one raygen this group - return {m_shaderGroupHandles->data() + baseOffset + index * handleSize, handleSize}; + const auto baseIndex = 1; // one raygen group before this groups + return m_shaderGroupHandles->operator[](baseIndex + index); } - std::span CVulkanRayTracingPipeline::getHitGroupShaderHandle(uint32_t index) const + const asset::IRayTracingPipelineBase::SShaderGroupHandle& 
CVulkanRayTracingPipeline::getHit(uint32_t index) const { - const auto handleSize = SPhysicalDeviceLimits::ShaderGroupHandleSize; - const auto baseOffset = handleSize + getMissGroupCount() * handleSize; // one raygen + miss groups handle before this group - return {m_shaderGroupHandles->data() + baseOffset + index * handleSize, handleSize}; + const auto baseIndex = 1 + getMissGroupCount(); // one raygen group + miss gropus before this groups + return m_shaderGroupHandles->operator[](baseIndex + index); } - std::span CVulkanRayTracingPipeline::getCallableGroupShaderHandle(uint32_t index) const + const asset::IRayTracingPipelineBase::SShaderGroupHandle& CVulkanRayTracingPipeline::getCallable(uint32_t index) const { - const auto handleSize = SPhysicalDeviceLimits::ShaderGroupHandleSize; - - // one raygen + hit groups + miss groups handle before this group - const auto baseOffset = handleSize + getMissGroupCount() * handleSize + getHitGroupCount() * handleSize; - - return {m_shaderGroupHandles->data() + baseOffset + index * handleSize, handleSize}; + const auto baseIndex = 1 + getMissGroupCount() + getHitGroupCount(); // one raygen group + miss groups + hit gropus before this groups + return m_shaderGroupHandles->operator[](baseIndex + index); } } From a60dbc6a5e9a935c67ae45da2671453fb255a910 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 25 Feb 2025 22:59:15 +0700 Subject: [PATCH 29/68] Remove unnecessary include --- src/nbl/video/CVulkanRayTracingPipeline.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/nbl/video/CVulkanRayTracingPipeline.cpp b/src/nbl/video/CVulkanRayTracingPipeline.cpp index be3c15a709..4ae3fcb7da 100644 --- a/src/nbl/video/CVulkanRayTracingPipeline.cpp +++ b/src/nbl/video/CVulkanRayTracingPipeline.cpp @@ -1,6 +1,5 @@ #include "nbl/video/CVulkanRayTracingPipeline.h" -#include "nbl/asset/IRayTracingPipeline.h" #include "nbl/video/CVulkanLogicalDevice.h" namespace nbl::video From c9b2fc66aba5075c5ff939e36292e82b43e5dfd0 Mon Sep 17 00:00:00 
2001 From: kevyuu Date: Tue, 25 Feb 2025 23:12:20 +0700 Subject: [PATCH 30/68] Remove replay flag from ray tracing pipeline --- include/nbl/video/IGPURayTracingPipeline.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/nbl/video/IGPURayTracingPipeline.h b/include/nbl/video/IGPURayTracingPipeline.h index 91ced83a27..fa2fa4d823 100644 --- a/include/nbl/video/IGPURayTracingPipeline.h +++ b/include/nbl/video/IGPURayTracingPipeline.h @@ -35,7 +35,6 @@ class IGPURayTracingPipeline : public IBackendObject, public asset::IRayTracingP RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR = 1<<15, RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR = 1<<16, RAY_TRACING_NO_NULL_INTERSECTION_SHADERS_BIT_KHR = 1<<17, - RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR = 1<<19, RAY_TRACING_ALLOW_MOTION_BIT_NV = 1<<20, }; #undef base_flag From d480405736d5a4298c08183a0c32ddba58707799 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 27 Feb 2025 14:39:38 +0700 Subject: [PATCH 31/68] Move SShaderGroupHandle to IGPURayTracingPipeline --- include/nbl/asset/IRayTracingPipeline.h | 8 -------- include/nbl/video/IGPURayTracingPipeline.h | 8 ++++++++ src/nbl/video/CVulkanLogicalDevice.cpp | 2 +- src/nbl/video/CVulkanRayTracingPipeline.cpp | 8 ++++---- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index dee8f74cea..62358e610d 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -46,13 +46,6 @@ class IRayTracingPipelineBase : public virtual core::IReferenceCounted { uint32_t maxRecursionDepth; }; - - struct SShaderGroupHandle - { - private: - uint8_t data[video::SPhysicalDeviceLimits::ShaderGroupHandleSize]; - }; - static_assert(sizeof(SShaderGroupHandle) == video::SPhysicalDeviceLimits::ShaderGroupHandleSize); }; template @@ -63,7 +56,6 @@ class IRayTracingPipeline : public IPipeline, public IRayTra using SGeneralShaderGroupContainer = 
core::smart_refctd_dynamic_array; using SHitShaderGroupContainer = core::smart_refctd_dynamic_array; - struct SCreationParams : IPipeline::SCreationParams { protected: diff --git a/include/nbl/video/IGPURayTracingPipeline.h b/include/nbl/video/IGPURayTracingPipeline.h index fa2fa4d823..3603905e7f 100644 --- a/include/nbl/video/IGPURayTracingPipeline.h +++ b/include/nbl/video/IGPURayTracingPipeline.h @@ -15,6 +15,14 @@ class IGPURayTracingPipeline : public IBackendObject, public asset::IRayTracingP using pipeline_t = asset::IRayTracingPipeline; public: + + struct SShaderGroupHandle + { + private: + uint8_t data[video::SPhysicalDeviceLimits::ShaderGroupHandleSize]; + }; + static_assert(sizeof(SShaderGroupHandle) == video::SPhysicalDeviceLimits::ShaderGroupHandleSize); + struct SCreationParams final : pipeline_t::SCreationParams, SPipelineCreationParams { #define base_flag(F) static_cast(pipeline_t::SCreationParams::FLAGS::F) diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index 95719c3e39..8723357d1b 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1521,7 +1521,7 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( std::uninitialized_default_construct_n(output+i,1); const auto handleCount = info.shaderGroups.getShaderGroupCount(); - const auto dataSize = handleCount * sizeof(asset::IRayTracingPipelineBase::SShaderGroupHandle); + const auto dataSize = handleCount * sizeof(IGPURayTracingPipeline::SShaderGroupHandle); auto shaderGroupHandles = core::make_refctd_dynamic_array(handleCount); const auto success = m_devf.vk.vkGetRayTracingShaderGroupHandlesKHR(m_vkdev, vk_pipeline, 0, handleCount, dataSize, shaderGroupHandles->data()) == VK_SUCCESS; assert(success); diff --git a/src/nbl/video/CVulkanRayTracingPipeline.cpp b/src/nbl/video/CVulkanRayTracingPipeline.cpp index 4ae3fcb7da..aad0c36be3 100644 --- a/src/nbl/video/CVulkanRayTracingPipeline.cpp +++ 
b/src/nbl/video/CVulkanRayTracingPipeline.cpp @@ -33,24 +33,24 @@ namespace nbl::video } - const asset::IRayTracingPipelineBase::SShaderGroupHandle& CVulkanRayTracingPipeline::getRaygen() const + const IGPURayTracingPipeline::SShaderGroupHandle& CVulkanRayTracingPipeline::getRaygen() const { return m_shaderGroupHandles->operator[](0); } - const asset::IRayTracingPipelineBase::SShaderGroupHandle& CVulkanRayTracingPipeline::getMiss(uint32_t index) const + const IGPURayTracingPipeline::SShaderGroupHandle& CVulkanRayTracingPipeline::getMiss(uint32_t index) const { const auto baseIndex = 1; // one raygen group before this groups return m_shaderGroupHandles->operator[](baseIndex + index); } - const asset::IRayTracingPipelineBase::SShaderGroupHandle& CVulkanRayTracingPipeline::getHit(uint32_t index) const + const IGPURayTracingPipeline::SShaderGroupHandle& CVulkanRayTracingPipeline::getHit(uint32_t index) const { const auto baseIndex = 1 + getMissGroupCount(); // one raygen group + miss gropus before this groups return m_shaderGroupHandles->operator[](baseIndex + index); } - const asset::IRayTracingPipelineBase::SShaderGroupHandle& CVulkanRayTracingPipeline::getCallable(uint32_t index) const + const IGPURayTracingPipeline::SShaderGroupHandle& CVulkanRayTracingPipeline::getCallable(uint32_t index) const { const auto baseIndex = 1 + getMissGroupCount() + getHitGroupCount(); // one raygen group + miss groups + hit gropus before this groups return m_shaderGroupHandles->operator[](baseIndex + index); From 6c6b70991f1cd41f82e9186f14023f927323e3e8 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Mar 2025 12:54:50 +0700 Subject: [PATCH 32/68] Remove shader handles query in CVulkanRayTracingPipeline --- src/nbl/video/CVulkanRayTracingPipeline.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/nbl/video/CVulkanRayTracingPipeline.cpp b/src/nbl/video/CVulkanRayTracingPipeline.cpp index aad0c36be3..675bd6ddd2 100644 --- a/src/nbl/video/CVulkanRayTracingPipeline.cpp +++ 
b/src/nbl/video/CVulkanRayTracingPipeline.cpp @@ -17,12 +17,6 @@ namespace nbl::video for (size_t shaderIx = 0; shaderIx < params.shaders.size(); shaderIx++) m_shaders->operator[](shaderIx) = ShaderRef(static_cast(params.shaders[shaderIx].shader)); - const auto* vulkanDevice = static_cast(getOriginDevice()); - const auto handleCount = params.shaderGroups.getShaderGroupCount(); - const auto dataSize = handleCount * sizeof(SShaderGroupHandle); - auto* vk = vulkanDevice->getFunctionTable(); - m_shaderGroupHandles = core::make_refctd_dynamic_array(handleCount); - vk->vk.vkGetRayTracingShaderGroupHandlesKHR(vulkanDevice->getInternalObject(), m_vkPipeline, 0, handleCount, dataSize, m_shaderGroupHandles->data()); } CVulkanRayTracingPipeline::~CVulkanRayTracingPipeline() From 5114b091240ef9306f7b6a53504157e643887325 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Mar 2025 12:56:27 +0700 Subject: [PATCH 33/68] Add space between group params and cached creation params for better readability --- include/nbl/asset/IRayTracingPipeline.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index 62358e610d..54e4a10a77 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -42,6 +42,7 @@ class IRayTracingPipelineBase : public virtual core::IReferenceCounted }; using SGeneralShaderGroup = SShaderGroupsParams::SGeneralShaderGroup; using SHitShaderGroup = SShaderGroupsParams::SHitShaderGroup; + struct SCachedCreationParams final { uint32_t maxRecursionDepth; From 3f0962cc68bcab6c2356b5caae030c389673c6c9 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Mar 2025 18:16:41 +0700 Subject: [PATCH 34/68] Improve ShaderGroupParams naming --- include/nbl/asset/IRayTracingPipeline.h | 59 ++++++++++++------------- src/nbl/video/CVulkanLogicalDevice.cpp | 20 ++++----- 2 files changed, 39 insertions(+), 40 deletions(-) diff --git 
a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index 54e4a10a77..235fed1d61 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -15,33 +15,32 @@ class IRayTracingPipelineBase : public virtual core::IReferenceCounted public: struct SShaderGroupsParams { - constexpr static inline uint32_t ShaderUnused = 0xffFFffFFu; - - struct SGeneralShaderGroup + struct SIndex { - uint32_t shaderIndex = ShaderUnused; + constexpr static inline uint32_t Unused = 0xffFFffFFu; + uint32_t index = Unused; }; - struct SHitShaderGroup + struct SHitGroup { - uint32_t closestHitShaderIndex = ShaderUnused; - uint32_t anyHitShaderIndex = ShaderUnused; - uint32_t intersectionShaderIndex = ShaderUnused; + uint32_t closestHit = SIndex::Unused; + uint32_t anyHit = SIndex::Unused; + uint32_t intersectionShader = SIndex::Unused; }; - SGeneralShaderGroup raygenGroup; - std::span missGroups; - std::span hitGroups; - std::span callableGroups; + SIndex raygen; + std::span misses; + std::span hits; + std::span callables; inline uint32_t getShaderGroupCount() const { - return 1 + hitGroups.size() + missGroups.size() + callableGroups.size(); + return 1 + hits.size() + misses.size() + callables.size(); } }; - using SGeneralShaderGroup = SShaderGroupsParams::SGeneralShaderGroup; - using SHitShaderGroup = SShaderGroupsParams::SHitShaderGroup; + using SGeneralShaderGroup = SShaderGroupsParams::SIndex; + using SHitShaderGroup = SShaderGroupsParams::SHitGroup; struct SCachedCreationParams final { @@ -86,14 +85,14 @@ class IRayTracingPipeline : public IPipeline, public IRayTra return shaders[index].shader->getStage(); }; - if (shaderGroups.raygenGroup.shaderIndex >= shaders.size()) + if (shaderGroups.raygen.index >= shaders.size()) return false; - if (getShaderStage(shaderGroups.raygenGroup.shaderIndex) != ICPUShader::E_SHADER_STAGE::ESS_RAYGEN) + if (getShaderStage(shaderGroups.raygen.index) != 
ICPUShader::E_SHADER_STAGE::ESS_RAYGEN) return false; auto isValidShaderIndex = [this, getShaderStage](size_t index, ICPUShader::E_SHADER_STAGE expectedStage) -> bool { - if (index == SShaderGroupsParams::ShaderUnused) + if (index == SShaderGroupsParams::SIndex::Unused) return true; if (index >= shaders.size()) return false; @@ -102,27 +101,27 @@ class IRayTracingPipeline : public IPipeline, public IRayTra return true; }; - for (const auto& shaderGroup : shaderGroups.hitGroups) + for (const auto& shaderGroup : shaderGroups.hits) { - if (!isValidShaderIndex(shaderGroup.anyHitShaderIndex, ICPUShader::E_SHADER_STAGE::ESS_ANY_HIT)) + if (!isValidShaderIndex(shaderGroup.anyHit, ICPUShader::E_SHADER_STAGE::ESS_ANY_HIT)) return false; - if (!isValidShaderIndex(shaderGroup.closestHitShaderIndex, ICPUShader::E_SHADER_STAGE::ESS_CLOSEST_HIT)) + if (!isValidShaderIndex(shaderGroup.closestHit, ICPUShader::E_SHADER_STAGE::ESS_CLOSEST_HIT)) return false; - if (!isValidShaderIndex(shaderGroup.intersectionShaderIndex, ICPUShader::E_SHADER_STAGE::ESS_INTERSECTION)) + if (!isValidShaderIndex(shaderGroup.intersectionShader, ICPUShader::E_SHADER_STAGE::ESS_INTERSECTION)) return false; } - for (const auto& shaderGroup : shaderGroups.missGroups) + for (const auto& shaderGroup : shaderGroups.misses) { - if (!isValidShaderIndex(shaderGroup.shaderIndex, ICPUShader::E_SHADER_STAGE::ESS_MISS)) + if (!isValidShaderIndex(shaderGroup.index, ICPUShader::E_SHADER_STAGE::ESS_MISS)) return false; } - for (const auto& shaderGroup : shaderGroups.callableGroups) + for (const auto& shaderGroup : shaderGroups.callables) { - if (!isValidShaderIndex(shaderGroup.shaderIndex, ICPUShader::E_SHADER_STAGE::ESS_CALLABLE)) + if (!isValidShaderIndex(shaderGroup.index, ICPUShader::E_SHADER_STAGE::ESS_CALLABLE)) return false; } return true; @@ -153,10 +152,10 @@ class IRayTracingPipeline : public IPipeline, public IRayTra explicit IRayTracingPipeline(const SCreationParams& _params) : 
IPipeline(core::smart_refctd_ptr(_params.layout)), m_params(_params.cached), - m_raygenShaderGroup(_params.shaderGroups.raygenGroup), - m_missShaderGroups(core::make_refctd_dynamic_array(_params.shaderGroups.missGroups)), - m_hitShaderGroups(core::make_refctd_dynamic_array(_params.shaderGroups.hitGroups)), - m_callableShaderGroups(core::make_refctd_dynamic_array(_params.shaderGroups.callableGroups)) + m_raygenShaderGroup(_params.shaderGroups.raygen), + m_missShaderGroups(core::make_refctd_dynamic_array(_params.shaderGroups.misses)), + m_hitShaderGroups(core::make_refctd_dynamic_array(_params.shaderGroups.hits)), + m_callableShaderGroups(core::make_refctd_dynamic_array(_params.shaderGroups.callables)) {} SCachedCreationParams m_params; diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index 8723357d1b..e6dc2e2fe3 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1465,7 +1465,7 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, .pNext = nullptr, .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, - .generalShader = getVkShaderIndex(group.shaderIndex), + .generalShader = getVkShaderIndex(group.index), .closestHitShader = VK_SHADER_UNUSED_KHR, .anyHitShader = VK_SHADER_UNUSED_KHR, .intersectionShader = VK_SHADER_UNUSED_KHR, @@ -1476,12 +1476,12 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( return { .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, .pNext = nullptr, - .type = group.intersectionShaderIndex == SShaderGroupParams::ShaderUnused ? + .type = group.intersectionShader == SShaderGroupParams::SIndex::Unused ? 
VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR : VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR, .generalShader = VK_SHADER_UNUSED_KHR, - .closestHitShader = getVkShaderIndex(group.closestHitShaderIndex), - .anyHitShader = getVkShaderIndex(group.anyHitShaderIndex), - .intersectionShader = getVkShaderIndex(group.intersectionShaderIndex), + .closestHitShader = getVkShaderIndex(group.closestHit), + .anyHitShader = getVkShaderIndex(group.anyHit), + .intersectionShader = getVkShaderIndex(group.intersectionShader), }; }; for (const auto& info : createInfos) @@ -1499,14 +1499,14 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( const auto& shaderGroups = info.shaderGroups; outCreateInfo->pGroups = outShaderGroup; - *(outShaderGroup++) = getGeneralVkRayTracingShaderGroupCreateInfo(shaderGroups.raygenGroup); - for (const auto& shaderGroup : shaderGroups.missGroups) + *(outShaderGroup++) = getGeneralVkRayTracingShaderGroupCreateInfo(shaderGroups.raygen); + for (const auto& shaderGroup : shaderGroups.misses) *(outShaderGroup++) = getGeneralVkRayTracingShaderGroupCreateInfo(shaderGroup); - for (const auto& shaderGroup : shaderGroups.hitGroups) + for (const auto& shaderGroup : shaderGroups.hits) *(outShaderGroup++) = getHitVkRayTracingShaderGroupCreateInfo(shaderGroup); - for (const auto& shaderGroup : shaderGroups.callableGroups) + for (const auto& shaderGroup : shaderGroups.callables) *(outShaderGroup++) = getGeneralVkRayTracingShaderGroupCreateInfo(shaderGroup); - outCreateInfo->groupCount = 1 + shaderGroups.hitGroups.size() + shaderGroups.missGroups.size() + shaderGroups.callableGroups.size(); + outCreateInfo->groupCount = 1 + shaderGroups.hits.size() + shaderGroups.misses.size() + shaderGroups.callables.size(); outCreateInfo->maxPipelineRayRecursionDepth = info.cached.maxRecursionDepth; } From a03701f20b721be775d07949ec59d89dc7a9e4b7 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Mar 2025 18:17:31 +0700 Subject: [PATCH 35/68] Improve 
checking for shader stage --- include/nbl/asset/IRayTracingPipeline.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index 235fed1d61..f810240c71 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -5,7 +5,8 @@ #include "nbl/asset/IPipeline.h" #include - +#include +#include namespace nbl::asset { @@ -73,9 +74,9 @@ class IRayTracingPipeline : public IPipeline, public IRayTra if (!extra(info)) return false; const auto stage = info.shader->getStage(); - if (stage > ICPUShader::E_SHADER_STAGE::ESS_CALLABLE || stage < ICPUShader::E_SHADER_STAGE::ESS_RAYGEN) + if ((stage & ~ICPUShader::E_SHADER_STAGE::ESS_ALL_RAY_TRACING)!=0) return false; - if (stage == ICPUShader::E_SHADER_STAGE::ESS_RAYGEN && stagePresence.hasFlags(hlsl::ESS_RAYGEN)) + if (!std::has_single_bit>(stage)) return false; stagePresence |= stage; } From 44b450ed933899b098f815c167387811e598c630 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Mar 2025 18:18:00 +0700 Subject: [PATCH 36/68] Improve get group count method --- include/nbl/asset/IRayTracingPipeline.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index f810240c71..1c91b36aba 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -145,9 +145,9 @@ class IRayTracingPipeline : public IPipeline, public IRayTra }; inline const SCachedCreationParams& getCachedCreationParams() const { return m_params; } - size_t getHitGroupCount() const { return m_hitShaderGroups->size(); } - size_t getMissGroupCount() const { return m_missShaderGroups->size(); } - size_t getCallableGroupCount() const { return m_callableShaderGroups->size(); } + inline uint32_t getHitGroupCount() const { return m_hitShaderGroups->size(); } + inline uint32_t getMissGroupCount() 
const { return m_missShaderGroups->size(); } + inline uint32_t getCallableGroupCount() const { return m_callableShaderGroups->size(); } protected: explicit IRayTracingPipeline(const SCreationParams& _params) : From 9286a71b3b8ebec4399c951b7301f38b24ef04e4 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Mar 2025 18:18:35 +0700 Subject: [PATCH 37/68] Fix maxShaderGroup calculation --- src/nbl/video/CVulkanLogicalDevice.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index e6dc2e2fe3..e17b88a70e 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1441,12 +1441,15 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( size_t maxShaderStages = 0; for (const auto& info : createInfos) maxShaderStages += info.shaders.size(); + size_t maxShaderGroups = 0; + for (const auto& info : createInfos) + maxShaderGroups += info.shaderGroups.getShaderGroupCount(); core::vector vk_createInfos(createInfos.size(), { VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR,nullptr }); core::vector vk_requiredSubgroupSize(maxShaderStages,{ VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO,nullptr }); core::vector vk_shaderStage(maxShaderStages, { VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, nullptr }); - core::vector vk_shaderGroup(maxShaderStages, { VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, nullptr}); + core::vector vk_shaderGroup(maxShaderGroups, { VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, nullptr}); core::vector vk_specializationInfos(maxShaderStages, { 0, nullptr, 0, nullptr }); core::vector vk_specializationMapEntry(validation.count); core::vector specializationData(validation.dataSize); @@ -1458,7 +1461,7 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( auto outSpecInfo = vk_specializationInfos.data(); auto outSpecMapEntry = 
vk_specializationMapEntry.data(); auto outSpecData = specializationData.data(); - auto getVkShaderIndex = [](uint32_t index) { return index == SShaderGroupParams::ShaderUnused ? VK_SHADER_UNUSED_KHR : index; }; + auto getVkShaderIndex = [](uint32_t index) { return index == SShaderGroupParams::SIndex::Unused ? VK_SHADER_UNUSED_KHR : index; }; auto getGeneralVkRayTracingShaderGroupCreateInfo = [getVkShaderIndex](SGeneralShaderGroup group) -> VkRayTracingShaderGroupCreateInfoKHR { return { From cc3496235621131986efd2d02507853faee56fc6 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Mar 2025 18:18:58 +0700 Subject: [PATCH 38/68] Check that shaders is not empty --- src/nbl/video/CVulkanLogicalDevice.cpp | 1 + src/nbl/video/ILogicalDevice.cpp | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index e17b88a70e..25e335adfd 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1499,6 +1499,7 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( } } outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages,outShaderStage); + assert(outCreateInfo->stageCount != 0); const auto& shaderGroups = info.shaderGroups; outCreateInfo->pGroups = outShaderGroup; diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index 26515be548..888d595e51 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -974,8 +974,13 @@ bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipeline { if (param.cached.maxRecursionDepth > limits.maxRayRecursionDepth) { - NBL_LOG_ERROR("Invalid maxRecursionDepth. maxRecursionDepth(%u) exceed the limits(%u)", param.cached.maxRecursionDepth, limits.maxRayRecursionDepth); - return false; + NBL_LOG_ERROR("Invalid maxRecursionDepth. 
maxRecursionDepth(%u) exceed the limits(%u)", param.cached.maxRecursionDepth, limits.maxRayRecursionDepth); + return false; + } + if (param.getShaders().empty()) + { + NBL_LOG_ERROR("Pipeline must have at least one shader."); + return false; } } From 41ffc50862a583e713c61d532f273d3322c69456 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Mar 2025 18:19:23 +0700 Subject: [PATCH 39/68] Check for maxShaderGroupStride compliance --- src/nbl/video/IGPUCommandBuffer.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index 1127e48d69..42421cd221 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -736,14 +736,20 @@ bool IGPUCommandBuffer::invalidShaderGroups( // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-pRayGenShaderBindingTable-03689 if (range.offset % limits.shaderGroupBaseAlignment != 0) { - NBL_LOG_ERROR("%s buffer offset must be multiple of %u!", limits.shaderGroupBaseAlignment); + NBL_LOG_ERROR("%s buffer offset must be multiple of %u!", groupName, limits.shaderGroupBaseAlignment); return true; } // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-pHitShaderBindingTable-03690 if (stride % limits.shaderGroupHandleAlignment) { - NBL_LOG_ERROR("%s buffer offset must be multiple of %u!", limits.shaderGroupHandleAlignment); + NBL_LOG_ERROR("%s buffer stride must be multiple of %u!", groupName, limits.shaderGroupHandleAlignment); + return true; + } + + if (stride > limits.maxShaderGroupStride) + { + NBL_LOG_ERROR("%s buffer stride must not exceed %u!", groupName, limits.maxShaderGroupStride); return true; } From cff672c7c28b8432eba06ea46401ff532a63c6ce Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 10 Mar 2025 17:02:57 +0700 Subject: [PATCH 40/68] Add more validation for trace ray dimensions.
--- src/nbl/video/IGPUCommandBuffer.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index 42421cd221..00f9215c8c 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -1903,12 +1903,23 @@ bool IGPUCommandBuffer::traceRays( if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT,RENDERPASS_SCOPE::OUTSIDE)) return false; - if (width == 0 || height == 0 || depth == 0) + const auto& limits = getOriginDevice()->getPhysicalDevice()->getLimits(); + const auto maxWidth = limits.maxComputeWorkGroupCount[0] * limits.maxWorkgroupSize[0]; + const auto maxHeight = limits.maxComputeWorkGroupCount[1] * limits.maxWorkgroupSize[1]; + const auto maxDepth = limits.maxComputeWorkGroupCount[2] * limits.maxWorkgroupSize[2]; + if (width == 0 || height == 0 || depth == 0 || width > maxWidth || height > maxHeight || depth > maxDepth) { NBL_LOG_ERROR("invalid work counts (%d, %d, %d)!", width, height, depth); return false; } + const auto invocationCount = width * height * depth; + if (invocationCount > limits.maxRayDispatchInvocationCount) + { + NBL_LOG_ERROR("invalid invocation count (%d)!", invocationCount); + return false; + } + if (m_boundRayTracingPipeline == nullptr) { NBL_LOG_ERROR("invalid bound pipeline for traceRays command!"); From 1db0c8ff637793d405674e39168b1a495d7dd73d Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 10 Mar 2025 17:39:09 +0700 Subject: [PATCH 41/68] Fix shader group buffer alignment validation. 
--- src/nbl/video/IGPUCommandBuffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index 00f9215c8c..4fa6ddcfe3 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -734,7 +734,7 @@ bool IGPUCommandBuffer::invalidShaderGroups( } // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-pRayGenShaderBindingTable-03689 - if (range.offset % limits.shaderGroupBaseAlignment != 0) + if ((range.buffer->getDeviceAddress() + range.offset) % limits.shaderGroupBaseAlignment != 0) { NBL_LOG_ERROR("%s buffer offset must be multiple of %u!", groupName, limits.shaderGroupBaseAlignment); return true; From b502404d08fc4c4cb20c434a67bb733e2554aee9 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 10 Mar 2025 17:39:40 +0700 Subject: [PATCH 42/68] Remove unnecessary raygenGroupStride parameter. --- include/nbl/video/IGPUCommandBuffer.h | 10 +++++----- src/nbl/video/CVulkanCommandBuffer.cpp | 8 ++++---- src/nbl/video/CVulkanCommandBuffer.h | 4 ++-- src/nbl/video/IGPUCommandBuffer.cpp | 16 ++++++++-------- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index 088191fba4..15af55e964 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ b/include/nbl/video/IGPUCommandBuffer.h @@ -526,13 +526,13 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject bool IGPUCommandBuffer::setRayTracingPipelineStackSize(uint32_t pipelineStackSize); bool IGPUCommandBuffer::traceRays( - const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& raygenGroupRange, const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, uint32_t
width, uint32_t height, uint32_t depth); bool IGPUCommandBuffer::traceRaysIndirect( - const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& raygenGroupRange, const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, @@ -684,13 +684,13 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject virtual bool setRayTracingPipelineStackSize_impl(uint32_t pipelineStackSize) = 0; virtual bool traceRays_impl( - const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& raygenGroupRange, const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, uint32_t width, uint32_t height, uint32_t depth) = 0; virtual bool IGPUCommandBuffer::traceRaysIndirect_impl( - const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& raygenGroupRange, const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, @@ -847,7 +847,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject } bool invalidShaderGroups( - const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& raygenGroupRange, const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, diff --git a/src/nbl/video/CVulkanCommandBuffer.cpp b/src/nbl/video/CVulkanCommandBuffer.cpp index 26d6036278..7b42abd022 100644 --- 
a/src/nbl/video/CVulkanCommandBuffer.cpp +++ b/src/nbl/video/CVulkanCommandBuffer.cpp @@ -824,13 +824,13 @@ bool CVulkanCommandBuffer::setRayTracingPipelineStackSize_impl(uint32_t pipeline } bool CVulkanCommandBuffer::traceRays_impl( - const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& raygenGroupRange, const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, uint32_t width, uint32_t height, uint32_t depth) { - const auto vk_raygenGroupRegion = getVkStridedDeviceAddressRegion(raygenGroupRange, raygenGroupStride); + const auto vk_raygenGroupRegion = getVkStridedDeviceAddressRegion(raygenGroupRange, raygenGroupRange.size); const auto vk_missGroupsRegion = getVkStridedDeviceAddressRegion(missGroupsRange, missGroupStride); const auto vk_hitGroupsRegion = getVkStridedDeviceAddressRegion(hitGroupsRange, hitGroupStride); const auto vk_callableGroupsRegion = getVkStridedDeviceAddressRegion(callableGroupsRange, callableGroupStride); @@ -845,13 +845,13 @@ bool CVulkanCommandBuffer::traceRays_impl( } bool CVulkanCommandBuffer::traceRaysIndirect_impl( - const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& raygenGroupRange, const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, const asset::SBufferBinding& indirectBinding) { - const auto vk_raygenGroupRegion = getVkStridedDeviceAddressRegion(raygenGroupRange, raygenGroupStride); + const auto vk_raygenGroupRegion = getVkStridedDeviceAddressRegion(raygenGroupRange, raygenGroupRange.size); const auto vk_missGroupsRegion = getVkStridedDeviceAddressRegion(missGroupsRange, missGroupStride); const auto vk_hitGroupsRegion = 
getVkStridedDeviceAddressRegion(hitGroupsRange, hitGroupStride); const auto vk_callableGroupsRegion = getVkStridedDeviceAddressRegion(callableGroupsRange, callableGroupStride); diff --git a/src/nbl/video/CVulkanCommandBuffer.h b/src/nbl/video/CVulkanCommandBuffer.h index eb2c27c548..4549de22a3 100644 --- a/src/nbl/video/CVulkanCommandBuffer.h +++ b/src/nbl/video/CVulkanCommandBuffer.h @@ -227,13 +227,13 @@ class CVulkanCommandBuffer final : public IGPUCommandBuffer bool setRayTracingPipelineStackSize_impl(uint32_t pipelineStackSize) override; bool traceRays_impl( - const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& raygenGroupRange, const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, uint32_t width, uint32_t height, uint32_t depth) override; bool traceRaysIndirect_impl( - const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& raygenGroupRange, const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index 4fa6ddcfe3..c17c6860c7 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -683,7 +683,7 @@ bool IGPUCommandBuffer::copyImage(const IGPUImage* const srcImage, const IGPUIma } bool IGPUCommandBuffer::invalidShaderGroups( - const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& raygenGroupRange, const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, const asset::SBufferRange& callableGroupsRange, 
uint32_t callableGroupStride, @@ -763,7 +763,7 @@ bool IGPUCommandBuffer::invalidShaderGroups( return false; }; - if (invalidBufferRegion(raygenGroupRange, raygenGroupStride, "Raygen Group")) return true; + if (invalidBufferRegion(raygenGroupRange, raygenGroupRange.size, "Raygen Group")) return true; if (invalidBufferRegion(missGroupsRange, missGroupStride, "Miss groups")) return true; if (invalidBufferRegion(hitGroupsRange, hitGroupStride, "Hit groups")) return true; if (invalidBufferRegion(callableGroupsRange, callableGroupStride, "Callable groups")) return true; @@ -1894,7 +1894,7 @@ bool IGPUCommandBuffer::setRayTracingPipelineStackSize(uint32_t pipelineStackSiz } bool IGPUCommandBuffer::traceRays( - const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& raygenGroupRange, const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, @@ -1927,7 +1927,7 @@ bool IGPUCommandBuffer::traceRays( } const auto flags = m_boundRayTracingPipeline->getCreationFlags(); - if (invalidShaderGroups(raygenGroupRange, raygenGroupStride, + if (invalidShaderGroups(raygenGroupRange, missGroupsRange, missGroupStride, hitGroupsRange, hitGroupStride, callableGroupsRange, callableGroupStride, @@ -1950,7 +1950,7 @@ bool IGPUCommandBuffer::traceRays( m_noCommands = false; return traceRays_impl( - raygenGroupRange, raygenGroupStride, + raygenGroupRange, missGroupsRange, missGroupStride, hitGroupsRange, hitGroupStride, callableGroupsRange, callableGroupStride, @@ -1958,7 +1958,7 @@ bool IGPUCommandBuffer::traceRays( } bool IGPUCommandBuffer::traceRaysIndirect( - const asset::SBufferRange& raygenGroupRange, uint32_t raygenGroupStride, + const asset::SBufferRange& raygenGroupRange, const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, const asset::SBufferRange& hitGroupsRange, 
uint32_t hitGroupStride, const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, @@ -1974,7 +1974,7 @@ bool IGPUCommandBuffer::traceRaysIndirect( } const auto flags = m_boundRayTracingPipeline->getCreationFlags(); - if (invalidShaderGroups(raygenGroupRange, raygenGroupStride, + if (invalidShaderGroups(raygenGroupRange, missGroupsRange, missGroupStride, hitGroupsRange, hitGroupStride, callableGroupsRange, callableGroupStride, @@ -2003,7 +2003,7 @@ bool IGPUCommandBuffer::traceRaysIndirect( m_noCommands = false; return traceRaysIndirect_impl( - raygenGroupRange, raygenGroupStride, + raygenGroupRange, missGroupsRange, missGroupStride, hitGroupsRange, hitGroupStride, callableGroupsRange, callableGroupStride, From 85114bee235d26ba5a93b83624a3b2b7dcfc008e Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 10 Mar 2025 20:28:50 +0700 Subject: [PATCH 43/68] Fix set stack size dynamic state handling. --- include/nbl/asset/IRayTracingPipeline.h | 3 ++- src/nbl/video/CVulkanLogicalDevice.cpp | 13 +++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index 1c91b36aba..81f93bbd2d 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -45,7 +45,8 @@ class IRayTracingPipelineBase : public virtual core::IReferenceCounted struct SCachedCreationParams final { - uint32_t maxRecursionDepth; + uint32_t maxRecursionDepth : 6 = 0; + uint32_t dynamicStackSize : 1 = false; }; }; diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index 25e335adfd..7ff7fdb15a 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1436,6 +1436,15 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( using SGeneralShaderGroup = asset::IRayTracingPipelineBase::SGeneralShaderGroup; using SHitShaderGroup = 
asset::IRayTracingPipelineBase::SHitShaderGroup; + const auto dynamicStates = std::array{ VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR }; + const VkPipelineDynamicStateCreateInfo vk_dynamicStateCreateInfo = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0u, + .dynamicStateCount = dynamicStates.size(), + .pDynamicStates = dynamicStates.data(), + }; + const VkPipelineCache vk_pipelineCache = pipelineCache ? static_cast(pipelineCache)->getInternalObject():VK_NULL_HANDLE; size_t maxShaderStages = 0; @@ -1512,6 +1521,10 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( *(outShaderGroup++) = getGeneralVkRayTracingShaderGroupCreateInfo(shaderGroup); outCreateInfo->groupCount = 1 + shaderGroups.hits.size() + shaderGroups.misses.size() + shaderGroups.callables.size(); outCreateInfo->maxPipelineRayRecursionDepth = info.cached.maxRecursionDepth; + if (info.cached.dynamicStackSize) + { + outCreateInfo->pDynamicState = &vk_dynamicStateCreateInfo; + } } auto vk_pipelines = reinterpret_cast(output); From 20ab6d61e7596842257addc76f7cd59280914fe3 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 10 Mar 2025 21:09:58 +0700 Subject: [PATCH 44/68] Add setRayTracingStackSize validation --- include/nbl/video/IGPUCommandBuffer.h | 3 +++ src/nbl/video/IGPUCommandBuffer.cpp | 17 +++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index 15af55e964..a5dc18781a 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ b/include/nbl/video/IGPUCommandBuffer.h @@ -720,6 +720,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject m_boundGraphicsPipeline= nullptr; m_boundComputePipeline= nullptr; m_boundRayTracingPipeline= nullptr; + m_haveRtPipelineStackSize = false; m_commandList.head = nullptr; m_commandList.tail = nullptr; @@ -736,6 +737,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject m_boundGraphicsPipeline= nullptr; 
m_boundComputePipeline= nullptr; m_boundRayTracingPipeline= nullptr; + m_haveRtPipelineStackSize = false; releaseResourcesBackToPool_impl(); } @@ -878,6 +880,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject uint64_t m_resetCheckedStamp; STATE m_state = STATE::INITIAL; bool m_noCommands = true; + bool m_haveRtPipelineStackSize = false; // only useful while recording SInheritanceInfo m_cachedInheritanceInfo; core::bitflag m_recordingFlags = USAGE::NONE; diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index c17c6860c7..36c726e61f 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -1890,6 +1890,9 @@ bool IGPUCommandBuffer::resolveImage(const IGPUImage* const srcImage, const IGPU bool IGPUCommandBuffer::setRayTracingPipelineStackSize(uint32_t pipelineStackSize) { + if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT,RENDERPASS_SCOPE::OUTSIDE)) + return false; + m_haveRtPipelineStackSize = true; return setRayTracingPipelineStackSize_impl(pipelineStackSize); } @@ -1937,6 +1940,13 @@ bool IGPUCommandBuffer::traceRays( return false; } + // https://docs.vulkan.org/spec/latest/chapters/raytracing.html#VUID-vkCmdTraceRaysKHR-None-09458 + if (m_boundRayTracingPipeline->getCachedCreationParams().dynamicStackSize && !m_haveRtPipelineStackSize) + { + NBL_LOG_ERROR("no setRayTracingPipelineStackSize command submitted before traceRays command with dynamic stack size pipeline!"); + return false; + } + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(raygenGroupRange.buffer), core::smart_refctd_ptr(missGroupsRange.buffer), @@ -1984,6 +1994,13 @@ bool IGPUCommandBuffer::traceRaysIndirect( return false; } + // https://docs.vulkan.org/spec/latest/chapters/raytracing.html#VUID-vkCmdTraceRaysIndirect2KHR-None-09458 + if (m_boundRayTracingPipeline->getCachedCreationParams().dynamicStackSize && !m_haveRtPipelineStackSize) + { + NBL_LOG_ERROR("no 
setRayTracingPipelineStackSize command submitted before traceRays command with dynamic stack size pipeline!"); + return false; + } + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(raygenGroupRange.buffer), core::smart_refctd_ptr(missGroupsRange.buffer), From 8294f83f92b3ad161d96215abfc5eb15ccbbddbb Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 10 Mar 2025 22:52:06 +0700 Subject: [PATCH 45/68] Convert traceRaysIndirect to use vkCmdTraceRaysIndirect2KHR. --- .../nbl/builtin/hlsl/indirect_commands.hlsl | 12 +++++ include/nbl/video/IGPUCommandBuffer.h | 11 +---- include/nbl/video/IGPUCommandPool.h | 13 ----- src/nbl/video/CVulkanCommandBuffer.cpp | 19 +------ src/nbl/video/CVulkanCommandBuffer.h | 7 +-- src/nbl/video/IGPUCommandBuffer.cpp | 49 ++++++++----------- 6 files changed, 37 insertions(+), 74 deletions(-) diff --git a/include/nbl/builtin/hlsl/indirect_commands.hlsl b/include/nbl/builtin/hlsl/indirect_commands.hlsl index ae8924562f..ca8418bde7 100644 --- a/include/nbl/builtin/hlsl/indirect_commands.hlsl +++ b/include/nbl/builtin/hlsl/indirect_commands.hlsl @@ -39,10 +39,22 @@ struct DispatchIndirectCommand_t struct TraceRaysIndirectCommand_t { + uint64_t raygenShaderRecordAddress; + uint64_t raygenShaderRecordSize; + uint64_t missShaderBindingTableAddress; + uint64_t missShaderBindingTableSize; + uint64_t missShaderBindingTableStride; + uint64_t hitShaderBindingTableAddress; + uint64_t hitShaderBindingTableSize; + uint64_t hitShaderBindingTableStride; + uint64_t callableShaderBindingTableAddress; + uint64_t callableShaderBindingTableSize; + uint64_t callableShaderBindingTableStride; uint32_t width; uint32_t height; uint32_t depth; }; + } } diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index a5dc18781a..a1311756b8 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ b/include/nbl/video/IGPUCommandBuffer.h @@ -531,12 +531,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject 
const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, uint32_t width, uint32_t height, uint32_t depth); - bool IGPUCommandBuffer::traceRaysIndirect( - const asset::SBufferRange& raygenGroupRange, - const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, - const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, - const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, - const asset::SBufferBinding& indirectBinding); + bool IGPUCommandBuffer::traceRaysIndirect(const asset::SBufferBinding& indirectBinding); //! Secondary CommandBuffer execute bool executeCommands(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs); @@ -690,10 +685,6 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, uint32_t width, uint32_t height, uint32_t depth) = 0; virtual bool IGPUCommandBuffer::traceRaysIndirect_impl( - const asset::SBufferRange& raygenGroupRange, - const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, - const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, - const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, const asset::SBufferBinding& indirectBinding) = 0; virtual bool executeCommands_impl(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs) = 0; diff --git a/include/nbl/video/IGPUCommandPool.h b/include/nbl/video/IGPUCommandPool.h index baede33259..6b3a5353a0 100644 --- a/include/nbl/video/IGPUCommandPool.h +++ b/include/nbl/video/IGPUCommandPool.h @@ -851,23 +851,10 @@ class IGPUCommandPool::CTraceRaysIndirectCmd final : public IFixedSizeCommand&& raygenGroupBuffer, - core::smart_refctd_ptr&& hitGroupsBuffer, - core::smart_refctd_ptr&& missGroupsBuffer, - core::smart_refctd_ptr&& callableGroupsBuffer, core::smart_refctd_ptr&& bindingBuffer) : - 
m_raygenGroupBuffer(raygenGroupBuffer), - m_hitGroupsBuffer(hitGroupsBuffer), - m_missGroupsBuffer(missGroupsBuffer), - m_callableGroupsBuffer(callableGroupsBuffer), m_bindingBuffer(bindingBuffer) {} - private: - core::smart_refctd_ptr m_raygenGroupBuffer; - core::smart_refctd_ptr m_hitGroupsBuffer; - core::smart_refctd_ptr m_missGroupsBuffer; - core::smart_refctd_ptr m_callableGroupsBuffer; core::smart_refctd_ptr m_bindingBuffer; }; diff --git a/src/nbl/video/CVulkanCommandBuffer.cpp b/src/nbl/video/CVulkanCommandBuffer.cpp index 7b42abd022..298a093c6c 100644 --- a/src/nbl/video/CVulkanCommandBuffer.cpp +++ b/src/nbl/video/CVulkanCommandBuffer.cpp @@ -844,24 +844,9 @@ bool CVulkanCommandBuffer::traceRays_impl( return true; } -bool CVulkanCommandBuffer::traceRaysIndirect_impl( - const asset::SBufferRange& raygenGroupRange, - const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, - const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, - const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, - const asset::SBufferBinding& indirectBinding) +bool CVulkanCommandBuffer::traceRaysIndirect_impl(const asset::SBufferBinding& indirectBinding) { - const auto vk_raygenGroupRegion = getVkStridedDeviceAddressRegion(raygenGroupRange, raygenGroupRange.size); - const auto vk_missGroupsRegion = getVkStridedDeviceAddressRegion(missGroupsRange, missGroupStride); - const auto vk_hitGroupsRegion = getVkStridedDeviceAddressRegion(hitGroupsRange, hitGroupStride); - const auto vk_callableGroupsRegion = getVkStridedDeviceAddressRegion(callableGroupsRange, callableGroupStride); - - getFunctionTable().vkCmdTraceRaysIndirectKHR(m_cmdbuf, - &vk_raygenGroupRegion, - &vk_missGroupsRegion, - &vk_hitGroupsRegion, - &vk_callableGroupsRegion, - indirectBinding.buffer->getDeviceAddress() + indirectBinding.offset); + getFunctionTable().vkCmdTraceRaysIndirect2KHR(m_cmdbuf, indirectBinding.buffer->getDeviceAddress() + indirectBinding.offset); return 
true; } bool CVulkanCommandBuffer::executeCommands_impl(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs) diff --git a/src/nbl/video/CVulkanCommandBuffer.h b/src/nbl/video/CVulkanCommandBuffer.h index 4549de22a3..1669a4dc15 100644 --- a/src/nbl/video/CVulkanCommandBuffer.h +++ b/src/nbl/video/CVulkanCommandBuffer.h @@ -232,12 +232,7 @@ class CVulkanCommandBuffer final : public IGPUCommandBuffer const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, uint32_t width, uint32_t height, uint32_t depth) override; - bool traceRaysIndirect_impl( - const asset::SBufferRange& raygenGroupRange, - const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, - const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, - const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, - const asset::SBufferBinding& indirectBinding) override; + bool traceRaysIndirect_impl(const asset::SBufferBinding& indirectBinding) override; bool executeCommands_impl(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs) override; diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index 36c726e61f..4301d48c5f 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -5,6 +5,8 @@ #define NBL_LOG_FUNCTION m_logger.log #include "nbl/logging_macros.h" +#include "nbl/builtin/hlsl/indirect_commands.hlsl" + namespace nbl::video { @@ -992,6 +994,11 @@ bool IGPUCommandBuffer::bindRayTracingPipeline(const IGPURayTracingPipeline* con return false; } + if (!pipeline->getCachedCreationParams().dynamicStackSize) + { + m_haveRtPipelineStackSize = false; + } + m_boundRayTracingPipeline = pipeline; m_noCommands = false; @@ -1892,6 +1899,10 @@ bool IGPUCommandBuffer::setRayTracingPipelineStackSize(uint32_t pipelineStackSiz { if 
(!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT,RENDERPASS_SCOPE::OUTSIDE)) return false; + if (m_boundRayTracingPipeline != nullptr && m_boundRayTracingPipeline->getCachedCreationParams().dynamicStackSize) + { + NBL_LOG_ERROR("Cannot set dynamic state when state is not mark as dynamic on bound pipeline!"); + } m_haveRtPipelineStackSize = true; return setRayTracingPipelineStackSize_impl(pipelineStackSize); } @@ -1967,12 +1978,7 @@ bool IGPUCommandBuffer::traceRays( width, height, depth); } -bool IGPUCommandBuffer::traceRaysIndirect( - const asset::SBufferRange& raygenGroupRange, - const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, - const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, - const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, - const asset::SBufferBinding& indirectBinding) +bool IGPUCommandBuffer::traceRaysIndirect(const asset::SBufferBinding& indirectBinding) { if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT,RENDERPASS_SCOPE::OUTSIDE)) return false; @@ -1982,15 +1988,16 @@ bool IGPUCommandBuffer::traceRaysIndirect( NBL_LOG_ERROR("invalid bound pipeline for traceRays command!"); return false; } - const auto flags = m_boundRayTracingPipeline->getCreationFlags(); - if (invalidShaderGroups(raygenGroupRange, - missGroupsRange, missGroupStride, - hitGroupsRange, hitGroupStride, - callableGroupsRange, callableGroupStride, - flags)) + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/vkCmdTraceRaysIndirectKHR.html#VUID-vkCmdTraceRaysIndirectKHR-indirectDeviceAddress-03634 + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/vkCmdTraceRaysIndirectKHR.html#VUID-vkCmdTraceRaysIndirectKHR-indirectDeviceAddress-03633 + if (invalidBufferBinding(indirectBinding, 4u,IGPUBuffer::EUF_INDIRECT_BUFFER_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT)) + return false; + + 
https://docs.vulkan.org/spec/latest/chapters/raytracing.html#VUID-vkCmdTraceRaysIndirect2KHR-indirectDeviceAddress-03633 + if (indirectBinding.buffer->getSize() - indirectBinding.offset <= sizeof(hlsl::TraceRaysIndirectCommand_t)) { - NBL_LOG_ERROR("invalid shader groups for traceRays command!"); + NBL_LOG_ERROR("buffer size - offset must be at least the size of TraceRaysIndirectCommand_t!"); return false; } @@ -2002,29 +2009,15 @@ bool IGPUCommandBuffer::traceRaysIndirect( } if (!m_cmdpool->m_commandListPool.emplace(m_commandList, - core::smart_refctd_ptr(raygenGroupRange.buffer), - core::smart_refctd_ptr(missGroupsRange.buffer), - core::smart_refctd_ptr(hitGroupsRange.buffer), - core::smart_refctd_ptr(callableGroupsRange.buffer), core::smart_refctd_ptr(indirectBinding.buffer))) { NBL_LOG_ERROR("out of host memory!"); return false; } - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/vkCmdTraceRaysIndirectKHR.html#VUID-vkCmdTraceRaysIndirectKHR-indirectDeviceAddress-03634 - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/vkCmdTraceRaysIndirectKHR.html#VUID-vkCmdTraceRaysIndirectKHR-indirectDeviceAddress-03633 - if (invalidBufferBinding(indirectBinding, 4u,IGPUBuffer::EUF_INDIRECT_BUFFER_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT)) - return false; - m_noCommands = false; - return traceRaysIndirect_impl( - raygenGroupRange, - missGroupsRange, missGroupStride, - hitGroupsRange, hitGroupStride, - callableGroupsRange, callableGroupStride, - indirectBinding); + return traceRaysIndirect_impl(indirectBinding); } bool IGPUCommandBuffer::executeCommands(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs) From 1a4d0e6e2ae11331dde73222d4c5c2aa5a36c3c8 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 12 Mar 2025 22:14:43 +0700 Subject: [PATCH 46/68] Add Skip triangles bit and skip aabbs bit as common flag. 
--- include/nbl/asset/IPipeline.h | 4 ++-- include/nbl/video/IGPURayTracingPipeline.h | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/nbl/asset/IPipeline.h b/include/nbl/asset/IPipeline.h index f2e7cb19e4..4fff6722e1 100644 --- a/include/nbl/asset/IPipeline.h +++ b/include/nbl/asset/IPipeline.h @@ -71,8 +71,8 @@ class IPipeline //CREATE_LIBRARY = 1<<11, // Ray Tracing Pipelines only - //RAY_TRACING_SKIP_TRIANGLES_BIT_KHR = 1<<12, - //RAY_TRACING_SKIP_AABBS_BIT_KHR = 1<<13, + SKIP_TRIANGLES_BIT = 1<<12, + SKIP_AABBS_BIT = 1<<13, //RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR = 1<<14, //RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR = 1<<15, //RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR = 1<<16, diff --git a/include/nbl/video/IGPURayTracingPipeline.h b/include/nbl/video/IGPURayTracingPipeline.h index 3603905e7f..f4d0131651 100644 --- a/include/nbl/video/IGPURayTracingPipeline.h +++ b/include/nbl/video/IGPURayTracingPipeline.h @@ -37,13 +37,13 @@ class IGPURayTracingPipeline : public IBackendObject, public asset::IRayTracingP EARLY_RETURN_ON_FAILURE = base_flag(EARLY_RETURN_ON_FAILURE), LINK_TIME_OPTIMIZATION = base_flag(LINK_TIME_OPTIMIZATION), RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT = base_flag(RETAIN_LINK_TIME_OPTIMIZATION_INFO), - RAY_TRACING_SKIP_TRIANGLES_BIT_KHR = 1<<12, - RAY_TRACING_SKIP_AABBS_BIT_KHR = 1<<13, - RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR = 1<<14, - RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR = 1<<15, - RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR = 1<<16, - RAY_TRACING_NO_NULL_INTERSECTION_SHADERS_BIT_KHR = 1<<17, - RAY_TRACING_ALLOW_MOTION_BIT_NV = 1<<20, + SKIP_TRIANGLES_BIT_KHR = base_flag(SKIP_TRIANGLES_BIT), + SKIP_AABBS_BIT_KHR = base_flag(SKIP_AABBS_BIT), + NO_NULL_ANY_HIT_SHADERS_BIT_KHR = 1<<14, + NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR = 1<<15, + NO_NULL_MISS_SHADERS_BIT_KHR = 1<<16, + NO_NULL_INTERSECTION_SHADERS_BIT_KHR = 1<<17, + ALLOW_MOTION_BIT_NV = 1<<20, }; #undef base_flag From 
b2188dcdf96ec4726356571bff53232ff802fda2 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 12 Mar 2025 22:17:16 +0700 Subject: [PATCH 47/68] Fix setRayTracingStackSize validation. --- src/nbl/video/IGPUCommandBuffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index 4301d48c5f..3f46f23945 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -1899,7 +1899,7 @@ bool IGPUCommandBuffer::setRayTracingPipelineStackSize(uint32_t pipelineStackSiz { if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT,RENDERPASS_SCOPE::OUTSIDE)) return false; - if (m_boundRayTracingPipeline != nullptr && m_boundRayTracingPipeline->getCachedCreationParams().dynamicStackSize) + if (m_boundRayTracingPipeline->getCachedCreationParams().dynamicStackSize) { NBL_LOG_ERROR("Cannot set dynamic state when state is not mark as dynamic on bound pipeline!"); } From ee1af5da6e5b944a663d7db6d40427fe3b04d0d2 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 12 Mar 2025 22:57:14 +0700 Subject: [PATCH 48/68] Improve some flags naming --- include/nbl/asset/IPipeline.h | 4 ++-- include/nbl/video/IGPURayTracingPipeline.h | 12 ++++++------ src/nbl/video/IGPUCommandBuffer.cpp | 10 +++++----- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/include/nbl/asset/IPipeline.h b/include/nbl/asset/IPipeline.h index 4fff6722e1..805c3993ac 100644 --- a/include/nbl/asset/IPipeline.h +++ b/include/nbl/asset/IPipeline.h @@ -71,8 +71,8 @@ class IPipeline //CREATE_LIBRARY = 1<<11, // Ray Tracing Pipelines only - SKIP_TRIANGLES_BIT = 1<<12, - SKIP_AABBS_BIT = 1<<13, + SKIP_TRIANGLES = 1<<12, + SKIP_AABBS = 1<<13, //RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR = 1<<14, //RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR = 1<<15, //RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR = 1<<16, diff --git a/include/nbl/video/IGPURayTracingPipeline.h b/include/nbl/video/IGPURayTracingPipeline.h 
index f4d0131651..cdb04e1c85 100644 --- a/include/nbl/video/IGPURayTracingPipeline.h +++ b/include/nbl/video/IGPURayTracingPipeline.h @@ -37,12 +37,12 @@ class IGPURayTracingPipeline : public IBackendObject, public asset::IRayTracingP EARLY_RETURN_ON_FAILURE = base_flag(EARLY_RETURN_ON_FAILURE), LINK_TIME_OPTIMIZATION = base_flag(LINK_TIME_OPTIMIZATION), RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT = base_flag(RETAIN_LINK_TIME_OPTIMIZATION_INFO), - SKIP_TRIANGLES_BIT_KHR = base_flag(SKIP_TRIANGLES_BIT), - SKIP_AABBS_BIT_KHR = base_flag(SKIP_AABBS_BIT), - NO_NULL_ANY_HIT_SHADERS_BIT_KHR = 1<<14, - NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR = 1<<15, - NO_NULL_MISS_SHADERS_BIT_KHR = 1<<16, - NO_NULL_INTERSECTION_SHADERS_BIT_KHR = 1<<17, + SKIP_TRIANGLES = base_flag(SKIP_TRIANGLES), + SKIP_AABBS = base_flag(SKIP_AABBS), + NO_NULL_ANY_HIT_SHADERS = 1<<14, + NO_NULL_CLOSEST_HIT_SHADERS = 1<<15, + NO_NULL_MISS_SHADERS = 1<<16, + NO_NULL_INTERSECTION_SHADERS = 1<<17, ALLOW_MOTION_BIT_NV = 1<<20, }; #undef base_flag diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index 3f46f23945..a20152f092 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -699,9 +699,9 @@ bool IGPUCommandBuffer::invalidShaderGroups( // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-flags-03697 // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-flags-03512 const auto shouldHaveHitGroup = flags & - (PipelineFlags(PipelineFlag::RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR) | - PipelineFlag::RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR | - PipelineFlag::RAY_TRACING_NO_NULL_INTERSECTION_SHADERS_BIT_KHR); + (PipelineFlags(PipelineFlag::NO_NULL_ANY_HIT_SHADERS) | + PipelineFlag::NO_NULL_CLOSEST_HIT_SHADERS | + PipelineFlag::NO_NULL_INTERSECTION_SHADERS); if (shouldHaveHitGroup && !hitGroupsRange.buffer) { NBL_LOG_ERROR("bound 
pipeline indicates that traceRays command should have hit group, but hitGroupsRange.buffer is null!"); @@ -709,7 +709,7 @@ bool IGPUCommandBuffer::invalidShaderGroups( } // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-flags-03511 - const auto shouldHaveMissGroup = flags & PipelineFlag::RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR; + const auto shouldHaveMissGroup = flags & PipelineFlag::NO_NULL_MISS_SHADERS; if (shouldHaveMissGroup && !missGroupsRange.buffer) { NBL_LOG_ERROR("bound pipeline indicates that traceRays command should have hit group, but hitGroupsRange.buffer is null!"); @@ -1899,7 +1899,7 @@ bool IGPUCommandBuffer::setRayTracingPipelineStackSize(uint32_t pipelineStackSiz { if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT,RENDERPASS_SCOPE::OUTSIDE)) return false; - if (m_boundRayTracingPipeline->getCachedCreationParams().dynamicStackSize) + if (m_boundRayTracingPipeline != nullptr && m_boundRayTracingPipeline->getCachedCreationParams().dynamicStackSize) { NBL_LOG_ERROR("Cannot set dynamic state when state is not mark as dynamic on bound pipeline!"); } From 1912cd201d7f900076092d827049642f1c78fb7b Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 12 Mar 2025 22:57:55 +0700 Subject: [PATCH 49/68] Fix merging bug --- src/nbl/asset/utils/CHLSLCompiler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nbl/asset/utils/CHLSLCompiler.cpp b/src/nbl/asset/utils/CHLSLCompiler.cpp index 249927f812..929a9411b0 100644 --- a/src/nbl/asset/utils/CHLSLCompiler.cpp +++ b/src/nbl/asset/utils/CHLSLCompiler.cpp @@ -49,7 +49,7 @@ static const wchar_t* ShaderStageToString(asset::IShader::E_SHADER_STAGE stage) case asset::IShader::E_SHADER_STAGE::ESS_CLOSEST_HIT: [[fallthrough]]; case asset::IShader::E_SHADER_STAGE::ESS_MISS: [[fallthrough]]; case asset::IShader::E_SHADER_STAGE::ESS_INTERSECTION: [[fallthrough]]; - case asset::IShader::E_SHADER_STAGE::ESS_CALLABLE: [[fallthrough]] + case 
asset::IShader::E_SHADER_STAGE::ESS_CALLABLE: [[fallthrough]]; case asset::IShader::E_SHADER_STAGE::ESS_ALL_OR_LIBRARY: return L"lib"; default: From 240cfa1f44192d7db6cd12a601da0b85769b0f3d Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 12 Mar 2025 23:18:58 +0700 Subject: [PATCH 50/68] Fix indirect Trace ray validation. --- src/nbl/video/IGPUCommandBuffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index a20152f092..daa7e454a0 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -1995,7 +1995,7 @@ bool IGPUCommandBuffer::traceRaysIndirect(const asset::SBufferBindinggetSize() - indirectBinding.offset <= sizeof(hlsl::TraceRaysIndirectCommand_t)) + if (sizeof(hlsl::TraceRaysIndirectCommand_t) + indirectBinding.offset > indirectBinding.buffer->getSize()) { NBL_LOG_ERROR("buffer size - offset must be at least the size of TraceRaysIndirectCommand_t!"); return false; From f22c4c022a703a7de96d76cd1a09d899dc2b911d Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 12 Mar 2025 23:28:47 +0700 Subject: [PATCH 51/68] Make the indirect buffer size check clearer. 
--- src/nbl/video/IGPUCommandBuffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index daa7e454a0..76cbdd125f 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -1995,7 +1995,7 @@ bool IGPUCommandBuffer::traceRaysIndirect(const asset::SBufferBinding indirectBinding.buffer->getSize()) + if (indirectBinding.offset + sizeof(hlsl::TraceRaysIndirectCommand_t) > indirectBinding.buffer->getSize()) { NBL_LOG_ERROR("buffer size - offset must be at least the size of TraceRaysIndirectCommand_t!"); return false; From 1ed9fd7321474951282c1ab88a12e74b418adebf Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 12 Mar 2025 23:46:02 +0700 Subject: [PATCH 52/68] Add Motion Blur validation. --- include/nbl/video/IGPURayTracingPipeline.h | 2 +- src/nbl/video/IGPUCommandBuffer.cpp | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/nbl/video/IGPURayTracingPipeline.h b/include/nbl/video/IGPURayTracingPipeline.h index cdb04e1c85..0d28986da6 100644 --- a/include/nbl/video/IGPURayTracingPipeline.h +++ b/include/nbl/video/IGPURayTracingPipeline.h @@ -43,7 +43,7 @@ class IGPURayTracingPipeline : public IBackendObject, public asset::IRayTracingP NO_NULL_CLOSEST_HIT_SHADERS = 1<<15, NO_NULL_MISS_SHADERS = 1<<16, NO_NULL_INTERSECTION_SHADERS = 1<<17, - ALLOW_MOTION_BIT_NV = 1<<20, + ALLOW_MOTION = 1<<20, }; #undef base_flag diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index 76cbdd125f..f47e4f3494 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -1989,12 +1989,21 @@ bool IGPUCommandBuffer::traceRaysIndirect(const asset::SBufferBindinggetEnabledFeatures(); + + // https://docs.vulkan.org/spec/latest/chapters/raytracing.html#VUID-vkCmdTraceRaysIndirect2KHR-rayTracingMotionBlurPipelineTraceRaysIndirect-04951 + if 
(m_boundRayTracingPipeline->getCreationFlags() & IGPURayTracingPipeline::SCreationParams::FLAGS::ALLOW_MOTION && !features.rayTracingMotionBlurPipelineTraceRaysIndirect) + { + NBL_LOG_ERROR("If the bound ray tracing pipeline is created with ALLOW_MOTION, rayTracingMotionBlurPipelineTraceRaysIndirect feature must be enabled!"); + return false; + } + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/vkCmdTraceRaysIndirectKHR.html#VUID-vkCmdTraceRaysIndirectKHR-indirectDeviceAddress-03634 // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/vkCmdTraceRaysIndirectKHR.html#VUID-vkCmdTraceRaysIndirectKHR-indirectDeviceAddress-03633 if (invalidBufferBinding(indirectBinding, 4u,IGPUBuffer::EUF_INDIRECT_BUFFER_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT)) return false; - https://docs.vulkan.org/spec/latest/chapters/raytracing.html#VUID-vkCmdTraceRaysIndirect2KHR-indirectDeviceAddress-03633 + // https://docs.vulkan.org/spec/latest/chapters/raytracing.html#VUID-vkCmdTraceRaysIndirect2KHR-indirectDeviceAddress-03633 if (indirectBinding.offset + sizeof(hlsl::TraceRaysIndirectCommand_t) > indirectBinding.buffer->getSize()) { NBL_LOG_ERROR("buffer size - offset must be at least the size of TraceRaysIndirectCommand_t!"); From 77504186b485e547ad9d7579c414962d81079a0b Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 12 Mar 2025 23:53:12 +0700 Subject: [PATCH 53/68] Add comment on some validations --- src/nbl/video/IGPUCommandBuffer.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index f47e4f3494..91bdd366dd 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -1917,6 +1917,9 @@ bool IGPUCommandBuffer::traceRays( if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT,RENDERPASS_SCOPE::OUTSIDE)) return false; + // https://docs.vulkan.org/spec/latest/chapters/raytracing.html#VUID-vkCmdTraceRaysKHR-width-03638 + // 
https://docs.vulkan.org/spec/latest/chapters/raytracing.html#VUID-vkCmdTraceRaysKHR-height-03639 + // https://docs.vulkan.org/spec/latest/chapters/raytracing.html#VUID-vkCmdTraceRaysKHR-depth-03640 const auto& limits = getOriginDevice()->getPhysicalDevice()->getLimits(); const auto maxWidth = limits.maxComputeWorkGroupCount[0] * limits.maxWorkgroupSize[0]; const auto maxHeight = limits.maxComputeWorkGroupCount[1] * limits.maxWorkgroupSize[1]; @@ -1927,6 +1930,7 @@ bool IGPUCommandBuffer::traceRays( return false; } + // https://docs.vulkan.org/spec/latest/chapters/raytracing.html#VUID-vkCmdTraceRaysKHR-width-03641 const auto invocationCount = width * height * depth; if (invocationCount > limits.maxRayDispatchInvocationCount) { From 3a18b0562d1b1e87d5f6190342c5663e67582b46 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 14 Mar 2025 21:16:59 +0700 Subject: [PATCH 54/68] Move creation flags to IRayTracingPipeline --- include/nbl/asset/IRayTracingPipeline.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index 81f93bbd2d..93cf5e2e23 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -53,13 +53,33 @@ class IRayTracingPipelineBase : public virtual core::IReferenceCounted template class IRayTracingPipeline : public IPipeline, public IRayTracingPipelineBase { + using base_creation_params_t = IPipeline::SCreationParams; public: using SGeneralShaderGroupContainer = core::smart_refctd_dynamic_array; using SHitShaderGroupContainer = core::smart_refctd_dynamic_array; - struct SCreationParams : IPipeline::SCreationParams + struct SCreationParams : base_creation_params_t { + public: + #define base_flag(F) static_cast(base_creation_params_t::FLAGS::F) + enum class FLAGS : uint64_t + { + NONE = base_flag(NONE), + DISABLE_OPTIMIZATIONS = base_flag(DISABLE_OPTIMIZATIONS), + ALLOW_DERIVATIVES = 
base_flag(ALLOW_DERIVATIVES), + FAIL_ON_PIPELINE_COMPILE_REQUIRED = base_flag(FAIL_ON_PIPELINE_COMPILE_REQUIRED), + EARLY_RETURN_ON_FAILURE = base_flag(EARLY_RETURN_ON_FAILURE), + SKIP_BUILT_IN_PRIMITIVES = 1<<12, + SKIP_AABBS = 1<<13, + NO_NULL_ANY_HIT_SHADERS = 1<<14, + NO_NULL_CLOSEST_HIT_SHADERS = 1<<15, + NO_NULL_MISS_SHADERS = 1<<16, + NO_NULL_INTERSECTION_SHADERS = 1<<17, + ALLOW_MOTION = 1<<20, + }; + #undef base_flag + protected: using SpecInfo = ShaderType::SSpecInfo; template From 82e07271283026c102a04f1dda9be417f79c93e2 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 14 Mar 2025 21:17:46 +0700 Subject: [PATCH 55/68] Comment out some flags and add some documentation on pipeline flags. --- include/nbl/asset/IPipeline.h | 46 ++++++++++++++-------- include/nbl/video/IGPUComputePipeline.h | 4 -- include/nbl/video/IGPUGraphicsPipeline.h | 4 -- include/nbl/video/IGPURayTracingPipeline.h | 21 ---------- 4 files changed, 29 insertions(+), 46 deletions(-) diff --git a/include/nbl/asset/IPipeline.h b/include/nbl/asset/IPipeline.h index 805c3993ac..bd7035158e 100644 --- a/include/nbl/asset/IPipeline.h +++ b/include/nbl/asset/IPipeline.h @@ -54,45 +54,57 @@ class IPipeline // Compute Pipelines only //DISPATCH_BASE = 1<<4, - // Weird extension + // This is for NV-raytracing extension. Now this is done via IDeferredOperation //DEFER_COMPILE_NV = 1<<5, - CAPTURE_STATISTICS = 1<<6, - CAPTURE_INTERNAL_REPRESENTATIONS = 1<<7, + // We use Renderdoc to take care of this for us, + // we won't be parsing the statistics and internal representation ourselves. 
+ //CAPTURE_STATISTICS = 1<<6, + //CAPTURE_INTERNAL_REPRESENTATIONS = 1<<7, - // We require Pipeline Cache Control feature so those are satisfied: - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkComputePipelineCreateInfo.html#VUID-VkComputePipelineCreateInfo-pipelineCreationCacheControl-02875 + // Will soon be deprecated due to + // https://github.com/Devsh-Graphics-Programming/Nabla/issues/854 FAIL_ON_PIPELINE_COMPILE_REQUIRED = 1<<8, EARLY_RETURN_ON_FAILURE = 1<<9, - LINK_TIME_OPTIMIZATION = 1<<10, + // Will be exposed later with the IPipelineLibrary asset implementation + // https://github.com/Devsh-Graphics-Programming/Nabla/issues/853 + //LINK_TIME_OPTIMIZATION = 1<<10, - //Not Supported Yet + // Won't be exposed because we'll introduce Libraries as a separate object/asset-type + // https://github.com/Devsh-Graphics-Programming/Nabla/issues/853 //CREATE_LIBRARY = 1<<11, // Ray Tracing Pipelines only - SKIP_TRIANGLES = 1<<12, - SKIP_AABBS = 1<<13, - //RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR = 1<<14, - //RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR = 1<<15, - //RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR = 1<<16, - //RAY_TRACING_NO_NULL_INTERSECTION_SHADERS_BIT_KHR = 1<<17, - - // Not Supported Yet + //SKIP_BUILT_IN_PRIMITIVES = 1<<12, + //SKIP_AABBS = 1<<13, + //NO_NULL_ANY_HIT_SHADERS = 1<<14, + //NO_NULL_CLOSEST_HIT_SHADERS = 1<<15, + //NO_NULL_MISS_SHADERS = 1<<16, + //NO_NULL_INTERSECTION_SHADERS = 1<<17, + + // There is a new Device Generated Commands extension with its own flag that will deprecate this //INDIRECT_BINDABLE_NV = 1<<18, // Ray Tracing Pipelines only + // For debug tools //RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR = 1<<19, - //RAY_TRACING_ALLOW_MOTION_BIT_NV = 1<<20, + + // Ray Tracing Pipelines only + //ALLOW_MOTION = 1<<20, // Graphics Pipelineonly (we don't support subpass shading) //RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR = 1<<21, //RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT = 
1<<22, - RETAIN_LINK_TIME_OPTIMIZATION_INFO = 1<<23, + // Will be exposed later with the IPipelineLibrary asset implementation + // https://github.com/Devsh-Graphics-Programming/Nabla/issues/853 + //RETAIN_LINK_TIME_OPTIMIZATION_INFO = 1<<23, // Ray Tracing Pipelines only //RAY_TRACING_OPACITY_MICROMAP_BIT_EXT = 1<<24, + + // Not supported yet, and we will move to dynamic rendering, so this might never be supported //COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT = 1<<25, //DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT = 1<<26, diff --git a/include/nbl/video/IGPUComputePipeline.h b/include/nbl/video/IGPUComputePipeline.h index 1d5da7644c..4d0fbaa39f 100644 --- a/include/nbl/video/IGPUComputePipeline.h +++ b/include/nbl/video/IGPUComputePipeline.h @@ -35,12 +35,8 @@ class IGPUComputePipeline : public IBackendObject, public asset::IPipeline { - #define base_flag(F) static_cast(pipeline_t::SCreationParams::FLAGS::F) - enum class FLAGS : uint64_t - { - NONE = base_flag(NONE), - DISABLE_OPTIMIZATIONS = base_flag(DISABLE_OPTIMIZATIONS), - ALLOW_DERIVATIVES = base_flag(ALLOW_DERIVATIVES), - CAPTURE_STATISTICS = base_flag(CAPTURE_STATISTICS), - CAPTURE_INTERNAL_REPRESENTATIONS = base_flag(CAPTURE_INTERNAL_REPRESENTATIONS), - FAIL_ON_PIPELINE_COMPILE_REQUIRED = base_flag(FAIL_ON_PIPELINE_COMPILE_REQUIRED), - EARLY_RETURN_ON_FAILURE = base_flag(EARLY_RETURN_ON_FAILURE), - LINK_TIME_OPTIMIZATION = base_flag(LINK_TIME_OPTIMIZATION), - RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT = base_flag(RETAIN_LINK_TIME_OPTIMIZATION_INFO), - SKIP_TRIANGLES = base_flag(SKIP_TRIANGLES), - SKIP_AABBS = base_flag(SKIP_AABBS), - NO_NULL_ANY_HIT_SHADERS = 1<<14, - NO_NULL_CLOSEST_HIT_SHADERS = 1<<15, - NO_NULL_MISS_SHADERS = 1<<16, - NO_NULL_INTERSECTION_SHADERS = 1<<17, - ALLOW_MOTION = 1<<20, - }; - #undef base_flag inline SSpecializationValidationResult valid() const { From 42b43a5bc7bcd4bfd7f200b043f928c1fa8c6ace Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 14 Mar 2025 21:20:23 +0700 Subject: 
[PATCH 56/68] Add rayTracingValidation feature --- src/nbl/video/device_capabilities/device_features.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/nbl/video/device_capabilities/device_features.json b/src/nbl/video/device_capabilities/device_features.json index 49956e5393..5e4775e9b4 100644 --- a/src/nbl/video/device_capabilities/device_features.json +++ b/src/nbl/video/device_capabilities/device_features.json @@ -2086,6 +2086,11 @@ "name": "rayTracingPipeline", "value": false }, + { + "type": "bool", + "name": "rayTracingValidation", + "value": false + }, { "type": "bool", "name": "rayTracingPipelineShaderGroupHandleCaptureReplay", From 52a636d8ee0986a50c0cef928d561e79973640fb Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 14 Mar 2025 22:08:45 +0700 Subject: [PATCH 57/68] Add validation that rayTraversalPrimitiveCulling feaature is enabled when creating pipeline with SKIP_AABB and SKIP_BUILT_IN_PRIMITVE flags enabled --- src/nbl/video/ILogicalDevice.cpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index b4aee53f44..fbed660821 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -969,6 +969,27 @@ bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipeline return false; } + if (!features.rayTraversalPrimitiveCulling) + { + for (const auto& param : params) + { + // https://registry.khronos.org/vulkan/specs/latest/html/vkspec.html#VUID-VkRayTracingPipelineCreateInfoKHR-rayTraversalPrimitiveCulling-03596 + if (param.flags & IGPURayTracingPipeline::SCreationParams::FLAGS::SKIP_AABBS) + { + NBL_LOG_ERROR("Feature `rayTraversalPrimitiveCulling` is not enabled when pipeline is created with SKIP_AABBS"); + return false; + } + + // https://registry.khronos.org/vulkan/specs/latest/html/vkspec.html#VUID-VkRayTracingPipelineCreateInfoKHR-rayTraversalPrimitiveCulling-03597 + if (param.flags & 
IGPURayTracingPipeline::SCreationParams::FLAGS::SKIP_BUILT_IN_PRIMITIVES) + { + NBL_LOG_ERROR("Feature `rayTraversalPrimitiveCulling` is not enabled when pipeline is created with SKIP_BUILT_IN_PRIMITIVES"); + return false; + } + + } + } + const auto& limits = getPhysicalDeviceLimits(); for (const auto& param : params) { From 14f5d35bb8b357313f32ed92ce11f60c81f8cd6a Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 14 Mar 2025 22:58:20 +0700 Subject: [PATCH 58/68] Add more validation. --- include/nbl/asset/IRayTracingPipeline.h | 36 ++++++++++++------- include/nbl/video/IGPURayTracingPipeline.h | 2 -- src/nbl/video/ILogicalDevice.cpp | 41 ++++++++++++++-------- 3 files changed, 51 insertions(+), 28 deletions(-) diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index 93cf5e2e23..c1bf999386 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -107,15 +107,10 @@ class IRayTracingPipeline : public IPipeline, public IRayTra return shaders[index].shader->getStage(); }; - if (shaderGroups.raygen.index >= shaders.size()) - return false; - if (getShaderStage(shaderGroups.raygen.index) != ICPUShader::E_SHADER_STAGE::ESS_RAYGEN) - return false; - - auto isValidShaderIndex = [this, getShaderStage](size_t index, ICPUShader::E_SHADER_STAGE expectedStage) -> bool + auto isValidShaderIndex = [this, getShaderStage](size_t index, ICPUShader::E_SHADER_STAGE expectedStage, bool is_unused_shader_forbidden) -> bool { if (index == SShaderGroupsParams::SIndex::Unused) - return true; + return !is_unused_shader_forbidden; if (index >= shaders.size()) return false; if (getShaderStage(index) != expectedStage) @@ -123,27 +118,42 @@ class IRayTracingPipeline : public IPipeline, public IRayTra return true; }; + if (isValidShaderIndex(shaderGroups.raygen.index, ICPUShader::E_SHADER_STAGE::ESS_RAYGEN, true)) + { + return false; + } + for (const auto& shaderGroup : shaderGroups.hits) { - if 
(!isValidShaderIndex(shaderGroup.anyHit, ICPUShader::E_SHADER_STAGE::ESS_ANY_HIT)) + // https://docs.vulkan.org/spec/latest/chapters/pipelines.html#VUID-VkRayTracingPipelineCreateInfoKHR-flags-03470 + if (!isValidShaderIndex(shaderGroup.anyHit, + ICPUShader::E_SHADER_STAGE::ESS_ANY_HIT, + bool(flags & FLAGS::NO_NULL_ANY_HIT_SHADERS))) return false; - if (!isValidShaderIndex(shaderGroup.closestHit, ICPUShader::E_SHADER_STAGE::ESS_CLOSEST_HIT)) + // https://docs.vulkan.org/spec/latest/chapters/pipelines.html#VUID-VkRayTracingPipelineCreateInfoKHR-flags-03471 + if (!isValidShaderIndex(shaderGroup.closestHit, + ICPUShader::E_SHADER_STAGE::ESS_CLOSEST_HIT, + bool(flags & FLAGS::NO_NULL_CLOSEST_HIT_SHADERS))) return false; - if (!isValidShaderIndex(shaderGroup.intersectionShader, ICPUShader::E_SHADER_STAGE::ESS_INTERSECTION)) + if (!isValidShaderIndex(shaderGroup.intersectionShader, + ICPUShader::E_SHADER_STAGE::ESS_INTERSECTION, + false)) return false; } for (const auto& shaderGroup : shaderGroups.misses) { - if (!isValidShaderIndex(shaderGroup.index, ICPUShader::E_SHADER_STAGE::ESS_MISS)) + if (!isValidShaderIndex(shaderGroup.index, + ICPUShader::E_SHADER_STAGE::ESS_MISS, + false)) return false; } for (const auto& shaderGroup : shaderGroups.callables) { - if (!isValidShaderIndex(shaderGroup.index, ICPUShader::E_SHADER_STAGE::ESS_CALLABLE)) + if (!isValidShaderIndex(shaderGroup.index, ICPUShader::E_SHADER_STAGE::ESS_CALLABLE, false)) return false; } return true; @@ -163,6 +173,8 @@ class IRayTracingPipeline : public IPipeline, public IRayTra std::span shaders = {}; SShaderGroupsParams shaderGroups; SCachedCreationParams cached = {}; + // TODO: Could guess the required flags from SPIR-V introspection of declared caps + core::bitflag flags = FLAGS::NONE; }; inline const SCachedCreationParams& getCachedCreationParams() const { return m_params; } diff --git a/include/nbl/video/IGPURayTracingPipeline.h b/include/nbl/video/IGPURayTracingPipeline.h index 750e47f65d..98d39ebe7d 
100644 --- a/include/nbl/video/IGPURayTracingPipeline.h +++ b/include/nbl/video/IGPURayTracingPipeline.h @@ -56,8 +56,6 @@ class IGPURayTracingPipeline : public IBackendObject, public asset::IRayTracingP inline std::span getShaders() const { return shaders; } - // TODO: Could guess the required flags from SPIR-V introspection of declared caps - core::bitflag flags = FLAGS::NONE; }; inline core::bitflag getCreationFlags() const { return m_flags; } diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index fbed660821..823138f8b0 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -963,36 +963,49 @@ bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipeline } const auto& features = getEnabledFeatures(); + + // https://docs.vulkan.org/spec/latest/chapters/pipelines.html#VUID-vkCreateRayTracingPipelinesKHR-rayTracingPipeline-03586 if (!features.rayTracingPipeline) { NBL_LOG_ERROR("Feature `ray tracing pipeline` is not enabled"); return false; } - if (!features.rayTraversalPrimitiveCulling) + for (const auto& param : params) { - for (const auto& param : params) + const bool skipAABBs = bool(param.flags & IGPURayTracingPipeline::SCreationParams::FLAGS::SKIP_AABBS); + const bool skipBuiltin = bool(param.flags & IGPURayTracingPipeline::SCreationParams::FLAGS::SKIP_BUILT_IN_PRIMITIVES); + + // https://docs.vulkan.org/spec/latest/chapters/pipelines.html#VUID-VkRayTracingPipelineCreateInfoKHR-rayTraversalPrimitiveCulling-03597 + if (skipAABBs && skipBuiltin) { - // https://registry.khronos.org/vulkan/specs/latest/html/vkspec.html#VUID-VkRayTracingPipelineCreateInfoKHR-rayTraversalPrimitiveCulling-03596 - if (param.flags & IGPURayTracingPipeline::SCreationParams::FLAGS::SKIP_AABBS) - { - NBL_LOG_ERROR("Feature `rayTraversalPrimitiveCulling` is not enabled when pipeline is created with SKIP_AABBS"); - return false; - } + NBL_LOG_ERROR("Flags must not include both SKIP_AABBS and 
SKIP_BUILT_IN_PRIMITIVE!"); + return false; + } - // https://registry.khronos.org/vulkan/specs/latest/html/vkspec.html#VUID-VkRayTracingPipelineCreateInfoKHR-rayTraversalPrimitiveCulling-03597 - if (param.flags & IGPURayTracingPipeline::SCreationParams::FLAGS::SKIP_BUILT_IN_PRIMITIVES) - { - NBL_LOG_ERROR("Feature `rayTraversalPrimitiveCulling` is not enabled when pipeline is created with SKIP_BUILT_IN_PRIMITIVES"); - return false; - } + // https://registry.khronos.org/vulkan/specs/latest/html/vkspec.html#VUID-VkRayTracingPipelineCreateInfoKHR-rayTraversalPrimitiveCulling-03596 + if (skipAABBs && !features.rayTraversalPrimitiveCulling) + { + NBL_LOG_ERROR("Feature `rayTraversalPrimitiveCulling` is not enabled when pipeline is created with SKIP_AABBS"); + return false; + } + // https://registry.khronos.org/vulkan/specs/latest/html/vkspec.html#VUID-VkRayTracingPipelineCreateInfoKHR-rayTraversalPrimitiveCulling-03597 + if (skipBuiltin && !features.rayTraversalPrimitiveCulling) + { + NBL_LOG_ERROR("Feature `rayTraversalPrimitiveCulling` is not enabled when pipeline is created with SKIP_BUILT_IN_PRIMITIVES"); + return false; } + + } + if (!features.rayTraversalPrimitiveCulling) + { } const auto& limits = getPhysicalDeviceLimits(); for (const auto& param : params) { + // https://docs.vulkan.org/spec/latest/chapters/pipelines.html#VUID-VkRayTracingPipelineCreateInfoKHR-maxPipelineRayRecursionDepth-03589 if (param.cached.maxRecursionDepth > limits.maxRayRecursionDepth) { NBL_LOG_ERROR("Invalid maxRecursionDepth. 
maxRecursionDepth(%u) exceed the limits(%u)", param.cached.maxRecursionDepth, limits.maxRayRecursionDepth); From 9ed09451a248b506162e9526837afd9565f3d8d1 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 17 Mar 2025 15:54:12 +0700 Subject: [PATCH 59/68] Remove dead code --- src/nbl/video/ILogicalDevice.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index 823138f8b0..b3fb989a8a 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -998,9 +998,6 @@ bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipeline } } - if (!features.rayTraversalPrimitiveCulling) - { - } const auto& limits = getPhysicalDeviceLimits(); for (const auto& param : params) From 94cb188998b6a8d298f78492984c36105d180269 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 17 Mar 2025 15:54:45 +0700 Subject: [PATCH 60/68] Improve naming of hit group --- include/nbl/asset/IRayTracingPipeline.h | 4 ++-- src/nbl/video/CVulkanLogicalDevice.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index c1bf999386..f85c9dd0ba 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -26,7 +26,7 @@ class IRayTracingPipelineBase : public virtual core::IReferenceCounted { uint32_t closestHit = SIndex::Unused; uint32_t anyHit = SIndex::Unused; - uint32_t intersectionShader = SIndex::Unused; + uint32_t intersection = SIndex::Unused; }; SIndex raygen; @@ -137,7 +137,7 @@ class IRayTracingPipeline : public IPipeline, public IRayTra bool(flags & FLAGS::NO_NULL_CLOSEST_HIT_SHADERS))) return false; - if (!isValidShaderIndex(shaderGroup.intersectionShader, + if (!isValidShaderIndex(shaderGroup.intersection, ICPUShader::E_SHADER_STAGE::ESS_INTERSECTION, false)) return false; diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp 
b/src/nbl/video/CVulkanLogicalDevice.cpp index 7ff7fdb15a..e866805b65 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1488,12 +1488,12 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( return { .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, .pNext = nullptr, - .type = group.intersectionShader == SShaderGroupParams::SIndex::Unused ? + .type = group.intersection == SShaderGroupParams::SIndex::Unused ? VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR : VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR, .generalShader = VK_SHADER_UNUSED_KHR, .closestHitShader = getVkShaderIndex(group.closestHit), .anyHitShader = getVkShaderIndex(group.anyHit), - .intersectionShader = getVkShaderIndex(group.intersectionShader), + .intersectionShader = getVkShaderIndex(group.intersection), }; }; for (const auto& info : createInfos) From 266d0eb53da7547244206592f641309c9c922d3e Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 17 Mar 2025 18:23:05 +0700 Subject: [PATCH 61/68] Fix validation on raygen shaderGroup --- include/nbl/asset/IRayTracingPipeline.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index f85c9dd0ba..3840758fd2 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -118,7 +118,7 @@ class IRayTracingPipeline : public IPipeline, public IRayTra return true; }; - if (isValidShaderIndex(shaderGroups.raygen.index, ICPUShader::E_SHADER_STAGE::ESS_RAYGEN, true)) + if (!isValidShaderIndex(shaderGroups.raygen.index, ICPUShader::E_SHADER_STAGE::ESS_RAYGEN, true)) { return false; } From bbcff6978bff63976b69df88188e6d518060735c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 17 Mar 2025 18:24:09 +0700 Subject: [PATCH 62/68] Implement get ray tracing stack size api --- include/nbl/video/CVulkanRayTracingPipeline.h | 17 +++ 
include/nbl/video/IGPURayTracingPipeline.h | 13 ++ src/nbl/video/CVulkanRayTracingPipeline.cpp | 124 +++++++++++++++++- 3 files changed, 148 insertions(+), 6 deletions(-) diff --git a/include/nbl/video/CVulkanRayTracingPipeline.h b/include/nbl/video/CVulkanRayTracingPipeline.h index 4be7f3bf91..4fd3db5fe3 100644 --- a/include/nbl/video/CVulkanRayTracingPipeline.h +++ b/include/nbl/video/CVulkanRayTracingPipeline.h @@ -14,6 +14,8 @@ class CVulkanRayTracingPipeline final : public IGPURayTracingPipeline { using ShaderRef = core::smart_refctd_ptr; using ShaderContainer = core::smart_refctd_dynamic_array; + using GeneralGroupStackSizeContainer = core::smart_refctd_dynamic_array; + using HitGroupStackSizeContainer = core::smart_refctd_dynamic_array; public: @@ -33,12 +35,27 @@ class CVulkanRayTracingPipeline final : public IGPURayTracingPipeline virtual const SShaderGroupHandle& getHit(uint32_t index) const override; virtual const SShaderGroupHandle& getCallable(uint32_t index) const override; + virtual uint16_t getRaygenStackSize() const override; + virtual std::span getMissStackSizes() const override; + virtual std::span getHitStackSizes() const override; + virtual std::span getCallableStackSizes() const override; + virtual uint16_t getDefaultStackSize() const override; + private: ~CVulkanRayTracingPipeline() override; const VkPipeline m_vkPipeline; ShaderContainer m_shaders; ShaderGroupHandleContainer m_shaderGroupHandles; + uint16_t m_raygenStackSize; + core::smart_refctd_dynamic_array m_missStackSizes; + core::smart_refctd_dynamic_array m_hitGroupStackSizes; + core::smart_refctd_dynamic_array m_callableStackSizes; + + uint32_t getRaygenIndex() const; + uint32_t getMissBaseIndex() const; + uint32_t getHitBaseIndex() const; + uint32_t getCallableBaseIndex() const; }; } diff --git a/include/nbl/video/IGPURayTracingPipeline.h b/include/nbl/video/IGPURayTracingPipeline.h index 98d39ebe7d..8a395931e2 100644 --- a/include/nbl/video/IGPURayTracingPipeline.h +++ 
b/include/nbl/video/IGPURayTracingPipeline.h @@ -23,6 +23,13 @@ class IGPURayTracingPipeline : public IBackendObject, public asset::IRayTracingP }; static_assert(sizeof(SShaderGroupHandle) == video::SPhysicalDeviceLimits::ShaderGroupHandleSize); + struct SHitGroupStackSize + { + uint16_t closestHit; + uint16_t anyHit; + uint16_t intersection; + }; + struct SCreationParams final : pipeline_t::SCreationParams, SPipelineCreationParams { @@ -68,6 +75,12 @@ class IGPURayTracingPipeline : public IBackendObject, public asset::IRayTracingP virtual const SShaderGroupHandle& getHit(uint32_t index) const = 0; virtual const SShaderGroupHandle& getCallable(uint32_t index) const = 0; + virtual uint16_t getRaygenStackSize() const = 0; + virtual std::span getMissStackSizes() const = 0; + virtual std::span getHitStackSizes() const = 0; + virtual std::span getCallableStackSizes() const = 0; + virtual uint16_t getDefaultStackSize() const = 0; + protected: IGPURayTracingPipeline(const SCreationParams& params) : IBackendObject(core::smart_refctd_ptr(params.layout->getOriginDevice())), pipeline_t(params), diff --git a/src/nbl/video/CVulkanRayTracingPipeline.cpp b/src/nbl/video/CVulkanRayTracingPipeline.cpp index 675bd6ddd2..d6494c6247 100644 --- a/src/nbl/video/CVulkanRayTracingPipeline.cpp +++ b/src/nbl/video/CVulkanRayTracingPipeline.cpp @@ -1,6 +1,10 @@ -#include "nbl/video/CVulkanRayTracingPipeline.h" +#include "nbl/asset/IRayTracingPipeline.h" +#include "nbl/video/CVulkanRayTracingPipeline.h" #include "nbl/video/CVulkanLogicalDevice.h" +#include "nbl/video/IGPURayTracingPipeline.h" + +#include namespace nbl::video { @@ -12,11 +16,60 @@ namespace nbl::video IGPURayTracingPipeline(params), m_vkPipeline(vk_pipeline), m_shaders(core::make_refctd_dynamic_array(params.shaders.size())), + m_missStackSizes(core::make_refctd_dynamic_array(params.shaderGroups.misses.size())), + m_hitGroupStackSizes(core::make_refctd_dynamic_array(params.shaderGroups.hits.size())), + 
m_callableStackSizes(core::make_refctd_dynamic_array(params.shaderGroups.hits.size())), m_shaderGroupHandles(std::move(shaderGroupHandles)) { for (size_t shaderIx = 0; shaderIx < params.shaders.size(); shaderIx++) m_shaders->operator[](shaderIx) = ShaderRef(static_cast(params.shaders[shaderIx].shader)); + const auto* vulkanDevice = static_cast(getOriginDevice()); + auto* vk = vulkanDevice->getFunctionTable(); + + auto getVkShaderGroupStackSize = [&](uint32_t baseGroupIx, uint32_t shaderGroupIx, uint32_t shaderIx, VkShaderGroupShaderKHR shaderType) -> uint16_t + { + if (shaderIx == SShaderGroupsParams::SIndex::Unused) + return 0; + + return vk->vk.vkGetRayTracingShaderGroupStackSizeKHR( + vulkanDevice->getInternalObject(), + m_vkPipeline, + baseGroupIx + shaderGroupIx, + shaderType + ); + }; + + m_raygenStackSize = getVkShaderGroupStackSize(getRaygenIndex(), 0, params.shaderGroups.raygen.index, VK_SHADER_GROUP_SHADER_GENERAL_KHR); + + for (size_t shaderGroupIx = 0; shaderGroupIx < params.shaderGroups.misses.size(); shaderGroupIx++) + { + m_missStackSizes->operator[](shaderGroupIx) = getVkShaderGroupStackSize( + getMissBaseIndex(), + shaderGroupIx, + params.shaderGroups.misses[shaderGroupIx].index, + VK_SHADER_GROUP_SHADER_GENERAL_KHR); + } + + for (size_t shaderGroupIx = 0; shaderGroupIx < params.shaderGroups.hits.size(); shaderGroupIx++) + { + const auto& hitGroup = params.shaderGroups.hits[shaderGroupIx]; + const auto baseIndex = getHitBaseIndex(); + m_hitGroupStackSizes->operator[](shaderGroupIx) = SHitGroupStackSize{ + .closestHit = getVkShaderGroupStackSize(baseIndex,shaderGroupIx, hitGroup.closestHit, VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR), + .anyHit = getVkShaderGroupStackSize(baseIndex, shaderGroupIx, hitGroup.anyHit,VK_SHADER_GROUP_SHADER_ANY_HIT_KHR), + .intersection = getVkShaderGroupStackSize(baseIndex, shaderGroupIx, hitGroup.intersection, VK_SHADER_GROUP_SHADER_INTERSECTION_KHR), + }; + } + + for (size_t shaderGroupIx = 0; shaderGroupIx < 
params.shaderGroups.callables.size(); shaderGroupIx++) + { + m_callableStackSizes->operator[](shaderGroupIx) = getVkShaderGroupStackSize( + getCallableBaseIndex(), + shaderGroupIx, + params.shaderGroups.callables[shaderGroupIx].index, + VK_SHADER_GROUP_SHADER_GENERAL_KHR); + } } CVulkanRayTracingPipeline::~CVulkanRayTracingPipeline() @@ -26,27 +79,86 @@ namespace nbl::video vk->vk.vkDestroyPipeline(vulkanDevice->getInternalObject(), m_vkPipeline, nullptr); } - const IGPURayTracingPipeline::SShaderGroupHandle& CVulkanRayTracingPipeline::getRaygen() const { - return m_shaderGroupHandles->operator[](0); + return m_shaderGroupHandles->operator[](getRaygenIndex()); } const IGPURayTracingPipeline::SShaderGroupHandle& CVulkanRayTracingPipeline::getMiss(uint32_t index) const { - const auto baseIndex = 1; // one raygen group before this groups + const auto baseIndex = getMissBaseIndex(); return m_shaderGroupHandles->operator[](baseIndex + index); } const IGPURayTracingPipeline::SShaderGroupHandle& CVulkanRayTracingPipeline::getHit(uint32_t index) const { - const auto baseIndex = 1 + getMissGroupCount(); // one raygen group + miss gropus before this groups + const auto baseIndex = getHitBaseIndex(); return m_shaderGroupHandles->operator[](baseIndex + index); } const IGPURayTracingPipeline::SShaderGroupHandle& CVulkanRayTracingPipeline::getCallable(uint32_t index) const { - const auto baseIndex = 1 + getMissGroupCount() + getHitGroupCount(); // one raygen group + miss groups + hit gropus before this groups + const auto baseIndex = getCallableBaseIndex(); return m_shaderGroupHandles->operator[](baseIndex + index); } + + uint16_t CVulkanRayTracingPipeline::getRaygenStackSize() const + { + return m_raygenStackSize; + } + + std::span CVulkanRayTracingPipeline::getMissStackSizes() const + { + return std::span(m_missStackSizes->begin(), m_missStackSizes->end()); + } + + std::span CVulkanRayTracingPipeline::getHitStackSizes() const + { + return 
std::span(m_hitGroupStackSizes->begin(), m_hitGroupStackSizes->end()); + } + + std::span CVulkanRayTracingPipeline::getCallableStackSizes() const + { + return std::span(m_callableStackSizes->begin(), m_callableStackSizes->end()); + } + + uint16_t CVulkanRayTracingPipeline::getDefaultStackSize() const + { + // calculation follow the formula from + // https://registry.khronos.org/vulkan/specs/latest/html/vkspec.html#ray-tracing-pipeline-stack + const auto raygenStackMax = m_raygenStackSize; + const auto closestHitStackMax = std::ranges::max_element(getHitStackSizes(), std::ranges::less{}, &SHitGroupStackSize::closestHit)->closestHit; + const auto anyHitStackMax = std::ranges::max_element(getHitStackSizes(), std::ranges::less{}, &SHitGroupStackSize::anyHit)->anyHit; + const auto intersectionStackMax = std::ranges::max_element(getHitStackSizes(), std::ranges::less{}, &SHitGroupStackSize::intersection)->intersection; + const auto missStackMax = *std::ranges::max_element(getMissStackSizes()); + const auto callableStackMax = *std::ranges::max_element(getCallableStackSizes()); + return raygenStackMax + std::min(1, m_params.maxRecursionDepth) * + std::max(closestHitStackMax, std::max(missStackMax, intersectionStackMax + anyHitStackMax)) + + std::max(0, m_params.maxRecursionDepth - 1) * std::max(closestHitStackMax, missStackMax) + 2 * + callableStackMax; + } + + uint32_t CVulkanRayTracingPipeline::getRaygenIndex() const + { + return 0; + } + + uint32_t CVulkanRayTracingPipeline::getMissBaseIndex() const + { + // one raygen group before this groups + return 1; + } + + uint32_t CVulkanRayTracingPipeline::getHitBaseIndex() const + { + // one raygen group + miss groups before this groups + return 1 + getMissGroupCount(); + } + + uint32_t CVulkanRayTracingPipeline::getCallableBaseIndex() const + { + // one raygen group + miss groups + hit groups before this groups + return 1 + getMissGroupCount() + getHitGroupCount(); + } + } From 67b5181906842e4cb1e66822768562b405944ff6 Mon Sep 
17 00:00:00 2001 From: kevyuu Date: Mon, 17 Mar 2025 18:57:20 +0700 Subject: [PATCH 63/68] Change get shader group handle api to return span --- include/nbl/asset/IRayTracingPipeline.h | 3 --- include/nbl/video/CVulkanRayTracingPipeline.h | 6 +++--- include/nbl/video/IGPURayTracingPipeline.h | 6 +++--- src/nbl/video/CVulkanRayTracingPipeline.cpp | 17 +++++++++-------- 4 files changed, 15 insertions(+), 17 deletions(-) diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index 3840758fd2..2112db2346 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -178,9 +178,6 @@ class IRayTracingPipeline : public IPipeline, public IRayTra }; inline const SCachedCreationParams& getCachedCreationParams() const { return m_params; } - inline uint32_t getHitGroupCount() const { return m_hitShaderGroups->size(); } - inline uint32_t getMissGroupCount() const { return m_missShaderGroups->size(); } - inline uint32_t getCallableGroupCount() const { return m_callableShaderGroups->size(); } protected: explicit IRayTracingPipeline(const SCreationParams& _params) : diff --git a/include/nbl/video/CVulkanRayTracingPipeline.h b/include/nbl/video/CVulkanRayTracingPipeline.h index 4fd3db5fe3..ca14d44ee9 100644 --- a/include/nbl/video/CVulkanRayTracingPipeline.h +++ b/include/nbl/video/CVulkanRayTracingPipeline.h @@ -31,9 +31,9 @@ class CVulkanRayTracingPipeline final : public IGPURayTracingPipeline inline VkPipeline getInternalObject() const { return m_vkPipeline; } virtual const SShaderGroupHandle& getRaygen() const override; - virtual const SShaderGroupHandle& getMiss(uint32_t index) const override; - virtual const SShaderGroupHandle& getHit(uint32_t index) const override; - virtual const SShaderGroupHandle& getCallable(uint32_t index) const override; + virtual std::span getMissHandles() const override; + virtual std::span getHitHandles() const override; + virtual std::span getCallableHandles() const 
override; virtual uint16_t getRaygenStackSize() const override; virtual std::span getMissStackSizes() const override; diff --git a/include/nbl/video/IGPURayTracingPipeline.h b/include/nbl/video/IGPURayTracingPipeline.h index 8a395931e2..2d0b8961f9 100644 --- a/include/nbl/video/IGPURayTracingPipeline.h +++ b/include/nbl/video/IGPURayTracingPipeline.h @@ -71,9 +71,9 @@ class IGPURayTracingPipeline : public IBackendObject, public asset::IRayTracingP virtual const void* getNativeHandle() const = 0; virtual const SShaderGroupHandle& getRaygen() const = 0; - virtual const SShaderGroupHandle& getMiss(uint32_t index) const = 0; - virtual const SShaderGroupHandle& getHit(uint32_t index) const = 0; - virtual const SShaderGroupHandle& getCallable(uint32_t index) const = 0; + virtual std::span getMissHandles() const = 0; + virtual std::span getHitHandles() const = 0; + virtual std::span getCallableHandles() const = 0; virtual uint16_t getRaygenStackSize() const = 0; virtual std::span getMissStackSizes() const = 0; diff --git a/src/nbl/video/CVulkanRayTracingPipeline.cpp b/src/nbl/video/CVulkanRayTracingPipeline.cpp index d6494c6247..82ed355bba 100644 --- a/src/nbl/video/CVulkanRayTracingPipeline.cpp +++ b/src/nbl/video/CVulkanRayTracingPipeline.cpp @@ -5,6 +5,7 @@ #include "nbl/video/IGPURayTracingPipeline.h" #include +#include namespace nbl::video { @@ -84,22 +85,22 @@ namespace nbl::video return m_shaderGroupHandles->operator[](getRaygenIndex()); } - const IGPURayTracingPipeline::SShaderGroupHandle& CVulkanRayTracingPipeline::getMiss(uint32_t index) const + std::span CVulkanRayTracingPipeline::getMissHandles() const { const auto baseIndex = getMissBaseIndex(); - return m_shaderGroupHandles->operator[](baseIndex + index); + return std::span(m_shaderGroupHandles->begin() + baseIndex, m_missShaderGroups->size()); } - const IGPURayTracingPipeline::SShaderGroupHandle& CVulkanRayTracingPipeline::getHit(uint32_t index) const + std::span CVulkanRayTracingPipeline::getHitHandles() 
const { const auto baseIndex = getHitBaseIndex(); - return m_shaderGroupHandles->operator[](baseIndex + index); + return std::span(m_shaderGroupHandles->begin() + baseIndex, m_hitShaderGroups->size()); } - const IGPURayTracingPipeline::SShaderGroupHandle& CVulkanRayTracingPipeline::getCallable(uint32_t index) const + std::span CVulkanRayTracingPipeline::getCallableHandles() const { const auto baseIndex = getCallableBaseIndex(); - return m_shaderGroupHandles->operator[](baseIndex + index); + return std::span(m_shaderGroupHandles->begin() + baseIndex, m_callableShaderGroups->size()); } uint16_t CVulkanRayTracingPipeline::getRaygenStackSize() const @@ -152,13 +153,13 @@ namespace nbl::video uint32_t CVulkanRayTracingPipeline::getHitBaseIndex() const { // one raygen group + miss groups before this groups - return 1 + getMissGroupCount(); + return 1 + m_missShaderGroups->size(); } uint32_t CVulkanRayTracingPipeline::getCallableBaseIndex() const { // one raygen group + miss groups + hit groups before this groups - return 1 + getMissGroupCount() + getHitGroupCount(); + return 1 + m_missShaderGroups->size() + m_hitShaderGroups->size(); } } From 2904e3e628844d1e6168fab35298750d51bf08b4 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 17 Mar 2025 21:13:29 +0700 Subject: [PATCH 64/68] Fix ray tracing capture replay feature enable logic --- src/nbl/video/CVulkanPhysicalDevice.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/nbl/video/CVulkanPhysicalDevice.cpp b/src/nbl/video/CVulkanPhysicalDevice.cpp index fc45b987bd..3b7df3a9dd 100644 --- a/src/nbl/video/CVulkanPhysicalDevice.cpp +++ b/src/nbl/video/CVulkanPhysicalDevice.cpp @@ -1787,8 +1787,11 @@ core::smart_refctd_ptr CVulkanPhysicalDevice::createLogicalDevic accelerationStructureFeatures.descriptorBindingAccelerationStructureUpdateAfterBind = enabledFeatures.accelerationStructure; rayTracingPipelineFeatures.rayTracingPipeline = enabledFeatures.rayTracingPipeline; - 
rayTracingPipelineFeatures.rayTracingPipelineShaderGroupHandleCaptureReplay = m_rdoc_api!=nullptr; - rayTracingPipelineFeatures.rayTracingPipelineShaderGroupHandleCaptureReplayMixed = m_rdoc_api!=nullptr; + rayTracingPipelineFeatures.rayTracingPipelineShaderGroupHandleCaptureReplay = + rayTracingPipelineFeatures.rayTracingPipelineShaderGroupHandleCaptureReplay && m_initData.api->isRunningInGraphicsDebugger(); + rayTracingPipelineFeatures.rayTracingPipelineShaderGroupHandleCaptureReplayMixed = + rayTracingPipelineFeatures.rayTracingPipelineShaderGroupHandleCaptureReplayMixed && + rayTracingPipelineFeatures.rayTracingPipelineShaderGroupHandleCaptureReplay; rayTracingPipelineFeatures.rayTracingPipelineTraceRaysIndirect = enabledFeatures.rayTracingPipeline; rayTracingPipelineFeatures.rayTraversalPrimitiveCulling = enabledFeatures.rayTraversalPrimitiveCulling; From d79fe20b745f0f7551051ec78eb6bf8449f7faba Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 17 Mar 2025 21:39:12 +0700 Subject: [PATCH 65/68] Remove unused code --- include/nbl/asset/IRayTracingPipeline.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index 2112db2346..6bc66ad241 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -88,8 +88,8 @@ class IRayTracingPipeline : public IPipeline, public IRayTra if (!IPipeline::SCreationParams::layout) return false; - core::bitflag stagePresence = {}; for (const auto info : shaders) + { if (info.shader) { if (!extra(info)) @@ -99,7 +99,6 @@ class IRayTracingPipeline : public IPipeline, public IRayTra return false; if (!std::has_single_bit>(stage)) return false; - stagePresence |= stage; } auto getShaderStage = [this](size_t index) -> ICPUShader::E_SHADER_STAGE From c09154bc270ca433b5457dad9e84ee2831409616 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 17 Mar 2025 21:39:59 +0700 Subject: [PATCH 66/68] Fix ray tracing 
pipeline creation, by not skipping null shader instead disallow it. --- include/nbl/asset/IRayTracingPipeline.h | 8 +++++++- src/nbl/video/CVulkanLogicalDevice.cpp | 5 +---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index 6bc66ad241..e531b034e1 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -95,11 +95,17 @@ class IRayTracingPipeline : public IPipeline, public IRayTra if (!extra(info)) return false; const auto stage = info.shader->getStage(); - if ((stage & ~ICPUShader::E_SHADER_STAGE::ESS_ALL_RAY_TRACING)!=0) + if ((stage & ~ICPUShader::E_SHADER_STAGE::ESS_ALL_RAY_TRACING) != 0) return false; if (!std::has_single_bit>(stage)) return false; } + else + { + // every shader must not be null. use SIndex::Unused to represent unused shader. + return false; + } + } auto getShaderStage = [this](size_t index) -> ICPUShader::E_SHADER_STAGE { diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index e866805b65..27f4e75548 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1502,10 +1502,7 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( outCreateInfo->pStages = outShaderStage; for (const auto& specInfo : info.shaders) { - if (specInfo.shader) - { - *(outShaderStage++) = getVkShaderStageCreateInfoFrom(specInfo,outRequiredSubgroupSize,outSpecInfo,outSpecMapEntry,outSpecData); - } + *(outShaderStage++) = getVkShaderStageCreateInfoFrom(specInfo,outRequiredSubgroupSize,outSpecInfo,outSpecMapEntry,outSpecData); } outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages,outShaderStage); assert(outCreateInfo->stageCount != 0); From c35d01a7e69547518b9f59670f078923c8fb9c52 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 18 Mar 2025 15:07:16 +0700 Subject: [PATCH 67/68] Fix stack size calculation when the shader 
group is empty --- src/nbl/video/CVulkanRayTracingPipeline.cpp | 22 +++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/nbl/video/CVulkanRayTracingPipeline.cpp b/src/nbl/video/CVulkanRayTracingPipeline.cpp index 82ed355bba..0db3ca94ed 100644 --- a/src/nbl/video/CVulkanRayTracingPipeline.cpp +++ b/src/nbl/video/CVulkanRayTracingPipeline.cpp @@ -4,7 +4,6 @@ #include "nbl/video/CVulkanLogicalDevice.h" #include "nbl/video/IGPURayTracingPipeline.h" -#include #include namespace nbl::video @@ -128,11 +127,22 @@ namespace nbl::video // calculation follow the formula from // https://registry.khronos.org/vulkan/specs/latest/html/vkspec.html#ray-tracing-pipeline-stack const auto raygenStackMax = m_raygenStackSize; - const auto closestHitStackMax = std::ranges::max_element(getHitStackSizes(), std::ranges::less{}, &SHitGroupStackSize::closestHit)->closestHit; - const auto anyHitStackMax = std::ranges::max_element(getHitStackSizes(), std::ranges::less{}, &SHitGroupStackSize::anyHit)->anyHit; - const auto intersectionStackMax = std::ranges::max_element(getHitStackSizes(), std::ranges::less{}, &SHitGroupStackSize::intersection)->intersection; - const auto missStackMax = *std::ranges::max_element(getMissStackSizes()); - const auto callableStackMax = *std::ranges::max_element(getCallableStackSizes()); + + auto getMaxSize = [&](auto ranges, auto valProj) -> uint16_t + { + auto maxValue = 0; + for (const auto& val : ranges) + { + maxValue = std::max(maxValue, std::invoke(valProj, val)); + } + return maxValue; + }; + + const auto closestHitStackMax = getMaxSize(getHitStackSizes(), &SHitGroupStackSize::closestHit); + const auto anyHitStackMax = getMaxSize(getHitStackSizes(), &SHitGroupStackSize::anyHit); + const auto intersectionStackMax = getMaxSize(getHitStackSizes(), &SHitGroupStackSize::intersection); + const auto missStackMax = getMaxSize(getMissStackSizes(), std::identity{}); + const auto callableStackMax = getMaxSize(getCallableStackSizes(), 
std::identity{}); return raygenStackMax + std::min(1, m_params.maxRecursionDepth) * std::max(closestHitStackMax, std::max(missStackMax, intersectionStackMax + anyHitStackMax)) + std::max(0, m_params.maxRecursionDepth - 1) * std::max(closestHitStackMax, missStackMax) + 2 * From 3fd2e1453861588a5b792c3ef1f39617673c9405 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 18 Mar 2025 20:50:38 +0700 Subject: [PATCH 68/68] Add tea, lcg and pcg into nb::hlsl::random --- docker/compiler-explorer | 2 +- include/nbl/builtin/hlsl/random/lcg.hlsl | 34 ++++++++++++++++++++ include/nbl/builtin/hlsl/random/pcg.hlsl | 34 ++++++++++++++++++++ include/nbl/builtin/hlsl/random/tea.hlsl | 40 ++++++++++++++++++++++++ src/nbl/builtin/CMakeLists.txt | 3 ++ 5 files changed, 112 insertions(+), 1 deletion(-) create mode 100644 include/nbl/builtin/hlsl/random/lcg.hlsl create mode 100644 include/nbl/builtin/hlsl/random/pcg.hlsl create mode 100644 include/nbl/builtin/hlsl/random/tea.hlsl diff --git a/docker/compiler-explorer b/docker/compiler-explorer index e7d3e6ce85..fb7eebdf99 160000 --- a/docker/compiler-explorer +++ b/docker/compiler-explorer @@ -1 +1 @@ -Subproject commit e7d3e6ce85d4b87bd9afadc5b2ba8c268ccbeb51 +Subproject commit fb7eebdf9972f01d53d284442db13a32f2e2d4ab diff --git a/include/nbl/builtin/hlsl/random/lcg.hlsl b/include/nbl/builtin/hlsl/random/lcg.hlsl new file mode 100644 index 0000000000..09a2263182 --- /dev/null +++ b/include/nbl/builtin/hlsl/random/lcg.hlsl @@ -0,0 +1,34 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_GLSL_RANDOM_LCG_HLSL_INCLUDED_ +#define _NBL_BUILTIN_GLSL_RANDOM_LCG_HLSL_INCLUDED_ + +namespace nbl +{ +namespace hlsl +{ + +struct Lcg +{ + static Lcg construct(NBL_CONST_REF_ARG(uint32_t) state) + { + return Lcg(state); + } + + uint32_t2 operator()() + { + uint32_t LCG_A = 1664525u; + uint32_t LCG_C = 1013904223u; + state = (LCG_A * state + LCG_C); + state &= 0x00FFFFFF; + return state; + } + + uint32_t state; +}; + +} +} +#endif diff --git a/include/nbl/builtin/hlsl/random/pcg.hlsl b/include/nbl/builtin/hlsl/random/pcg.hlsl new file mode 100644 index 0000000000..51a66b355b --- /dev/null +++ b/include/nbl/builtin/hlsl/random/pcg.hlsl @@ -0,0 +1,34 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_GLSL_RANDOM_PCG_HLSL_INCLUDED_ +#define _NBL_BUILTIN_GLSL_RANDOM_PCG_HLSL_INCLUDED_ + +namespace nbl +{ +namespace hlsl +{ + +struct Pcg +{ + static Pcg construct(NBL_CONST_REF_ARG(uint32_t) initialState) + { + uint32_t state = {initialState}; + return Pcg(state); + } + + uint32_t operator()() + { + const uint32_t tmp = state * 747796405u + 2891336453u; + const uint32_t word = ((tmp >> ((tmp >> 28u) + 4u)) ^ tmp) * 277803737u; + state = (word >> 22u) ^ word; + return state; + } + + uint32_t state; +}; + +} +} +#endif diff --git a/include/nbl/builtin/hlsl/random/tea.hlsl b/include/nbl/builtin/hlsl/random/tea.hlsl new file mode 100644 index 0000000000..b477094358 --- /dev/null +++ b/include/nbl/builtin/hlsl/random/tea.hlsl @@ -0,0 +1,40 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_GLSL_RANDOM_TEA_HLSL_INCLUDED_ +#define _NBL_BUILTIN_GLSL_RANDOM_TEA_HLSL_INCLUDED_ + +namespace nbl +{ +namespace hlsl +{ + +struct Tea +{ + static Tea construct() + { + Tea tea = {}; + return tea; + } + + uint32_t2 operator()(uint32_t stream, uint32_t sequence, uint32_t roundCount) + { + uint32_t sum = 0; + uint32_t v0 = stream; + uint32_t v1 = sequence; + for (uint32_t n = 0; n < roundCount; n++) + { + sum += 0x9e3779b9; + v0 += ((v1 << 4) + 0xa341316c) ^ (v1 + sum) ^ ((v1 >> 5) + 0xc8013ea4); + v1 += ((v0 << 4) + 0xad90777d) ^ (v0 + sum) ^ ((v0 >> 5) + 0x7e95761e); + } + + return uint32_t2(v0, v1); + } + +}; + +} +} +#endif diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 291ee64bad..48ede9d6d5 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -106,6 +106,9 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/property_pool/copy.comp") # random numbers LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/random/xoroshiro.glsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/random/xoroshiro.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/random/pcg.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/random/lcg.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/random/tea.hlsl") # sampling LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/sampling/bilinear.glsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/sampling/box_muller_transform.glsl")