diff --git a/docker/compiler-explorer b/docker/compiler-explorer index e7d3e6ce85..fb7eebdf99 160000 --- a/docker/compiler-explorer +++ b/docker/compiler-explorer @@ -1 +1 @@ -Subproject commit e7d3e6ce85d4b87bd9afadc5b2ba8c268ccbeb51 +Subproject commit fb7eebdf9972f01d53d284442db13a32f2e2d4ab diff --git a/include/nbl/asset/ECommonEnums.h b/include/nbl/asset/ECommonEnums.h index 4f49d89a06..7db562cc6a 100644 --- a/include/nbl/asset/ECommonEnums.h +++ b/include/nbl/asset/ECommonEnums.h @@ -11,7 +11,7 @@ enum E_PIPELINE_BIND_POINT : uint8_t { EPBP_GRAPHICS = 0, EPBP_COMPUTE, - + EPBP_RAY_TRACING, EPBP_COUNT }; diff --git a/include/nbl/asset/IPipeline.h b/include/nbl/asset/IPipeline.h index 40623876fe..bd7035158e 100644 --- a/include/nbl/asset/IPipeline.h +++ b/include/nbl/asset/IPipeline.h @@ -54,45 +54,57 @@ class IPipeline // Compute Pipelines only //DISPATCH_BASE = 1<<4, - // Weird extension + // This is for NV-raytracing extension. Now this is done via IDeferredOperation //DEFER_COMPILE_NV = 1<<5, - CAPTURE_STATISTICS = 1<<6, - CAPTURE_INTERNAL_REPRESENTATIONS = 1<<7, + // We use Renderdoc to take care of this for us, + // we won't be parsing the statistics and internal representation ourselves. 
+ //CAPTURE_STATISTICS = 1<<6, + //CAPTURE_INTERNAL_REPRESENTATIONS = 1<<7, - // We require Pipeline Cache Control feature so those are satisfied: - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkComputePipelineCreateInfo.html#VUID-VkComputePipelineCreateInfo-pipelineCreationCacheControl-02875 + // Will soon be deprecated due to + // https://github.com/Devsh-Graphics-Programming/Nabla/issues/854 FAIL_ON_PIPELINE_COMPILE_REQUIRED = 1<<8, EARLY_RETURN_ON_FAILURE = 1<<9, - LINK_TIME_OPTIMIZATION = 1<<10, + // Will be exposed later with the IPipelineLibrary asset implementation + // https://github.com/Devsh-Graphics-Programming/Nabla/issues/853 + //LINK_TIME_OPTIMIZATION = 1<<10, - //Not Supported Yet + // Won't be exposed because we'll introduce Libraries as a separate object/asset-type + // https://github.com/Devsh-Graphics-Programming/Nabla/issues/853 //CREATE_LIBRARY = 1<<11, // Ray Tracing Pipelines only - //RAY_TRACING_SKIP_TRIANGLES_BIT_KHR = 1<<12, - //RAY_TRACING_SKIP_AABBS_BIT_KHR = 1<<13, - //RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR = 1<<14, - //RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR = 1<<15, - //RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR = 1<<16, - //RAY_TRACING_NO_NULL_INTERSECTION_SHADERS_BIT_KHR = 1<<17, - - // Not Supported Yet + //SKIP_BUILT_IN_PRIMITIVES = 1<<12, + //SKIP_AABBS = 1<<13, + //NO_NULL_ANY_HIT_SHADERS = 1<<14, + //NO_NULL_CLOSEST_HIT_SHADERS = 1<<15, + //NO_NULL_MISS_SHADERS = 1<<16, + //NO_NULL_INTERSECTION_SHADERS = 1<<17, + + // There is a new Device Generated Commands extension with its own flag that will deprecate this //INDIRECT_BINDABLE_NV = 1<<18, // Ray Tracing Pipelines only + // For debug tools //RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR = 1<<19, - //RAY_TRACING_ALLOW_MOTION_BIT_NV = 1<<20, + + // Ray Tracing Pipelines only + //ALLOW_MOTION = 1<<20, // Graphics Pipelineonly (we don't support subpass shading) //RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR = 1<<21, 
//RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT = 1<<22, - RETAIN_LINK_TIME_OPTIMIZATION_INFO = 1<<23, + // Will be exposed later with the IPipelineLibrary asset implementation + // https://github.com/Devsh-Graphics-Programming/Nabla/issues/853 + //RETAIN_LINK_TIME_OPTIMIZATION_INFO = 1<<23, // Ray Tracing Pipelines only //RAY_TRACING_OPACITY_MICROMAP_BIT_EXT = 1<<24, + + // Not supported yet, and we will move to dynamic rendering, so this might never be supported //COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT = 1<<25, //DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT = 1<<26, @@ -107,7 +119,8 @@ class IPipeline inline const PipelineLayout* getLayout() const {return m_layout.get();} protected: - inline IPipeline(core::smart_refctd_ptr&& _layout) : m_layout(std::move(_layout)) {} + inline IPipeline(core::smart_refctd_ptr&& _layout) + : m_layout(std::move(_layout)) {} core::smart_refctd_ptr m_layout; }; diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h new file mode 100644 index 0000000000..e531b034e1 --- /dev/null +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -0,0 +1,207 @@ +#ifndef _NBL_ASSET_I_RAY_TRACING_PIPELINE_H_INCLUDED_ +#define _NBL_ASSET_I_RAY_TRACING_PIPELINE_H_INCLUDED_ + +#include "nbl/asset/IShader.h" +#include "nbl/asset/IPipeline.h" + +#include +#include +#include + +namespace nbl::asset +{ + +class IRayTracingPipelineBase : public virtual core::IReferenceCounted +{ + public: + struct SShaderGroupsParams + { + struct SIndex + { + constexpr static inline uint32_t Unused = 0xffFFffFFu; + uint32_t index = Unused; + }; + + struct SHitGroup + { + uint32_t closestHit = SIndex::Unused; + uint32_t anyHit = SIndex::Unused; + uint32_t intersection = SIndex::Unused; + }; + + SIndex raygen; + std::span misses; + std::span hits; + std::span callables; + + inline uint32_t getShaderGroupCount() const + { + return 1 + hits.size() + misses.size() + callables.size(); + } + + }; + using SGeneralShaderGroup = 
SShaderGroupsParams::SIndex; + using SHitShaderGroup = SShaderGroupsParams::SHitGroup; + + struct SCachedCreationParams final + { + uint32_t maxRecursionDepth : 6 = 0; + uint32_t dynamicStackSize : 1 = false; + }; +}; + +template +class IRayTracingPipeline : public IPipeline, public IRayTracingPipelineBase +{ + using base_creation_params_t = IPipeline::SCreationParams; + public: + + using SGeneralShaderGroupContainer = core::smart_refctd_dynamic_array; + using SHitShaderGroupContainer = core::smart_refctd_dynamic_array; + + struct SCreationParams : base_creation_params_t + { + public: + #define base_flag(F) static_cast(base_creation_params_t::FLAGS::F) + enum class FLAGS : uint64_t + { + NONE = base_flag(NONE), + DISABLE_OPTIMIZATIONS = base_flag(DISABLE_OPTIMIZATIONS), + ALLOW_DERIVATIVES = base_flag(ALLOW_DERIVATIVES), + FAIL_ON_PIPELINE_COMPILE_REQUIRED = base_flag(FAIL_ON_PIPELINE_COMPILE_REQUIRED), + EARLY_RETURN_ON_FAILURE = base_flag(EARLY_RETURN_ON_FAILURE), + SKIP_BUILT_IN_PRIMITIVES = 1<<12, + SKIP_AABBS = 1<<13, + NO_NULL_ANY_HIT_SHADERS = 1<<14, + NO_NULL_CLOSEST_HIT_SHADERS = 1<<15, + NO_NULL_MISS_SHADERS = 1<<16, + NO_NULL_INTERSECTION_SHADERS = 1<<17, + ALLOW_MOTION = 1<<20, + }; + #undef base_flag + + protected: + using SpecInfo = ShaderType::SSpecInfo; + template + inline bool impl_valid(ExtraLambda&& extra) const // validates the layout, then every shader (non-null, accepted by `extra`, exactly one ray tracing stage), then the shader group indices + { + if (!IPipeline::SCreationParams::layout) + return false; + + for (const auto& info : shaders) // bind by reference so the spec info is not copied on every iteration + { + if (info.shader) + { + if (!extra(info)) + return false; + const auto stage = info.shader->getStage(); + if ((stage & ~ICPUShader::E_SHADER_STAGE::ESS_ALL_RAY_TRACING) != 0) + return false; + if (!std::has_single_bit>(stage)) + return false; + } + else + { + // every shader must not be null. use SIndex::Unused to represent unused shader. 
+ return false; + } + } + + auto getShaderStage = [this](size_t index) -> ICPUShader::E_SHADER_STAGE + { + return shaders[index].shader->getStage(); + }; + + auto isValidShaderIndex = [this, getShaderStage](size_t index, ICPUShader::E_SHADER_STAGE expectedStage, bool is_unused_shader_forbidden) -> bool + { + if (index == SShaderGroupsParams::SIndex::Unused) + return !is_unused_shader_forbidden; + if (index >= shaders.size()) + return false; + if (getShaderStage(index) != expectedStage) + return false; + return true; + }; + + if (!isValidShaderIndex(shaderGroups.raygen.index, ICPUShader::E_SHADER_STAGE::ESS_RAYGEN, true)) + { + return false; + } + + for (const auto& shaderGroup : shaderGroups.hits) + { + // https://docs.vulkan.org/spec/latest/chapters/pipelines.html#VUID-VkRayTracingPipelineCreateInfoKHR-flags-03470 + if (!isValidShaderIndex(shaderGroup.anyHit, + ICPUShader::E_SHADER_STAGE::ESS_ANY_HIT, + bool(flags & FLAGS::NO_NULL_ANY_HIT_SHADERS))) + return false; + + // https://docs.vulkan.org/spec/latest/chapters/pipelines.html#VUID-VkRayTracingPipelineCreateInfoKHR-flags-03471 + if (!isValidShaderIndex(shaderGroup.closestHit, + ICPUShader::E_SHADER_STAGE::ESS_CLOSEST_HIT, + bool(flags & FLAGS::NO_NULL_CLOSEST_HIT_SHADERS))) + return false; + + if (!isValidShaderIndex(shaderGroup.intersection, + ICPUShader::E_SHADER_STAGE::ESS_INTERSECTION, + false)) + return false; + } + + for (const auto& shaderGroup : shaderGroups.misses) + { + if (!isValidShaderIndex(shaderGroup.index, + ICPUShader::E_SHADER_STAGE::ESS_MISS, + false)) + return false; + } + + for (const auto& shaderGroup : shaderGroups.callables) + { + if (!isValidShaderIndex(shaderGroup.index, ICPUShader::E_SHADER_STAGE::ESS_CALLABLE, false)) + return false; + } + return true; + } + + public: + inline bool valid() const // true when impl_valid() passes and no shader has invalid specialization info + { + return impl_valid([](const SpecInfo& info)->bool + { + if (info.valid() < 0) // NOTE(review): assumes SSpecInfo::valid() returns a signed data size where negative means invalid, as IGPURayTracingPipeline::SCreationParams::valid treats it; confirm + return false; + return true; // previously `return false`, which made valid() fail for every pipeline that had a shader + }); + } + + std::span shaders = {}; + SShaderGroupsParams shaderGroups; +
SCachedCreationParams cached = {}; + // TODO: Could guess the required flags from SPIR-V introspection of declared caps + core::bitflag flags = FLAGS::NONE; + }; + + inline const SCachedCreationParams& getCachedCreationParams() const { return m_params; } + + protected: + explicit IRayTracingPipeline(const SCreationParams& _params) : + IPipeline(core::smart_refctd_ptr(_params.layout)), + m_params(_params.cached), + m_raygenShaderGroup(_params.shaderGroups.raygen), + m_missShaderGroups(core::make_refctd_dynamic_array(_params.shaderGroups.misses)), + m_hitShaderGroups(core::make_refctd_dynamic_array(_params.shaderGroups.hits)), + m_callableShaderGroups(core::make_refctd_dynamic_array(_params.shaderGroups.callables)) + {} + + SCachedCreationParams m_params; + SGeneralShaderGroup m_raygenShaderGroup; + SGeneralShaderGroupContainer m_missShaderGroups; + SHitShaderGroupContainer m_hitShaderGroups; + SGeneralShaderGroupContainer m_callableShaderGroups; + +}; + +} + +#endif diff --git a/include/nbl/builtin/hlsl/enums.hlsl b/include/nbl/builtin/hlsl/enums.hlsl index 61cb46834a..35c0dc8029 100644 --- a/include/nbl/builtin/hlsl/enums.hlsl +++ b/include/nbl/builtin/hlsl/enums.hlsl @@ -29,6 +29,7 @@ enum ShaderStage : uint32_t ESS_INTERSECTION = 1 << 12, ESS_CALLABLE = 1 << 13, ESS_ALL_GRAPHICS = 0x0000001F, + ESS_ALL_RAY_TRACING = ESS_RAYGEN | ESS_ANY_HIT | ESS_CLOSEST_HIT | ESS_MISS | ESS_INTERSECTION | ESS_CALLABLE, ESS_ALL_OR_LIBRARY = 0x7fffffff }; diff --git a/include/nbl/builtin/hlsl/indirect_commands.hlsl b/include/nbl/builtin/hlsl/indirect_commands.hlsl index efc5f8a3c3..ca8418bde7 100644 --- a/include/nbl/builtin/hlsl/indirect_commands.hlsl +++ b/include/nbl/builtin/hlsl/indirect_commands.hlsl @@ -37,6 +37,24 @@ struct DispatchIndirectCommand_t uint32_t num_groups_z; }; +struct TraceRaysIndirectCommand_t +{ + uint64_t raygenShaderRecordAddress; + uint64_t raygenShaderRecordSize; + uint64_t missShaderBindingTableAddress; + uint64_t missShaderBindingTableSize; + uint64_t 
missShaderBindingTableStride; + uint64_t hitShaderBindingTableAddress; + uint64_t hitShaderBindingTableSize; + uint64_t hitShaderBindingTableStride; + uint64_t callableShaderBindingTableAddress; + uint64_t callableShaderBindingTableSize; + uint64_t callableShaderBindingTableStride; + uint32_t width; + uint32_t height; + uint32_t depth; +}; + } } diff --git a/include/nbl/builtin/hlsl/random/lcg.hlsl b/include/nbl/builtin/hlsl/random/lcg.hlsl new file mode 100644 index 0000000000..09a2263182 --- /dev/null +++ b/include/nbl/builtin/hlsl/random/lcg.hlsl @@ -0,0 +1,34 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_GLSL_RANDOM_LCG_HLSL_INCLUDED_ +#define _NBL_BUILTIN_GLSL_RANDOM_LCG_HLSL_INCLUDED_ + +namespace nbl +{ +namespace hlsl +{ + +struct Lcg +{ + static Lcg construct(NBL_CONST_REF_ARG(uint32_t) state) + { + return Lcg(state); + } + + uint32_t operator()() // advances the generator and returns the new state; scalar return (was uint32_t2) since a single value is produced + { + const uint32_t LCG_A = 1664525u; + const uint32_t LCG_C = 1013904223u; + state = (LCG_A * state + LCG_C); + state &= 0x00FFFFFF; // keep only the low 24 bits of the state + return state; + } + + uint32_t state; +}; + +} +} +#endif diff --git a/include/nbl/builtin/hlsl/random/pcg.hlsl b/include/nbl/builtin/hlsl/random/pcg.hlsl new file mode 100644 index 0000000000..51a66b355b --- /dev/null +++ b/include/nbl/builtin/hlsl/random/pcg.hlsl @@ -0,0 +1,34 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_GLSL_RANDOM_PCG_HLSL_INCLUDED_ +#define _NBL_BUILTIN_GLSL_RANDOM_PCG_HLSL_INCLUDED_ + +namespace nbl +{ +namespace hlsl +{ + +struct Pcg +{ + static Pcg construct(NBL_CONST_REF_ARG(uint32_t) initialState) + { + uint32_t state = {initialState}; + return Pcg(state); + } + + uint32_t operator()() + { + const uint32_t tmp = state * 747796405u + 2891336453u; + const uint32_t word = ((tmp >> ((tmp >> 28u) + 4u)) ^ tmp) * 277803737u; + state = (word >> 22u) ^ word; + return state; + } + + uint32_t state; +}; + +} +} +#endif diff --git a/include/nbl/builtin/hlsl/random/tea.hlsl b/include/nbl/builtin/hlsl/random/tea.hlsl new file mode 100644 index 0000000000..b477094358 --- /dev/null +++ b/include/nbl/builtin/hlsl/random/tea.hlsl @@ -0,0 +1,40 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_GLSL_RANDOM_TEA_HLSL_INCLUDED_ +#define _NBL_BUILTIN_GLSL_RANDOM_TEA_HLSL_INCLUDED_ + +namespace nbl +{ +namespace hlsl +{ + +struct Tea +{ + static Tea construct() + { + Tea tea = {}; + return tea; + } + + uint32_t2 operator()(uint32_t stream, uint32_t sequence, uint32_t roundCount) + { + uint32_t sum = 0; + uint32_t v0 = stream; + uint32_t v1 = sequence; + for (uint32_t n = 0; n < roundCount; n++) + { + sum += 0x9e3779b9; + v0 += ((v1 << 4) + 0xa341316c) ^ (v1 + sum) ^ ((v1 >> 5) + 0xc8013ea4); + v1 += ((v0 << 4) + 0xad90777d) ^ (v0 + sum) ^ ((v0 >> 5) + 0x7e95761e); + } + + return uint32_t2(v0, v1); + } + +}; + +} +} +#endif diff --git a/include/nbl/video/CVulkanCommon.h b/include/nbl/video/CVulkanCommon.h index b3cbdcbf80..c43f898264 100644 --- a/include/nbl/video/CVulkanCommon.h +++ b/include/nbl/video/CVulkanCommon.h @@ -1082,7 +1082,33 @@ inline asset::IRenderpass::STORE_OP getAttachmentStoreOpFrom(const VkAttachmentS 
return static_cast(op); } +inline VkPipelineBindPoint getVkPipelineBindPointFrom(asset::E_PIPELINE_BIND_POINT bindPoint) +{ + switch (bindPoint) + { + case asset::EPBP_GRAPHICS: + return VK_PIPELINE_BIND_POINT_GRAPHICS; + case asset::EPBP_COMPUTE: + return VK_PIPELINE_BIND_POINT_COMPUTE; + case asset::EPBP_RAY_TRACING: + return VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR; + default: + assert(!"Invalid code path."); + return VK_PIPELINE_BIND_POINT_MAX_ENUM; + } +} +inline VkStridedDeviceAddressRegionKHR getVkStridedDeviceAddressRegion(const asset::SBufferRange& range, uint32_t stride) +{ + if (range.buffer.get() == nullptr) + return {}; + + return { + .deviceAddress = range.buffer->getDeviceAddress() + range.offset, + .stride = stride, + .size = range.size, + }; +} } #endif diff --git a/include/nbl/video/CVulkanRayTracingPipeline.h b/include/nbl/video/CVulkanRayTracingPipeline.h new file mode 100644 index 0000000000..ca14d44ee9 --- /dev/null +++ b/include/nbl/video/CVulkanRayTracingPipeline.h @@ -0,0 +1,63 @@ +#ifndef _NBL_C_VULKAN_RAY_TRACING_PIPELINE_H_INCLUDED_ +#define _NBL_C_VULKAN_RAY_TRACING_PIPELINE_H_INCLUDED_ + + +#include "nbl/video/IGPURayTracingPipeline.h" + +#include "nbl/video/CVulkanShader.h" + + +namespace nbl::video +{ + +class CVulkanRayTracingPipeline final : public IGPURayTracingPipeline +{ + using ShaderRef = core::smart_refctd_ptr; + using ShaderContainer = core::smart_refctd_dynamic_array; + using GeneralGroupStackSizeContainer = core::smart_refctd_dynamic_array; + using HitGroupStackSizeContainer = core::smart_refctd_dynamic_array; + + public: + + using ShaderGroupHandleContainer = core::smart_refctd_dynamic_array; + + CVulkanRayTracingPipeline( + const SCreationParams& params, + const VkPipeline vk_pipeline, + ShaderGroupHandleContainer&& shaderGroupHandles); + + inline const void* getNativeHandle() const override { return &m_vkPipeline; } + + inline VkPipeline getInternalObject() const { return m_vkPipeline; } + + virtual const 
SShaderGroupHandle& getRaygen() const override; + virtual std::span getMissHandles() const override; + virtual std::span getHitHandles() const override; + virtual std::span getCallableHandles() const override; + + virtual uint16_t getRaygenStackSize() const override; + virtual std::span getMissStackSizes() const override; + virtual std::span getHitStackSizes() const override; + virtual std::span getCallableStackSizes() const override; + virtual uint16_t getDefaultStackSize() const override; + + private: + ~CVulkanRayTracingPipeline() override; + + const VkPipeline m_vkPipeline; + ShaderContainer m_shaders; + ShaderGroupHandleContainer m_shaderGroupHandles; + uint16_t m_raygenStackSize; + core::smart_refctd_dynamic_array m_missStackSizes; + core::smart_refctd_dynamic_array m_hitGroupStackSizes; + core::smart_refctd_dynamic_array m_callableStackSizes; + + uint32_t getRaygenIndex() const; + uint32_t getMissBaseIndex() const; + uint32_t getHitBaseIndex() const; + uint32_t getCallableBaseIndex() const; +}; + +} + +#endif diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index 8fc7cdc737..a1311756b8 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ b/include/nbl/video/IGPUCommandBuffer.h @@ -316,6 +316,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject //! 
state setup bool bindComputePipeline(const IGPUComputePipeline* const pipeline); bool bindGraphicsPipeline(const IGPUGraphicsPipeline* const pipeline); + bool bindRayTracingPipeline(const IGPURayTracingPipeline* const pipeline); bool bindDescriptorSets( const asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const IGPUPipelineLayout* const layout, const uint32_t firstSet, const uint32_t descriptorSetCount, const IGPUDescriptorSet* const* const pDescriptorSets, @@ -523,6 +524,15 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject }; bool resolveImage(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const uint32_t regionCount, const SImageResolve* const pRegions); + bool setRayTracingPipelineStackSize(uint32_t pipelineStackSize); // declared without the class qualifier: extra qualification inside the class body is ill-formed + bool traceRays( + const asset::SBufferRange& raygenGroupRange, + const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, + const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, + const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, + uint32_t width, uint32_t height, uint32_t depth); + bool traceRaysIndirect(const asset::SBufferBinding& indirectBinding); // NOTE(review): presumably the binding holds a TraceRaysIndirectCommand_t; confirm against the implementation + //! 
Secondary CommandBuffer execute bool executeCommands(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs); @@ -534,6 +544,9 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject virtual const void* getNativeHandle() const = 0; inline const core::unordered_map& getBoundDescriptorSetsRecord() const { return m_boundDescriptorSetsRecord; } + const IGPUGraphicsPipeline* getBoundGraphicsPipeline() const { return m_boundGraphicsPipeline; } + const IGPUComputePipeline* getBoundComputePipeline() const { return m_boundComputePipeline; } + const IGPURayTracingPipeline* getBoundRayTracingPipeline() const { return m_boundRayTracingPipeline; } protected: friend class IQueue; @@ -618,6 +631,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject virtual bool bindComputePipeline_impl(const IGPUComputePipeline* const pipeline) = 0; virtual bool bindGraphicsPipeline_impl(const IGPUGraphicsPipeline* const pipeline) = 0; + virtual bool bindRayTracingPipeline_impl(const IGPURayTracingPipeline* const pipeline) = 0; virtual bool bindDescriptorSets_impl( const asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const IGPUPipelineLayout* const layout, const uint32_t firstSet, const uint32_t descriptorSetCount, const IGPUDescriptorSet* const* const pDescriptorSets, @@ -663,6 +677,16 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject virtual bool blitImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const std::span regions, const IGPUSampler::E_TEXTURE_FILTER filter) = 0; virtual bool resolveImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const uint32_t regionCount, const SImageResolve* pRegions) = 0; + virtual bool setRayTracingPipelineStackSize_impl(uint32_t pipelineStackSize) = 0; + virtual bool traceRays_impl( + const asset::SBufferRange& raygenGroupRange, + 
const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, + const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, + const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, + uint32_t width, uint32_t height, uint32_t depth) = 0; + virtual bool traceRaysIndirect_impl( // no class qualifier: extra qualification on an in-class declaration is ill-formed + const asset::SBufferBinding& indirectBinding) = 0; + virtual bool executeCommands_impl(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs) = 0; virtual void releaseResourcesBackToPool_impl() {} @@ -684,6 +708,10 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject m_state = STATE::INITIAL; m_boundDescriptorSetsRecord.clear(); + m_boundGraphicsPipeline= nullptr; + m_boundComputePipeline= nullptr; + m_boundRayTracingPipeline= nullptr; + m_haveRtPipelineStackSize = false; m_commandList.head = nullptr; m_commandList.tail = nullptr; @@ -697,6 +725,10 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject { deleteCommandList(); m_boundDescriptorSetsRecord.clear(); + m_boundGraphicsPipeline= nullptr; + m_boundComputePipeline= nullptr; + m_boundRayTracingPipeline= nullptr; + m_haveRtPipelineStackSize = false; releaseResourcesBackToPool_impl(); } @@ -806,6 +838,13 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject } return invalidImage(image,IGPUImage::EUF_TRANSFER_SRC_BIT); } + + bool invalidShaderGroups( + const asset::SBufferRange& raygenGroupRange, + const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, + const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, + const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, + core::bitflag flags) const; // returns total number of Geometries across all AS build infos template @@ -823,12 +862,16 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject // created with IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT // or 
IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT. core::unordered_map m_boundDescriptorSetsRecord; + const IGPUGraphicsPipeline* m_boundGraphicsPipeline = nullptr; // default-initialised so getBoundGraphicsPipeline() never returns an indeterminate pointer + const IGPUComputePipeline* m_boundComputePipeline = nullptr; // same for getBoundComputePipeline() + const IGPURayTracingPipeline* m_boundRayTracingPipeline = nullptr; // same for getBoundRayTracingPipeline() IGPUCommandPool::CCommandSegmentListPool::SCommandSegmentList m_commandList = {}; uint64_t m_resetCheckedStamp; STATE m_state = STATE::INITIAL; bool m_noCommands = true; + bool m_haveRtPipelineStackSize = false; // only useful while recording SInheritanceInfo m_cachedInheritanceInfo; core::bitflag m_recordingFlags = USAGE::NONE; diff --git a/include/nbl/video/IGPUCommandPool.h b/include/nbl/video/IGPUCommandPool.h index ad9b110e25..6b3a5353a0 100644 --- a/include/nbl/video/IGPUCommandPool.h +++ b/include/nbl/video/IGPUCommandPool.h @@ -10,6 +10,7 @@ #include "nbl/video/IGPUDescriptorSet.h" #include "nbl/video/IGPUComputePipeline.h" #include "nbl/video/IGPUGraphicsPipeline.h" +#include "nbl/video/IGPURayTracingPipeline.h" #include "nbl/video/IGPUFramebuffer.h" #include "nbl/video/IQueryPool.h" @@ -151,6 +152,9 @@ class IGPUCommandPool : public IBackendObject class CBuildAccelerationStructuresCmd; // for both vkCmdBuildAccelerationStructuresKHR and vkCmdBuildAccelerationStructuresIndirectKHR class CCopyAccelerationStructureCmd; class CCopyAccelerationStructureToOrFromMemoryCmd; // for both vkCmdCopyAccelerationStructureToMemoryKHR and vkCmdCopyMemoryToAccelerationStructureKHR + class CTraceRaysCmd; + class CTraceRaysIndirectCmd; + class CBindRayTracingPipelineCmd; protected: IGPUCommandPool(core::smart_refctd_ptr&& dev, const core::bitflag _flags, const uint8_t _familyIx) @@ -822,6 +826,46 @@ class IGPUCommandPool::CCopyAccelerationStructureToOrFromMemoryCmd final : publi core::smart_refctd_ptr m_buffer; }; +class IGPUCommandPool::CTraceRaysCmd final : public IFixedSizeCommand +{ + public: + CTraceRaysCmd( + core::smart_refctd_ptr&& raygenGroupBuffer, + 
core::smart_refctd_ptr&& hitGroupsBuffer, + core::smart_refctd_ptr&& missGroupsBuffer, + core::smart_refctd_ptr&& callableGroupsBuffer) : + m_raygenGroupBuffer(std::move(raygenGroupBuffer)), // named rvalue references are lvalues: move explicitly instead of copying the smart pointer + m_hitGroupsBuffer(std::move(hitGroupsBuffer)), + m_missGroupsBuffer(std::move(missGroupsBuffer)), + m_callableGroupsBuffer(std::move(callableGroupsBuffer)) {} + + + private: + core::smart_refctd_ptr m_raygenGroupBuffer; + core::smart_refctd_ptr m_hitGroupsBuffer; + core::smart_refctd_ptr m_missGroupsBuffer; + core::smart_refctd_ptr m_callableGroupsBuffer; +}; + +class IGPUCommandPool::CTraceRaysIndirectCmd final : public IFixedSizeCommand +{ + public: + CTraceRaysIndirectCmd( + core::smart_refctd_ptr&& bindingBuffer) : + m_bindingBuffer(std::move(bindingBuffer)) {} // move, matching CBindRayTracingPipelineCmd + + private: + core::smart_refctd_ptr m_bindingBuffer; +}; + +class IGPUCommandPool::CBindRayTracingPipelineCmd final : public IFixedSizeCommand +{ + public: + CBindRayTracingPipelineCmd(core::smart_refctd_ptr&& pipeline) : m_pipeline(std::move(pipeline)) {} + + private: + core::smart_refctd_ptr m_pipeline; +}; NBL_ENUM_ADD_BITWISE_OPERATORS(IGPUCommandPool::CREATE_FLAGS) } diff --git a/include/nbl/video/IGPUComputePipeline.h b/include/nbl/video/IGPUComputePipeline.h index 1ecf7b668b..4d0fbaa39f 100644 --- a/include/nbl/video/IGPUComputePipeline.h +++ b/include/nbl/video/IGPUComputePipeline.h @@ -35,12 +35,8 @@ class IGPUComputePipeline : public IBackendObject, public asset::IPipeline&& _layout, const core::bitflag _flags) : - IBackendObject(core::smart_refctd_ptr(_layout->getOriginDevice())), pipeline_t(std::move(_layout)), m_flags(_flags) {} + IBackendObject(core::smart_refctd_ptr(_layout->getOriginDevice())), + pipeline_t(std::move(_layout)), + m_flags(_flags) {} virtual ~IGPUComputePipeline() = default; const core::bitflag m_flags; diff --git a/include/nbl/video/IGPUGraphicsPipeline.h b/include/nbl/video/IGPUGraphicsPipeline.h index cfbba042b7..5f4e61c0d9 100644 --- a/include/nbl/video/IGPUGraphicsPipeline.h +++ b/include/nbl/video/IGPUGraphicsPipeline.h @@ -27,12 
+27,8 @@ class IGPUGraphicsPipeline : public IBackendObject, public asset::IGraphicsPipel DISABLE_OPTIMIZATIONS = base_flag(DISABLE_OPTIMIZATIONS), ALLOW_DERIVATIVES = base_flag(ALLOW_DERIVATIVES), VIEW_INDEX_FROM_DEVICE_INDEX = 1<<3, - CAPTURE_STATISTICS = base_flag(CAPTURE_STATISTICS), - CAPTURE_INTERNAL_REPRESENTATIONS = base_flag(CAPTURE_INTERNAL_REPRESENTATIONS), FAIL_ON_PIPELINE_COMPILE_REQUIRED = base_flag(FAIL_ON_PIPELINE_COMPILE_REQUIRED), EARLY_RETURN_ON_FAILURE = base_flag(EARLY_RETURN_ON_FAILURE), - LINK_TIME_OPTIMIZATION = base_flag(LINK_TIME_OPTIMIZATION), - RETAIN_LINK_TIME_OPTIMIZATION_INFO = base_flag(RETAIN_LINK_TIME_OPTIMIZATION_INFO) }; #undef base_flag diff --git a/include/nbl/video/IGPURayTracingPipeline.h b/include/nbl/video/IGPURayTracingPipeline.h new file mode 100644 index 0000000000..2d0b8961f9 --- /dev/null +++ b/include/nbl/video/IGPURayTracingPipeline.h @@ -0,0 +1,97 @@ +#ifndef _NBL_I_GPU_RAY_TRACING_PIPELINE_H_INCLUDED_ +#define _NBL_I_GPU_RAY_TRACING_PIPELINE_H_INCLUDED_ + +#include "nbl/asset/IPipeline.h" +#include "nbl/asset/IRayTracingPipeline.h" + +#include "nbl/video/SPipelineCreationParams.h" + + +namespace nbl::video +{ + +class IGPURayTracingPipeline : public IBackendObject, public asset::IRayTracingPipeline +{ + using pipeline_t = asset::IRayTracingPipeline; + + public: + + struct SShaderGroupHandle + { + private: + uint8_t data[video::SPhysicalDeviceLimits::ShaderGroupHandleSize]; + }; + static_assert(sizeof(SShaderGroupHandle) == video::SPhysicalDeviceLimits::ShaderGroupHandleSize); + + struct SHitGroupStackSize + { + uint16_t closestHit; + uint16_t anyHit; + uint16_t intersection; + }; + + struct SCreationParams final : pipeline_t::SCreationParams, SPipelineCreationParams + { + + inline SSpecializationValidationResult valid() const + { + if (!layout) + return {}; + + SSpecializationValidationResult retval = { + .count=0, + .dataSize=0, + }; + const bool valid = pipeline_t::SCreationParams::impl_valid([&retval](const 
IGPUShader::SSpecInfo& info)->bool + { + const auto dataSize = info.valid(); + if (dataSize<0) + return false; + else if (dataSize==0) + return true; + + const size_t count = info.entries ? info.entries->size():0x80000000ull; + if (count>0x7fffffff) + return {}; + retval += {.count=dataSize ? static_cast(count):0,.dataSize=static_cast(dataSize)}; + return retval; + }); + if (!valid) + return {}; + return retval; + } + + inline std::span getShaders() const { return shaders; } + + }; + + inline core::bitflag getCreationFlags() const { return m_flags; } + + // Vulkan: const VkPipeline* + virtual const void* getNativeHandle() const = 0; + + virtual const SShaderGroupHandle& getRaygen() const = 0; + virtual std::span getMissHandles() const = 0; + virtual std::span getHitHandles() const = 0; + virtual std::span getCallableHandles() const = 0; + + virtual uint16_t getRaygenStackSize() const = 0; + virtual std::span getMissStackSizes() const = 0; + virtual std::span getHitStackSizes() const = 0; + virtual std::span getCallableStackSizes() const = 0; + virtual uint16_t getDefaultStackSize() const = 0; + + protected: + IGPURayTracingPipeline(const SCreationParams& params) : IBackendObject(core::smart_refctd_ptr(params.layout->getOriginDevice())), + pipeline_t(params), + m_flags(params.flags) + {} + + virtual ~IGPURayTracingPipeline() = default; + + const core::bitflag m_flags; +}; + +} + +#endif diff --git a/include/nbl/video/ILogicalDevice.h b/include/nbl/video/ILogicalDevice.h index 35c40ce2a4..0952fa1471 100644 --- a/include/nbl/video/ILogicalDevice.h +++ b/include/nbl/video/ILogicalDevice.h @@ -326,7 +326,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe const auto maxSize = getPhysicalDeviceLimits().maxBufferSize; if (creationParams.size>maxSize) { - m_logger.log("Failed to create Buffer, size %d larger than Device %p's limit!",system::ILogger::ELL_ERROR,creationParams.size,this,maxSize); + m_logger.log("Failed to create Buffer, size 
%d larger than Device %p's limit (%u)!",system::ILogger::ELL_ERROR,creationParams.size,this,maxSize); return nullptr; } return createBuffer_impl(std::move(creationParams)); @@ -907,6 +907,10 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe core::smart_refctd_ptr* const output ); + bool createRayTracingPipelines(IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output); + // queries inline core::smart_refctd_ptr createQueryPool(const IQueryPool::SCreationParams& params) { @@ -1164,6 +1168,12 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe core::smart_refctd_ptr* const output, const IGPUGraphicsPipeline::SCreationParams::SSpecializationValidationResult& validation ) = 0; + virtual void createRayTracingPipelines_impl( + IGPUPipelineCache* const pipelineCache, + const std::span createInfos, + core::smart_refctd_ptr* const output, + const IGPURayTracingPipeline::SCreationParams::SSpecializationValidationResult& validation + ) = 0; virtual core::smart_refctd_ptr createQueryPool_impl(const IQueryPool::SCreationParams& params) = 0; virtual bool getQueryPoolResults_impl(const IQueryPool* const queryPool, const uint32_t firstQuery, const uint32_t queryCount, void* const pData, const size_t stride, const core::bitflag flags) = 0; diff --git a/include/nbl/video/SPhysicalDeviceLimits.h b/include/nbl/video/SPhysicalDeviceLimits.h index a29d7258c3..4d775ef34d 100644 --- a/include/nbl/video/SPhysicalDeviceLimits.h +++ b/include/nbl/video/SPhysicalDeviceLimits.h @@ -25,6 +25,8 @@ struct SPhysicalDeviceLimits }; using RESOLVE_MODE_FLAGS = nbl::hlsl::ResolveModeFlags; + constexpr static inline uint32_t ShaderGroupHandleSize = 32; + #include "nbl/video/SPhysicalDeviceLimits_members.h" // utility functions diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index ecb3a84f9d..ad464bd035 100755 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -275,6 
+275,7 @@ set(NBL_VIDEO_SOURCES ${NBL_ROOT_PATH}/src/nbl/video/CVulkanConnection.cpp ${NBL_ROOT_PATH}/src/nbl/video/CVulkanPhysicalDevice.cpp ${NBL_ROOT_PATH}/src/nbl/video/CVulkanGraphicsPipeline.cpp + ${NBL_ROOT_PATH}/src/nbl/video/CVulkanRayTracingPipeline.cpp ${NBL_ROOT_PATH}/src/nbl/video/CVulkanEvent.cpp ${NBL_ROOT_PATH}/src/nbl/video/CSurfaceVulkan.cpp diff --git a/src/nbl/asset/interchange/CHLSLLoader.cpp b/src/nbl/asset/interchange/CHLSLLoader.cpp index a6924b132a..e049f3bdab 100644 --- a/src/nbl/asset/interchange/CHLSLLoader.cpp +++ b/src/nbl/asset/interchange/CHLSLLoader.cpp @@ -38,6 +38,12 @@ SAssetBundle CHLSLLoader::loadAsset(system::IFile* _file, const IAssetLoader::SA {".comp.hlsl",IShader::E_SHADER_STAGE::ESS_COMPUTE}, {".mesh.hlsl",IShader::E_SHADER_STAGE::ESS_MESH}, {".task.hlsl",IShader::E_SHADER_STAGE::ESS_TASK}, + {".rgen.hlsl",IShader::E_SHADER_STAGE::ESS_RAYGEN}, + {".rahit.hlsl",IShader::E_SHADER_STAGE::ESS_ANY_HIT}, + {".rchit.hlsl",IShader::E_SHADER_STAGE::ESS_CLOSEST_HIT}, + {".rmiss.hlsl",IShader::E_SHADER_STAGE::ESS_MISS}, + {".rint.hlsl",IShader::E_SHADER_STAGE::ESS_INTERSECTION}, + {".rcall.hlsl",IShader::E_SHADER_STAGE::ESS_CALLABLE}, }; auto shaderStage = IShader::E_SHADER_STAGE::ESS_UNKNOWN; for (auto& it : typeFromExt) diff --git a/src/nbl/asset/utils/CHLSLCompiler.cpp b/src/nbl/asset/utils/CHLSLCompiler.cpp index 0659afc6b5..929a9411b0 100644 --- a/src/nbl/asset/utils/CHLSLCompiler.cpp +++ b/src/nbl/asset/utils/CHLSLCompiler.cpp @@ -44,6 +44,12 @@ static const wchar_t* ShaderStageToString(asset::IShader::E_SHADER_STAGE stage) return L"as"; case asset::IShader::E_SHADER_STAGE::ESS_MESH: return L"ms"; + case asset::IShader::E_SHADER_STAGE::ESS_RAYGEN: [[fallthrough]]; + case asset::IShader::E_SHADER_STAGE::ESS_ANY_HIT: [[fallthrough]]; + case asset::IShader::E_SHADER_STAGE::ESS_CLOSEST_HIT: [[fallthrough]]; + case asset::IShader::E_SHADER_STAGE::ESS_MISS: [[fallthrough]]; + case asset::IShader::E_SHADER_STAGE::ESS_INTERSECTION: 
[[fallthrough]]; + case asset::IShader::E_SHADER_STAGE::ESS_CALLABLE: [[fallthrough]]; case asset::IShader::E_SHADER_STAGE::ESS_ALL_OR_LIBRARY: return L"lib"; default: diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 291ee64bad..48ede9d6d5 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -106,6 +106,9 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/property_pool/copy.comp") # random numbers LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/random/xoroshiro.glsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/random/xoroshiro.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/random/pcg.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/random/lcg.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/random/tea.hlsl") # sampling LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/sampling/bilinear.glsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/sampling/box_muller_transform.glsl") diff --git a/src/nbl/video/CVulkanCommandBuffer.cpp b/src/nbl/video/CVulkanCommandBuffer.cpp index 31d3129240..b569a5fde2 100644 --- a/src/nbl/video/CVulkanCommandBuffer.cpp +++ b/src/nbl/video/CVulkanCommandBuffer.cpp @@ -409,6 +409,12 @@ bool CVulkanCommandBuffer::bindGraphicsPipeline_impl(const IGPUGraphicsPipeline* return true; } +bool CVulkanCommandBuffer::bindRayTracingPipeline_impl(const IGPURayTracingPipeline* const pipeline) +{ + getFunctionTable().vkCmdBindPipeline(m_cmdbuf, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, static_cast(pipeline)->getInternalObject()); + return true; +} + bool CVulkanCommandBuffer::bindDescriptorSets_impl(const asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const IGPUPipelineLayout* const layout, const uint32_t firstSet, const uint32_t descriptorSetCount, const IGPUDescriptorSet* const* const pDescriptorSets, const uint32_t dynamicOffsetCount, const uint32_t* const dynamicOffsets) { VkDescriptorSet 
vk_descriptorSets[IGPUPipelineLayout::DESCRIPTOR_SET_COUNT] = {}; @@ -449,7 +455,7 @@ bool CVulkanCommandBuffer::bindDescriptorSets_impl(const asset::E_PIPELINE_BIND_ dynamicOffsetCount += dynamicOffsetCountPerSet[setIndex]; getFunctionTable().vkCmdBindDescriptorSets( - m_cmdbuf,static_cast(pipelineBindPoint),vk_pipelineLayout, + m_cmdbuf,getVkPipelineBindPointFrom(pipelineBindPoint),vk_pipelineLayout, firstSet+first, last-first, vk_descriptorSets+first, dynamicOffsetCount, dynamicOffsets+dynamicOffsetsBindOffset ); @@ -823,6 +829,38 @@ bool CVulkanCommandBuffer::resolveImage_impl(const IGPUImage* const srcImage, co return true; } +bool CVulkanCommandBuffer::setRayTracingPipelineStackSize_impl(uint32_t pipelineStackSize) +{ + getFunctionTable().vkCmdSetRayTracingPipelineStackSizeKHR(m_cmdbuf, pipelineStackSize); + return true; +} + +bool CVulkanCommandBuffer::traceRays_impl( + const asset::SBufferRange& raygenGroupRange, + const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, + const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, + const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, + uint32_t width, uint32_t height, uint32_t depth) +{ + const auto vk_raygenGroupRegion = getVkStridedDeviceAddressRegion(raygenGroupRange, raygenGroupRange.size); + const auto vk_missGroupsRegion = getVkStridedDeviceAddressRegion(missGroupsRange, missGroupStride); + const auto vk_hitGroupsRegion = getVkStridedDeviceAddressRegion(hitGroupsRange, hitGroupStride); + const auto vk_callableGroupsRegion = getVkStridedDeviceAddressRegion(callableGroupsRange, callableGroupStride); + + getFunctionTable().vkCmdTraceRaysKHR(m_cmdbuf, + &vk_raygenGroupRegion, + &vk_missGroupsRegion, + &vk_hitGroupsRegion, + &vk_callableGroupsRegion, + width, height, depth); + return true; +} + +bool CVulkanCommandBuffer::traceRaysIndirect_impl(const asset::SBufferBinding& indirectBinding) +{ + getFunctionTable().vkCmdTraceRaysIndirect2KHR(m_cmdbuf, 
indirectBinding.buffer->getDeviceAddress() + indirectBinding.offset); + return true; +} bool CVulkanCommandBuffer::executeCommands_impl(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs) { IGPUCommandPool::StackAllocation vk_commandBuffers(m_cmdpool,count); diff --git a/src/nbl/video/CVulkanCommandBuffer.h b/src/nbl/video/CVulkanCommandBuffer.h index e215f07c1f..634d8c4f2b 100644 --- a/src/nbl/video/CVulkanCommandBuffer.h +++ b/src/nbl/video/CVulkanCommandBuffer.h @@ -183,6 +183,7 @@ class CVulkanCommandBuffer final : public IGPUCommandBuffer bool bindComputePipeline_impl(const IGPUComputePipeline* const pipeline) override; bool bindGraphicsPipeline_impl(const IGPUGraphicsPipeline* const pipeline) override; + bool bindRayTracingPipeline_impl(const IGPURayTracingPipeline* const pipeline) override; bool bindDescriptorSets_impl(const asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const IGPUPipelineLayout* const layout, const uint32_t firstSet, const uint32_t descriptorSetCount, const IGPUDescriptorSet* const* const pDescriptorSets, const uint32_t dynamicOffsetCount = 0u, const uint32_t* const dynamicOffsets = nullptr) override; bool pushConstants_impl(const IGPUPipelineLayout* const layout, const core::bitflag stageFlags, const uint32_t offset, const uint32_t size, const void* const pValues) override; bool bindVertexBuffers_impl(const uint32_t firstBinding, const uint32_t bindingCount, const asset::SBufferBinding* const pBindings) override; @@ -224,6 +225,15 @@ class CVulkanCommandBuffer final : public IGPUCommandBuffer bool blitImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const std::span regions, const IGPUSampler::E_TEXTURE_FILTER filter) override; bool resolveImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const uint32_t regionCount, const 
SImageResolve* pRegions) override; + bool setRayTracingPipelineStackSize_impl(uint32_t pipelineStackSize) override; + bool traceRays_impl( + const asset::SBufferRange& raygenGroupRange, + const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, + const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, + const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, + uint32_t width, uint32_t height, uint32_t depth) override; + bool traceRaysIndirect_impl(const asset::SBufferBinding& indirectBinding) override; + bool executeCommands_impl(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs) override; bool insertDebugMarker(const char* name, const core::vector4df_SIMD& color) override final diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index cc96eb6e51..27f4e75548 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1425,6 +1425,132 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( std::fill_n(output,vk_createInfos.size(),nullptr); } +void CVulkanLogicalDevice::createRayTracingPipelines_impl( + IGPUPipelineCache* const pipelineCache, + const std::span createInfos, + core::smart_refctd_ptr* const output, + const IGPURayTracingPipeline::SCreationParams::SSpecializationValidationResult& validation +) +{ + using SShaderGroupParams = asset::IRayTracingPipelineBase::SShaderGroupsParams; + using SGeneralShaderGroup = asset::IRayTracingPipelineBase::SGeneralShaderGroup; + using SHitShaderGroup = asset::IRayTracingPipelineBase::SHitShaderGroup; + + const auto dynamicStates = std::array{ VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR }; + const VkPipelineDynamicStateCreateInfo vk_dynamicStateCreateInfo = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0u, + .dynamicStateCount = dynamicStates.size(), + .pDynamicStates = dynamicStates.data(), + }; + + const VkPipelineCache 
vk_pipelineCache = pipelineCache ? static_cast(pipelineCache)->getInternalObject():VK_NULL_HANDLE; + + size_t maxShaderStages = 0; + for (const auto& info : createInfos) + maxShaderStages += info.shaders.size(); + size_t maxShaderGroups = 0; + for (const auto& info : createInfos) + maxShaderGroups += info.shaderGroups.getShaderGroupCount(); + core::vector vk_createInfos(createInfos.size(), { VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR,nullptr }); + core::vector vk_requiredSubgroupSize(maxShaderStages,{ + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO,nullptr + }); + core::vector vk_shaderStage(maxShaderStages, { VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, nullptr }); + core::vector vk_shaderGroup(maxShaderGroups, { VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, nullptr}); + core::vector vk_specializationInfos(maxShaderStages, { 0, nullptr, 0, nullptr }); + core::vector vk_specializationMapEntry(validation.count); + core::vector specializationData(validation.dataSize); + + auto outCreateInfo = vk_createInfos.data(); + auto outRequiredSubgroupSize = vk_requiredSubgroupSize.data(); + auto outShaderStage = vk_shaderStage.data(); + auto outShaderGroup = vk_shaderGroup.data(); + auto outSpecInfo = vk_specializationInfos.data(); + auto outSpecMapEntry = vk_specializationMapEntry.data(); + auto outSpecData = specializationData.data(); + auto getVkShaderIndex = [](uint32_t index) { return index == SShaderGroupParams::SIndex::Unused ? 
VK_SHADER_UNUSED_KHR : index; }; + auto getGeneralVkRayTracingShaderGroupCreateInfo = [getVkShaderIndex](SGeneralShaderGroup group) -> VkRayTracingShaderGroupCreateInfoKHR + { + return { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .pNext = nullptr, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, + .generalShader = getVkShaderIndex(group.index), + .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = VK_SHADER_UNUSED_KHR, + .intersectionShader = VK_SHADER_UNUSED_KHR, + }; + }; + auto getHitVkRayTracingShaderGroupCreateInfo = [getVkShaderIndex](SHitShaderGroup group) -> VkRayTracingShaderGroupCreateInfoKHR + { + return { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .pNext = nullptr, + .type = group.intersection == SShaderGroupParams::SIndex::Unused ? + VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR : VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR, + .generalShader = VK_SHADER_UNUSED_KHR, + .closestHitShader = getVkShaderIndex(group.closestHit), + .anyHitShader = getVkShaderIndex(group.anyHit), + .intersectionShader = getVkShaderIndex(group.intersection), + }; + }; + for (const auto& info : createInfos) + { + initPipelineCreateInfo(outCreateInfo,info); + outCreateInfo->pStages = outShaderStage; + for (const auto& specInfo : info.shaders) + { + *(outShaderStage++) = getVkShaderStageCreateInfoFrom(specInfo,outRequiredSubgroupSize,outSpecInfo,outSpecMapEntry,outSpecData); + } + outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages,outShaderStage); + assert(outCreateInfo->stageCount != 0); + + const auto& shaderGroups = info.shaderGroups; + outCreateInfo->pGroups = outShaderGroup; + *(outShaderGroup++) = getGeneralVkRayTracingShaderGroupCreateInfo(shaderGroups.raygen); + for (const auto& shaderGroup : shaderGroups.misses) + *(outShaderGroup++) = getGeneralVkRayTracingShaderGroupCreateInfo(shaderGroup); + for (const auto& shaderGroup : shaderGroups.hits) + 
*(outShaderGroup++) = getHitVkRayTracingShaderGroupCreateInfo(shaderGroup); + for (const auto& shaderGroup : shaderGroups.callables) + *(outShaderGroup++) = getGeneralVkRayTracingShaderGroupCreateInfo(shaderGroup); + outCreateInfo->groupCount = 1 + shaderGroups.hits.size() + shaderGroups.misses.size() + shaderGroups.callables.size(); + outCreateInfo->maxPipelineRayRecursionDepth = info.cached.maxRecursionDepth; + if (info.cached.dynamicStackSize) + { + outCreateInfo->pDynamicState = &vk_dynamicStateCreateInfo; + } + } + + auto vk_pipelines = reinterpret_cast(output); + if (m_devf.vk.vkCreateRayTracingPipelinesKHR(m_vkdev, VK_NULL_HANDLE, vk_pipelineCache,vk_createInfos.size(),vk_createInfos.data(),nullptr,vk_pipelines)==VK_SUCCESS) + { + for (size_t i=0ull; i(handleCount); + const auto success = m_devf.vk.vkGetRayTracingShaderGroupHandlesKHR(m_vkdev, vk_pipeline, 0, handleCount, dataSize, shaderGroupHandles->data()) == VK_SUCCESS; + assert(success); + + output[i] = core::make_smart_refctd_ptr( + createInfos[i], + vk_pipeline, + std::move(shaderGroupHandles) + ); + } + } + else + std::fill_n(output,vk_createInfos.size(),nullptr); +} + core::smart_refctd_ptr CVulkanLogicalDevice::createQueryPool_impl(const IQueryPool::SCreationParams& params) { VkQueryPoolCreateInfo info = {VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, nullptr}; diff --git a/src/nbl/video/CVulkanLogicalDevice.h b/src/nbl/video/CVulkanLogicalDevice.h index 410efe5d80..3ed5e9983a 100644 --- a/src/nbl/video/CVulkanLogicalDevice.h +++ b/src/nbl/video/CVulkanLogicalDevice.h @@ -31,6 +31,7 @@ #include "nbl/video/CVulkanDeferredOperation.h" #include "nbl/video/CVulkanAccelerationStructure.h" #include "nbl/video/CVulkanGraphicsPipeline.h" +#include "nbl/video/CVulkanRayTracingPipeline.h" namespace nbl::video @@ -302,6 +303,13 @@ class CVulkanLogicalDevice final : public ILogicalDevice const IGPUGraphicsPipeline::SCreationParams::SSpecializationValidationResult& validation ) override; + void 
createRayTracingPipelines_impl( + IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output, + const IGPURayTracingPipeline::SCreationParams::SSpecializationValidationResult& validation + ) override; + // queries core::smart_refctd_ptr createQueryPool_impl(const IQueryPool::SCreationParams& params) override; bool getQueryPoolResults_impl(const IQueryPool* const queryPool, const uint32_t firstQuery, const uint32_t queryCount, void* const pData, const size_t stride, const core::bitflag flags) override; diff --git a/src/nbl/video/CVulkanPhysicalDevice.cpp b/src/nbl/video/CVulkanPhysicalDevice.cpp index fc45b987bd..3b7df3a9dd 100644 --- a/src/nbl/video/CVulkanPhysicalDevice.cpp +++ b/src/nbl/video/CVulkanPhysicalDevice.cpp @@ -1787,8 +1787,11 @@ core::smart_refctd_ptr CVulkanPhysicalDevice::createLogicalDevic accelerationStructureFeatures.descriptorBindingAccelerationStructureUpdateAfterBind = enabledFeatures.accelerationStructure; rayTracingPipelineFeatures.rayTracingPipeline = enabledFeatures.rayTracingPipeline; - rayTracingPipelineFeatures.rayTracingPipelineShaderGroupHandleCaptureReplay = m_rdoc_api!=nullptr; - rayTracingPipelineFeatures.rayTracingPipelineShaderGroupHandleCaptureReplayMixed = m_rdoc_api!=nullptr; + rayTracingPipelineFeatures.rayTracingPipelineShaderGroupHandleCaptureReplay = + rayTracingPipelineFeatures.rayTracingPipelineShaderGroupHandleCaptureReplay && m_initData.api->isRunningInGraphicsDebugger(); + rayTracingPipelineFeatures.rayTracingPipelineShaderGroupHandleCaptureReplayMixed = + rayTracingPipelineFeatures.rayTracingPipelineShaderGroupHandleCaptureReplayMixed && + rayTracingPipelineFeatures.rayTracingPipelineShaderGroupHandleCaptureReplay; rayTracingPipelineFeatures.rayTracingPipelineTraceRaysIndirect = enabledFeatures.rayTracingPipeline; rayTracingPipelineFeatures.rayTraversalPrimitiveCulling = enabledFeatures.rayTraversalPrimitiveCulling; diff --git a/src/nbl/video/CVulkanRayTracingPipeline.cpp 
b/src/nbl/video/CVulkanRayTracingPipeline.cpp new file mode 100644 index 0000000000..0db3ca94ed --- /dev/null +++ b/src/nbl/video/CVulkanRayTracingPipeline.cpp @@ -0,0 +1,175 @@ +#include "nbl/asset/IRayTracingPipeline.h" + +#include "nbl/video/CVulkanRayTracingPipeline.h" +#include "nbl/video/CVulkanLogicalDevice.h" +#include "nbl/video/IGPURayTracingPipeline.h" + +#include + +namespace nbl::video +{ + + CVulkanRayTracingPipeline::CVulkanRayTracingPipeline( + const SCreationParams& params, + const VkPipeline vk_pipeline, + ShaderGroupHandleContainer&& shaderGroupHandles) : + IGPURayTracingPipeline(params), + m_vkPipeline(vk_pipeline), + m_shaders(core::make_refctd_dynamic_array(params.shaders.size())), + m_missStackSizes(core::make_refctd_dynamic_array(params.shaderGroups.misses.size())), + m_hitGroupStackSizes(core::make_refctd_dynamic_array(params.shaderGroups.hits.size())), + m_callableStackSizes(core::make_refctd_dynamic_array(params.shaderGroups.callables.size())), // one slot per callable group: the fill loop iterates shaderGroups.callables, not hits + m_shaderGroupHandles(std::move(shaderGroupHandles)) + { + for (size_t shaderIx = 0; shaderIx < params.shaders.size(); shaderIx++) + m_shaders->operator[](shaderIx) = ShaderRef(static_cast(params.shaders[shaderIx].shader)); + + const auto* vulkanDevice = static_cast(getOriginDevice()); + auto* vk = vulkanDevice->getFunctionTable(); + + auto getVkShaderGroupStackSize = [&](uint32_t baseGroupIx, uint32_t shaderGroupIx, uint32_t shaderIx, VkShaderGroupShaderKHR shaderType) -> uint16_t + { + if (shaderIx == SShaderGroupsParams::SIndex::Unused) + return 0; + + return vk->vk.vkGetRayTracingShaderGroupStackSizeKHR( + vulkanDevice->getInternalObject(), + m_vkPipeline, + baseGroupIx + shaderGroupIx, + shaderType + ); + }; + + m_raygenStackSize = getVkShaderGroupStackSize(getRaygenIndex(), 0, params.shaderGroups.raygen.index, VK_SHADER_GROUP_SHADER_GENERAL_KHR); + + for (size_t shaderGroupIx = 0; shaderGroupIx < params.shaderGroups.misses.size(); shaderGroupIx++) + { + m_missStackSizes->operator[](shaderGroupIx) 
= getVkShaderGroupStackSize( + getMissBaseIndex(), + shaderGroupIx, + params.shaderGroups.misses[shaderGroupIx].index, + VK_SHADER_GROUP_SHADER_GENERAL_KHR); + } + + for (size_t shaderGroupIx = 0; shaderGroupIx < params.shaderGroups.hits.size(); shaderGroupIx++) + { + const auto& hitGroup = params.shaderGroups.hits[shaderGroupIx]; + const auto baseIndex = getHitBaseIndex(); + m_hitGroupStackSizes->operator[](shaderGroupIx) = SHitGroupStackSize{ + .closestHit = getVkShaderGroupStackSize(baseIndex,shaderGroupIx, hitGroup.closestHit, VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR), + .anyHit = getVkShaderGroupStackSize(baseIndex, shaderGroupIx, hitGroup.anyHit,VK_SHADER_GROUP_SHADER_ANY_HIT_KHR), + .intersection = getVkShaderGroupStackSize(baseIndex, shaderGroupIx, hitGroup.intersection, VK_SHADER_GROUP_SHADER_INTERSECTION_KHR), + }; + } + + for (size_t shaderGroupIx = 0; shaderGroupIx < params.shaderGroups.callables.size(); shaderGroupIx++) + { + m_callableStackSizes->operator[](shaderGroupIx) = getVkShaderGroupStackSize( + getCallableBaseIndex(), + shaderGroupIx, + params.shaderGroups.callables[shaderGroupIx].index, + VK_SHADER_GROUP_SHADER_GENERAL_KHR); + } + } + + CVulkanRayTracingPipeline::~CVulkanRayTracingPipeline() + { + const auto* vulkanDevice = static_cast(getOriginDevice()); + auto* vk = vulkanDevice->getFunctionTable(); + vk->vk.vkDestroyPipeline(vulkanDevice->getInternalObject(), m_vkPipeline, nullptr); + } + + const IGPURayTracingPipeline::SShaderGroupHandle& CVulkanRayTracingPipeline::getRaygen() const + { + return m_shaderGroupHandles->operator[](getRaygenIndex()); + } + + std::span CVulkanRayTracingPipeline::getMissHandles() const + { + const auto baseIndex = getMissBaseIndex(); + return std::span(m_shaderGroupHandles->begin() + baseIndex, m_missShaderGroups->size()); + } + + std::span CVulkanRayTracingPipeline::getHitHandles() const + { + const auto baseIndex = getHitBaseIndex(); + return std::span(m_shaderGroupHandles->begin() + baseIndex, 
m_hitShaderGroups->size()); + } + + std::span CVulkanRayTracingPipeline::getCallableHandles() const + { + const auto baseIndex = getCallableBaseIndex(); + return std::span(m_shaderGroupHandles->begin() + baseIndex, m_callableShaderGroups->size()); + } + + uint16_t CVulkanRayTracingPipeline::getRaygenStackSize() const + { + return m_raygenStackSize; + } + + std::span CVulkanRayTracingPipeline::getMissStackSizes() const + { + return std::span(m_missStackSizes->begin(), m_missStackSizes->end()); + } + + std::span CVulkanRayTracingPipeline::getHitStackSizes() const + { + return std::span(m_hitGroupStackSizes->begin(), m_hitGroupStackSizes->end()); + } + + std::span CVulkanRayTracingPipeline::getCallableStackSizes() const + { + return std::span(m_callableStackSizes->begin(), m_callableStackSizes->end()); + } + + uint16_t CVulkanRayTracingPipeline::getDefaultStackSize() const + { + // calculation follow the formula from + // https://registry.khronos.org/vulkan/specs/latest/html/vkspec.html#ray-tracing-pipeline-stack + const auto raygenStackMax = m_raygenStackSize; + + auto getMaxSize = [&](auto ranges, auto valProj) -> uint16_t + { + auto maxValue = 0; + for (const auto& val : ranges) + { + maxValue = std::max(maxValue, std::invoke(valProj, val)); + } + return maxValue; + }; + + const auto closestHitStackMax = getMaxSize(getHitStackSizes(), &SHitGroupStackSize::closestHit); + const auto anyHitStackMax = getMaxSize(getHitStackSizes(), &SHitGroupStackSize::anyHit); + const auto intersectionStackMax = getMaxSize(getHitStackSizes(), &SHitGroupStackSize::intersection); + const auto missStackMax = getMaxSize(getMissStackSizes(), std::identity{}); + const auto callableStackMax = getMaxSize(getCallableStackSizes(), std::identity{}); + return raygenStackMax + std::min(1, m_params.maxRecursionDepth) * + std::max(closestHitStackMax, std::max(missStackMax, intersectionStackMax + anyHitStackMax)) + + std::max(0, m_params.maxRecursionDepth - 1) * std::max(closestHitStackMax, 
missStackMax) + 2 * + callableStackMax; + } + + uint32_t CVulkanRayTracingPipeline::getRaygenIndex() const + { + return 0; + } + + uint32_t CVulkanRayTracingPipeline::getMissBaseIndex() const + { + // one raygen group before this groups + return 1; + } + + uint32_t CVulkanRayTracingPipeline::getHitBaseIndex() const + { + // one raygen group + miss groups before this groups + return 1 + m_missShaderGroups->size(); + } + + uint32_t CVulkanRayTracingPipeline::getCallableBaseIndex() const + { + // one raygen group + miss groups + hit groups before this groups + return 1 + m_missShaderGroups->size() + m_hitShaderGroups->size(); + } + +} diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index 43fc709b77..91bdd366dd 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -5,6 +5,8 @@ #define NBL_LOG_FUNCTION m_logger.log #include "nbl/logging_macros.h" +#include "nbl/builtin/hlsl/indirect_commands.hlsl" + namespace nbl::video { @@ -682,6 +684,93 @@ bool IGPUCommandBuffer::copyImage(const IGPUImage* const srcImage, const IGPUIma return copyImage_impl(srcImage, srcImageLayout, dstImage, dstImageLayout, regionCount, pRegions); } +bool IGPUCommandBuffer::invalidShaderGroups( + const asset::SBufferRange& raygenGroupRange, + const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, + const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, + const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, + core::bitflag flags) const +{ + + using PipelineFlag = IGPURayTracingPipeline::SCreationParams::FLAGS; + using PipelineFlags = core::bitflag; + + // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-flags-03696 + // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-flags-03697 + // 
https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-flags-03512 + const auto shouldHaveHitGroup = flags & + (PipelineFlags(PipelineFlag::NO_NULL_ANY_HIT_SHADERS) | + PipelineFlag::NO_NULL_CLOSEST_HIT_SHADERS | + PipelineFlag::NO_NULL_INTERSECTION_SHADERS); + if (shouldHaveHitGroup && !hitGroupsRange.buffer) + { + NBL_LOG_ERROR("bound pipeline indicates that traceRays command should have hit group, but hitGroupsRange.buffer is null!"); + return true; + } + + // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-flags-03511 + const auto shouldHaveMissGroup = flags & PipelineFlag::NO_NULL_MISS_SHADERS; + if (shouldHaveMissGroup && !missGroupsRange.buffer) + { + NBL_LOG_ERROR("bound pipeline indicates that traceRays command should have miss group, but missGroupsRange.buffer is null!"); + return true; + } + + const auto& limits = getOriginDevice()->getPhysicalDevice()->getLimits(); + auto invalidBufferRegion = [this, &limits](const asset::SBufferRange& range, uint32_t stride, const char* groupName) -> bool + { + const IGPUBuffer* const buffer = range.buffer.get(); + + if (!buffer) return false; + + if (!range.isValid()) + { + NBL_LOG_ERROR("%s buffer range is not valid!", groupName); + return true; + } + + if (!(buffer->getCreationParams().usage & IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT)) + { + NBL_LOG_ERROR("%s buffer must have EUF_SHADER_DEVICE_ADDRESS_BIT usage!", groupName); + return true; + } + + // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-pRayGenShaderBindingTable-03689 + if ((range.buffer->getDeviceAddress() + range.offset) % limits.shaderGroupBaseAlignment != 0) + { + NBL_LOG_ERROR("%s buffer offset must be multiple of %u!", groupName, limits.shaderGroupBaseAlignment); + return true; + } + + // 
https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-pHitShaderBindingTable-03690 + if (stride % limits.shaderGroupHandleAlignment) + { + NBL_LOG_ERROR("%s buffer stride must be multiple of %u!", groupName, limits.shaderGroupHandleAlignment); + return true; + } + + if (stride > limits.maxShaderGroupStride) + { + NBL_LOG_ERROR("%s buffer stride must not exceed %u!", groupName, limits.maxShaderGroupStride); + return true; + } + + // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-pRayGenShaderBindingTable-03681 + if (!(buffer->getCreationParams().usage & IGPUBuffer::EUF_SHADER_BINDING_TABLE_BIT)) + { + NBL_LOG_ERROR("%s buffer must have EUF_SHADER_BINDING_TABLE_BIT usage!", groupName); + return true; + } + + return false; + }; + + if (invalidBufferRegion(raygenGroupRange, raygenGroupRange.size, "Raygen Group")) return true; + if (invalidBufferRegion(missGroupsRange, missGroupStride, "Miss groups")) return true; + if (invalidBufferRegion(hitGroupsRange, hitGroupStride, "Hit groups")) return true; + if (invalidBufferRegion(callableGroupsRange, callableGroupStride, "Callable groups")) return true; + return false; +} template uint32_t IGPUCommandBuffer::buildAccelerationStructures_common(const std::span infos, BuildRangeInfos ranges, const IGPUBuffer* const indirectBuffer) @@ -854,6 +943,8 @@ bool IGPUCommandBuffer::bindComputePipeline(const IGPUComputePipeline* const pip return false; } + m_boundComputePipeline = pipeline; + m_noCommands = false; bindComputePipeline_impl(pipeline); @@ -880,10 +971,40 @@ bool IGPUCommandBuffer::bindGraphicsPipeline(const IGPUGraphicsPipeline* const p return false; } + m_boundGraphicsPipeline = pipeline; + m_noCommands = false; return bindGraphicsPipeline_impl(pipeline); } +bool IGPUCommandBuffer::bindRayTracingPipeline(const IGPURayTracingPipeline* const pipeline) +{ + if 
(!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT)) + return false; + + if (!pipeline || !this->isCompatibleDevicewise(pipeline)) + { + NBL_LOG_ERROR("incompatible pipeline device!"); + return false; + } + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(pipeline))) + { + NBL_LOG_ERROR("out of host memory!"); + return false; + } + + if (!pipeline->getCachedCreationParams().dynamicStackSize) + { + m_haveRtPipelineStackSize = false; + } + + m_boundRayTracingPipeline = pipeline; + + m_noCommands = false; + return bindRayTracingPipeline_impl(pipeline); +} + bool IGPUCommandBuffer::bindDescriptorSets( const asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const IGPUPipelineLayout* const layout, const uint32_t firstSet, const uint32_t descriptorSetCount, const IGPUDescriptorSet* const* const pDescriptorSets, @@ -1774,6 +1895,144 @@ bool IGPUCommandBuffer::resolveImage(const IGPUImage* const srcImage, const IGPU return resolveImage_impl(srcImage, srcImageLayout, dstImage, dstImageLayout, regionCount, pRegions); } +bool IGPUCommandBuffer::setRayTracingPipelineStackSize(uint32_t pipelineStackSize) +{ + if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT,RENDERPASS_SCOPE::OUTSIDE)) + return false; + if (m_boundRayTracingPipeline != nullptr && !m_boundRayTracingPipeline->getCachedCreationParams().dynamicStackSize) // diagnose only when the bound pipeline's stack size is NOT dynamic; the command is still recorded + { + NBL_LOG_ERROR("Cannot set dynamic state when state is not mark as dynamic on bound pipeline!"); + } + m_haveRtPipelineStackSize = true; + return setRayTracingPipelineStackSize_impl(pipelineStackSize); +} + +bool IGPUCommandBuffer::traceRays( + const asset::SBufferRange& raygenGroupRange, + const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, + const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, + const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, + uint32_t width, uint32_t height, uint32_t depth) +{ + if 
(!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT,RENDERPASS_SCOPE::OUTSIDE)) + return false; + + // https://docs.vulkan.org/spec/latest/chapters/raytracing.html#VUID-vkCmdTraceRaysKHR-width-03638 + // https://docs.vulkan.org/spec/latest/chapters/raytracing.html#VUID-vkCmdTraceRaysKHR-height-03639 + // https://docs.vulkan.org/spec/latest/chapters/raytracing.html#VUID-vkCmdTraceRaysKHR-depth-03640 + const auto& limits = getOriginDevice()->getPhysicalDevice()->getLimits(); + const auto maxWidth = uint64_t(limits.maxComputeWorkGroupCount[0]) * limits.maxWorkgroupSize[0]; // widened to 64 bits: count*size can exceed the 32-bit range + const auto maxHeight = uint64_t(limits.maxComputeWorkGroupCount[1]) * limits.maxWorkgroupSize[1]; + const auto maxDepth = uint64_t(limits.maxComputeWorkGroupCount[2]) * limits.maxWorkgroupSize[2]; + if (width == 0 || height == 0 || depth == 0 || width > maxWidth || height > maxHeight || depth > maxDepth) + { + NBL_LOG_ERROR("invalid work counts (%d, %d, %d)!", width, height, depth); + return false; + } + + // https://docs.vulkan.org/spec/latest/chapters/raytracing.html#VUID-vkCmdTraceRaysKHR-width-03641 + const uint64_t sliceInvocationCount = uint64_t(width) * height; // 64-bit and checked in two steps so the product cannot wrap around and slip under the limit + if (sliceInvocationCount > limits.maxRayDispatchInvocationCount || sliceInvocationCount * depth > limits.maxRayDispatchInvocationCount) + { + NBL_LOG_ERROR("invalid invocation count (%u x %u x %u)!", width, height, depth); + return false; + } + + if (m_boundRayTracingPipeline == nullptr) + { + NBL_LOG_ERROR("invalid bound pipeline for traceRays command!"); + return false; + } + const auto flags = m_boundRayTracingPipeline->getCreationFlags(); + + if (invalidShaderGroups(raygenGroupRange, + missGroupsRange, missGroupStride, + hitGroupsRange, hitGroupStride, + callableGroupsRange, callableGroupStride, + flags)) + { + NBL_LOG_ERROR("invalid shader groups for traceRays command!"); + return false; + } + + // https://docs.vulkan.org/spec/latest/chapters/raytracing.html#VUID-vkCmdTraceRaysKHR-None-09458 + if (m_boundRayTracingPipeline->getCachedCreationParams().dynamicStackSize && !m_haveRtPipelineStackSize) + { + NBL_LOG_ERROR("no setRayTracingPipelineStackSize 
command submitted before traceRays command with dynamic stack size pipeline!"); + return false; + } + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, + core::smart_refctd_ptr(raygenGroupRange.buffer), + core::smart_refctd_ptr(missGroupsRange.buffer), + core::smart_refctd_ptr(hitGroupsRange.buffer), + core::smart_refctd_ptr(callableGroupsRange.buffer))) + { + NBL_LOG_ERROR("out of host memory!"); + return false; + } + + m_noCommands = false; + + return traceRays_impl( + raygenGroupRange, + missGroupsRange, missGroupStride, + hitGroupsRange, hitGroupStride, + callableGroupsRange, callableGroupStride, + width, height, depth); +} + +bool IGPUCommandBuffer::traceRaysIndirect(const asset::SBufferBinding& indirectBinding) +{ + if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT,RENDERPASS_SCOPE::OUTSIDE)) + return false; + + if (m_boundRayTracingPipeline == nullptr) + { + NBL_LOG_ERROR("invalid bound pipeline for traceRays command!"); + return false; + } + + const auto& features = getOriginDevice()->getEnabledFeatures(); + + // https://docs.vulkan.org/spec/latest/chapters/raytracing.html#VUID-vkCmdTraceRaysIndirect2KHR-rayTracingMotionBlurPipelineTraceRaysIndirect-04951 + if (m_boundRayTracingPipeline->getCreationFlags() & IGPURayTracingPipeline::SCreationParams::FLAGS::ALLOW_MOTION && !features.rayTracingMotionBlurPipelineTraceRaysIndirect) + { + NBL_LOG_ERROR("If the bound ray tracing pipeline is created with ALLOW_MOTION, rayTracingMotionBlurPipelineTraceRaysIndirect feature must be enabled!"); + return false; + } + + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/vkCmdTraceRaysIndirectKHR.html#VUID-vkCmdTraceRaysIndirectKHR-indirectDeviceAddress-03634 + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/vkCmdTraceRaysIndirectKHR.html#VUID-vkCmdTraceRaysIndirectKHR-indirectDeviceAddress-03633 + if (invalidBufferBinding(indirectBinding, 4u,IGPUBuffer::EUF_INDIRECT_BUFFER_BIT | 
IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT)) + return false; + + // https://docs.vulkan.org/spec/latest/chapters/raytracing.html#VUID-vkCmdTraceRaysIndirect2KHR-indirectDeviceAddress-03633 + if (indirectBinding.offset + sizeof(hlsl::TraceRaysIndirectCommand_t) > indirectBinding.buffer->getSize()) + { + NBL_LOG_ERROR("buffer size - offset must be at least the size of TraceRaysIndirectCommand_t!"); + return false; + } + + // https://docs.vulkan.org/spec/latest/chapters/raytracing.html#VUID-vkCmdTraceRaysIndirect2KHR-None-09458 + if (m_boundRayTracingPipeline->getCachedCreationParams().dynamicStackSize && !m_haveRtPipelineStackSize) + { + NBL_LOG_ERROR("no setRayTracingPipelineStackSize command submitted before traceRays command with dynamic stack size pipeline!"); + return false; + } + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, + core::smart_refctd_ptr(indirectBinding.buffer))) + { + NBL_LOG_ERROR("out of host memory!"); + return false; + } + + m_noCommands = false; + + return traceRaysIndirect_impl(indirectBinding); +} + bool IGPUCommandBuffer::executeCommands(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs) { if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT|queue_flags_t::GRAPHICS_BIT|queue_flags_t::TRANSFER_BIT)) diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index 94ae6b48e7..b3fb989a8a 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -942,4 +942,90 @@ bool ILogicalDevice::createGraphicsPipelines( return true; } +bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output) +{ + std::fill_n(output,params.size(),nullptr); + IGPURayTracingPipeline::SCreationParams::SSpecializationValidationResult specConstantValidation = commonCreatePipelines(pipelineCache,params,[this](const IGPUShader::SSpecInfo& info)->bool + { + if (!info.shader->wasCreatedBy(this)) + { + 
NBL_LOG_ERROR("The shader was not created by this device"); + return false; + } + return true; + }); + if (!specConstantValidation) + { + NBL_LOG_ERROR("Invalid parameters were given"); + return false; + } + + const auto& features = getEnabledFeatures(); + + // https://docs.vulkan.org/spec/latest/chapters/pipelines.html#VUID-vkCreateRayTracingPipelinesKHR-rayTracingPipeline-03586 + if (!features.rayTracingPipeline) + { + NBL_LOG_ERROR("Feature `ray tracing pipeline` is not enabled"); + return false; + } + + for (const auto& param : params) + { + const bool skipAABBs = bool(param.flags & IGPURayTracingPipeline::SCreationParams::FLAGS::SKIP_AABBS); + const bool skipBuiltin = bool(param.flags & IGPURayTracingPipeline::SCreationParams::FLAGS::SKIP_BUILT_IN_PRIMITIVES); + + // https://docs.vulkan.org/spec/latest/chapters/pipelines.html#VUID-VkRayTracingPipelineCreateInfoKHR-rayTraversalPrimitiveCulling-03597 + if (skipAABBs && skipBuiltin) + { + NBL_LOG_ERROR("Flags must not include both SKIP_AABBS and SKIP_BUILT_IN_PRIMITIVE!"); + return false; + } + + // https://registry.khronos.org/vulkan/specs/latest/html/vkspec.html#VUID-VkRayTracingPipelineCreateInfoKHR-rayTraversalPrimitiveCulling-03596 + if (skipAABBs && !features.rayTraversalPrimitiveCulling) + { + NBL_LOG_ERROR("Feature `rayTraversalPrimitiveCulling` is not enabled when pipeline is created with SKIP_AABBS"); + return false; + } + + // https://registry.khronos.org/vulkan/specs/latest/html/vkspec.html#VUID-VkRayTracingPipelineCreateInfoKHR-rayTraversalPrimitiveCulling-03597 + if (skipBuiltin && !features.rayTraversalPrimitiveCulling) + { + NBL_LOG_ERROR("Feature `rayTraversalPrimitiveCulling` is not enabled when pipeline is created with SKIP_BUILT_IN_PRIMITIVES"); + return false; + } + + } + + const auto& limits = getPhysicalDeviceLimits(); + for (const auto& param : params) + { + // 
https://docs.vulkan.org/spec/latest/chapters/pipelines.html#VUID-VkRayTracingPipelineCreateInfoKHR-maxPipelineRayRecursionDepth-03589 + if (param.cached.maxRecursionDepth > limits.maxRayRecursionDepth) + { + NBL_LOG_ERROR("Invalid maxRecursionDepth. maxRecursionDepth(%u) exceed the limits(%u)", param.cached.maxRecursionDepth, limits.maxRayRecursionDepth); + return false; + } + if (param.getShaders().empty()) + { + NBL_LOG_ERROR("Pipeline must have at least one shader."); + return false; + } + } + + createRayTracingPipelines_impl(pipelineCache,params,output,specConstantValidation); + + bool retval = true; + for (auto i=0u; i