Skip to content

Acceleration Structure Asset Conversion #869

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
2 changes: 1 addition & 1 deletion examples_tests
6 changes: 2 additions & 4 deletions include/nbl/asset/IAccelerationStructure.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class IBottomLevelAccelerationStructure : public AccelerationStructure
// Provided by VK_NV_displacement_micromap
ALLOW_DISPLACEMENT_MICROMAP_UPDATE_BIT = 0x1u<<9u,
// Provided by VK_KHR_ray_tracing_position_fetch
ALLOW_DATA_ACCESS_KHR = 0x1u<<11u,
ALLOW_DATA_ACCESS = 0x1u<<11u,
};
static inline bool validBuildFlags(const core::bitflag<BUILD_FLAGS> flags)
{
Expand Down Expand Up @@ -154,8 +154,6 @@ class ITopLevelAccelerationStructure : public AccelerationStructure
PREFER_FAST_TRACE_BIT = 0x1u<<2u,
PREFER_FAST_BUILD_BIT = 0x1u<<3u,
LOW_MEMORY_BIT = 0x1u<<4u,
// Synthetic flag we use to indicate `VkAccelerationStructureGeometryInstancesDataKHR::arrayOfPointers`
INSTANCE_DATA_IS_POINTERS_TYPE_ENCODED_LSB = 0x1u<<5u, // this flag really shouldn't be settable outside of `video::IGPU`
// Provided by VK_NV_ray_tracing_motion_blur, but we always override and deduce from creation flag because of
// https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkAccelerationStructureBuildGeometryInfoKHR-dstAccelerationStructure-04927
//MOTION_BIT = 0x1u<<5u,
Expand Down Expand Up @@ -237,7 +235,7 @@ class ITopLevelAccelerationStructure : public AccelerationStructure
static_assert(alignof(Instance<blas_ref_t>)==8ull);
};

// enum for distinguishing unions of Instance Types when there is no `INSTANCE_DATA_IS_POINTERS_TYPE_ENCODED_LSB` in build flags
// enum for distinguishing unions of Instance Types when using a polymorphic instance
enum class INSTANCE_TYPE : uint32_t
{
// StaticInstance
Expand Down
4 changes: 1 addition & 3 deletions include/nbl/asset/ICPUAccelerationStructure.h
Original file line number Diff line number Diff line change
Expand Up @@ -311,8 +311,6 @@ class ICPUTopLevelAccelerationStructure final : public ITopLevelAccelerationStru
if(!isMutable())
return;
m_buildFlags = buildFlags;
// we always clear this flag as we always store instances as polymorphic for ICPUTopLevelAccelerationStructure
m_buildFlags &= ~BUILD_FLAGS::INSTANCE_DATA_IS_POINTERS_TYPE_ENCODED_LSB;
}

//
Expand Down Expand Up @@ -361,7 +359,7 @@ class ICPUTopLevelAccelerationStructure final : public ITopLevelAccelerationStru
inline bool usesMotion() const override
{
for (const auto& instance : *m_instances)
if (instance.getType()!=INSTANCE_TYPE::STATIC)
if (instance.getType()!=INSTANCE_TYPE::STATIC || instance.getBase().blas && instance.getBase().blas->usesMotion())
return true;
return false;
}
Expand Down
34 changes: 19 additions & 15 deletions include/nbl/video/IDeviceMemoryAllocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,12 @@
namespace nbl::video
{

class IDeviceMemoryAllocator
class NBL_API2 IDeviceMemoryAllocator
{
public:
// right now we only support this interface handing out memory for one device or group
virtual ILogicalDevice* getDeviceForAllocations() const = 0;

struct SAllocateInfo
{
size_t size : 54 = 0ull;
Expand All @@ -21,6 +24,21 @@ class IDeviceMemoryAllocator
// size_t opaqueCaptureAddress = 0u; Note that this mechanism is intended only to support capture/replay tools, and is not recommended for use in other applications.
};

// What `allocate` hands back: a reference to the memory object together with an offset
struct SAllocation
{
    // sentinel value `offset` holds by default, i.e. until an allocation actually fills it in
    static constexpr size_t InvalidMemoryOffset = 0xdeadbeefBadC0ffeull;

    // valid only when a memory object is referenced AND the offset is not the sentinel
    bool isValid() const
    {
        if (!memory)
            return false;
        return offset!=InvalidMemoryOffset;
    }

    core::smart_refctd_ptr<IDeviceMemoryAllocation> memory = nullptr;
    size_t offset = InvalidMemoryOffset;
};

virtual SAllocation allocate(const SAllocateInfo& info) = 0;


//! IMemoryTypeIterator extracts memoryType indices from memoryTypeBits in arbitrary order
//! which is used to give priority to memoryTypes in try-allocate usages where allocations may fail with some memoryTypes
//! IMemoryTypeIterator will construct SAllocateInfo from object's memory requirements, allocateFlags and dedication using operator()
Expand Down Expand Up @@ -85,20 +103,6 @@ class IDeviceMemoryAllocator

uint32_t currentIndex = 0u;
};


struct SAllocation
{
static constexpr size_t InvalidMemoryOffset = 0xdeadbeefBadC0ffeull;
bool isValid() const
{
return memory && (offset!=InvalidMemoryOffset);
}

core::smart_refctd_ptr<IDeviceMemoryAllocation> memory = nullptr;
size_t offset = InvalidMemoryOffset;
};
virtual SAllocation allocate(const SAllocateInfo& info) = 0;

template<class memory_type_iterator_t=DefaultMemoryTypeIterator>
inline SAllocation allocate(
Expand Down
23 changes: 13 additions & 10 deletions include/nbl/video/IDeviceMemoryBacked.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,16 @@ class IDeviceMemoryBacked : public IBackendObject
{
public:
//!
struct SCachedCreationParams
constexpr static inline uint8_t MaxQueueFamilies = 7;
struct SCreationParams
{
// A Pre-Destroy-Step is called out just before a `vkDestroy` or `glDelete`, this is only useful for "imported" resources
std::unique_ptr<ICleanup> preDestroyCleanup = nullptr;
// A Post-Destroy-Step is called in this class' destructor, this is only useful for "imported" resources
std::unique_ptr<ICleanup> postDestroyCleanup = nullptr;
// If more than one, then we're doing concurrent resource sharing
uint8_t queueFamilyIndexCount = 0u;
const uint32_t* queueFamilyIndices = nullptr;
// Thus the destructor will skip the call to `vkDestroy` or `glDelete` on the handle, this is only useful for "imported" objects
bool skipHandleDestroy = false;

Expand All @@ -43,7 +45,8 @@ class IDeviceMemoryBacked : public IBackendObject
return queueFamilyIndexCount>1u;
}
};
inline const SCachedCreationParams& getCachedCreationParams() const {return m_cachedCreationParams;}
// TODO: change name later on, but right now too much code to refactor
inline const SCreationParams& getCachedCreationParams() const {return m_cachedCreationParams;}

//! We need to know to cast to `IGPUBuffer` or `IGPUImage`
enum E_OBJECT_TYPE : bool
Expand Down Expand Up @@ -87,15 +90,14 @@ class IDeviceMemoryBacked : public IBackendObject
//! Returns the allocation which is bound to the resource
virtual SMemoryBinding getBoundMemory() const = 0;

//! For constructor parameter only
struct SCreationParams : SCachedCreationParams
{
const uint32_t* queueFamilyIndices = nullptr;
};

protected:
inline IDeviceMemoryBacked(core::smart_refctd_ptr<const ILogicalDevice>&& originDevice, SCreationParams&& creationParams, const SDeviceMemoryRequirements& reqs)
: IBackendObject(std::move(originDevice)), m_cachedCreationParams(std::move(creationParams)), m_cachedMemoryReqs(reqs) {}
: IBackendObject(std::move(originDevice)), m_cachedCreationParams(std::move(creationParams)), m_cachedMemoryReqs(reqs)
{
std::fill_n(m_queueFamilies,MaxQueueFamilies,~0u);
std::copy_n(m_cachedCreationParams.queueFamilyIndices,m_cachedCreationParams.queueFamilyIndexCount,m_queueFamilies);
m_cachedCreationParams.queueFamilyIndices = m_queueFamilies;
}
inline virtual ~IDeviceMemoryBacked()
{
assert(!m_cachedCreationParams.preDestroyCleanup); // derived class should have already cleared this out
Expand All @@ -109,8 +111,9 @@ class IDeviceMemoryBacked : public IBackendObject


//! members
SCachedCreationParams m_cachedCreationParams;
SCreationParams m_cachedCreationParams;
SDeviceMemoryRequirements m_cachedMemoryReqs;
uint32_t m_queueFamilies[MaxQueueFamilies];
};

template<typename T>
Expand Down
9 changes: 5 additions & 4 deletions include/nbl/video/IGPUAccelerationStructure.h
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
if (buildRangeInfo.instanceCount>dstAS->getMaxInstanceCount())
return false;

const bool arrayOfPointers = buildFlags.hasFlags(BUILD_FLAGS::INSTANCE_DATA_IS_POINTERS_TYPE_ENCODED_LSB);
const bool arrayOfPointers = instanceDataTypeEncodedInPointersLSB;
constexpr bool HostBuild = std::is_same_v<BufferType,asset::ICPUBuffer>;
// I'm not gonna do the `std::conditional_t<HostBuild,,>` to get the correct Instance struct type as they're the same size essentially
const size_t instanceSize = arrayOfPointers ? sizeof(void*):(
Expand Down Expand Up @@ -467,11 +467,13 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr


core::bitflag<BUILD_FLAGS> buildFlags = BUILD_FLAGS::PREFER_FAST_BUILD_BIT;
// What we use to indicate `VkAccelerationStructureGeometryInstancesDataKHR::arrayOfPointers`
uint8_t instanceDataTypeEncodedInPointersLSB : 1 = false;
const IGPUTopLevelAccelerationStructure* srcAS = nullptr;
IGPUTopLevelAccelerationStructure* dstAS = nullptr;
// depending on the presence certain bits in `buildFlags` this buffer will be filled with:
// depending on the values of certain build info members, this buffer will be filled with:
// - addresses to `StaticInstance`, `MatrixMotionInstance`, `SRTMotionInstance` packed in upper 60 bits
// and struct type in lower 4 bits if and only if `buildFlags.hasFlags(INSTANCE_DATA_IS_POINTERS_TYPE_ENCODED_LSB)`, otherwise:
// and struct type in lower 4 bits if and only if `instanceDataTypeEncodedInPointersLSB`, otherwise:
// + an array of `PolymorphicInstance` if our `SCreationParams::flags.hasFlags(MOTION_BIT)`, otherwise
// + an array of `StaticInstance`
asset::SBufferBinding<const BufferType> instanceData = {};
Expand All @@ -482,7 +484,6 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
using HostBuildInfo = BuildInfo<asset::ICPUBuffer>;

//! BEWARE, OUR RESOURCE LIFETIME TRACKING DOES NOT WORK ACROSS TLAS->BLAS boundaries with these types of BLAS references!
// TODO: Investigate `EXT_private_data` to be able to go ` -> IGPUBottomLevelAccelerationStructure` on Host Builds
using DeviceInstance = Instance<IGPUBottomLevelAccelerationStructure::device_op_ref_t>;
using HostInstance = Instance<IGPUBottomLevelAccelerationStructure::host_op_ref_t>;
static_assert(sizeof(DeviceInstance)==sizeof(HostInstance));
Expand Down
16 changes: 14 additions & 2 deletions include/nbl/video/ILogicalDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ class IPhysicalDevice;
class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMemoryAllocator
{
public:
constexpr static inline uint8_t MaxQueueFamilies = 7;
// The logical device is itself the `IDeviceMemoryAllocator`, so the device for allocations is `this`
inline ILogicalDevice* getDeviceForAllocations() const override
{
    // the interface method is `const` but returns a mutable device pointer, hence the cast
    ILogicalDevice* const self = const_cast<ILogicalDevice*>(this);
    return self;
}

constexpr static inline uint8_t MaxQueueFamilies = IDeviceMemoryBacked::MaxQueueFamilies;
struct SQueueCreationParams
{
constexpr static inline uint8_t MaxQueuesInFamily = 15;
Expand Down Expand Up @@ -331,6 +333,11 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe
m_logger.log("Failed to create Buffer, size %d larger than Device %p's limit (%u)!",system::ILogger::ELL_ERROR,creationParams.size,this,maxSize);
return nullptr;
}
if (creationParams.queueFamilyIndexCount>MaxQueueFamilies)
{
m_logger.log("Failed to create Buffer, queue family count %d for concurrent sharing larger than our max %d!",system::ILogger::ELL_ERROR,creationParams.queueFamilyIndexCount,MaxQueueFamilies);
return nullptr;
}
return createBuffer_impl(std::move(creationParams));
}
// Create a BufferView, to a shader; a fake 1D-like texture with no interpolation (@see ICPUBufferView)
Expand All @@ -343,7 +350,12 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe
m_logger.log("Failed to create Image, invalid creation parameters!",system::ILogger::ELL_ERROR);
return nullptr;
}
// TODO: @Cyprian validation of creationParams against the device's limits (sample counts, etc.) see vkCreateImage
if (creationParams.queueFamilyIndexCount>MaxQueueFamilies)
{
m_logger.log("Failed to create Image, queue family count %d for concurrent sharing larger than our max %d!",system::ILogger::ELL_ERROR,creationParams.queueFamilyIndexCount,MaxQueueFamilies);
return nullptr;
}
// TODO: validation of creationParams against the device's limits (sample counts, etc.) see vkCreateImage docs
return createImage_impl(std::move(creationParams));
}
// Create an ImageView that can actually be used by shaders (@see ICPUImageView)
Expand Down
2 changes: 1 addition & 1 deletion include/nbl/video/alloc/CAsyncSingleBufferSubAllocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ class CAsyncSingleBufferSubAllocator
template<typename... Args>
inline size_type multi_allocate(uint32_t count, Args&&... args) noexcept
{
return multi_alloc(decltype(deferredFrees)::default_wait(),count,std::forward<Args>(args)...);
return multi_allocate(TimelineEventHandlerBase::default_wait(),count,std::forward<Args>(args)...);
}
//! attempt to allocate, if fail (presumably because of fragmentation), then keep trying till timeout is reached
template<class Clock=typename std::chrono::steady_clock, typename... Args>
Expand Down
2 changes: 1 addition & 1 deletion include/nbl/video/asset_traits.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ struct asset_traits<asset::ICPUBottomLevelAccelerationStructure>
// we don't need to descend during DFS into other assets
constexpr static inline bool HasChildren = true;
// the video type
using video_t = IGPUImageView;
using video_t = IGPUBottomLevelAccelerationStructure;
// lookup type
using lookup_t = const video_t*;
};
Expand Down
Loading