diff --git a/examples_tests b/examples_tests
index 69ba991ea4..fbf674031e 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 69ba991ea4827c80d008a31256785f4c4c60f12d
+Subproject commit fbf674031e2f16b2ee79305094ad3a45b6051c6c
diff --git a/include/nbl/application_templates/BasicMultiQueueApplication.hpp b/include/nbl/application_templates/BasicMultiQueueApplication.hpp
index b4d9f1b843..b0a8f98cda 100644
--- a/include/nbl/application_templates/BasicMultiQueueApplication.hpp
+++ b/include/nbl/application_templates/BasicMultiQueueApplication.hpp
@@ -49,7 +49,7 @@ class BasicMultiQueueApplication : public virtual MonoDeviceApplication
 				return false;
 
 			using namespace core;
-			m_utils = make_smart_refctd_ptr<video::IUtilities>(smart_refctd_ptr(m_device),smart_refctd_ptr(m_logger));
+			m_utils = video::IUtilities::create(smart_refctd_ptr(m_device),smart_refctd_ptr(m_logger));
 			if (!m_utils)
 				return logFail("Failed to create nbl::video::IUtilities!");
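
The call-site change above relies on the fallible factory introduced in include/nbl/video/utilities/IUtilities.h later in this patch. A minimal usage sketch (not part of the patch; `device` and `logger` are hypothetical handles, the size arguments mirror the factory's defaults):

    // create() returns nullptr when a streaming buffer cannot be allocated or the
    // transfer-constraint checks fail, so callers must null-check instead of assuming success.
    auto utils = nbl::video::IUtilities::create(std::move(device), logger, /*downstreamSize*/0x4000000u, /*upstreamSize*/0x4000000u);
    if (!utils)
        return logFail("Failed to create nbl::video::IUtilities!");
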
diff --git a/include/nbl/core/containers/DoublyLinkedList.h b/include/nbl/core/containers/DoublyLinkedList.h
index 4dcea9bee0..af7edcc01a 100644
--- a/include/nbl/core/containers/DoublyLinkedList.h
+++ b/include/nbl/core/containers/DoublyLinkedList.h
@@ -69,6 +69,14 @@ template<typename Value, class allocator>
 	public:
+		template<bool Mutable>
+		class Iterator;
+		template<bool Mutable>
+		friend class Iterator;
+
+		using iterator = Iterator<true>;
+		using const_iterator = Iterator<false>;
+
 		using allocator_t = allocator;
 		using allocator_traits_t = std::allocator_traits<allocator>;
 		using address_allocator_t = PoolAddressAllocator<uint32_t>;
@@ -233,16 +241,43 @@ class DoublyLinkedList
 			// Offset the array start by the storage used by the address allocator
 			m_array = reinterpret_cast<node_t*>(reinterpret_cast<uint8_t*>(m_reservedSpace) + addressAllocatorStorageSize * sizeof(node_t));
 
-			m_addressAllocator = address_allocator_t(m_reservedSpace, 0u, 0u, 1u, capacity, 1u);
 			// If allocation failed, create list with no capacity to indicate creation failed
 			m_cap = m_reservedSpace ? capacity : 0;
-			m_back = invalid_iterator;
-			m_begin = invalid_iterator;
+			m_addressAllocator = address_allocator_t(m_reservedSpace, 0u, 0u, 1u, m_cap, 1u);
 		}
 
 		DoublyLinkedList() = default;
 
-		DoublyLinkedList(const DoublyLinkedList& other) = delete;
+		// Copy Constructor
+		explicit DoublyLinkedList(const DoublyLinkedList& other) : m_dispose_f(other.m_dispose_f), m_allocator(other.m_allocator)
+		{
+			const size_t addressAllocatorStorageSize = (address_allocator_t::reserved_size(1u, other.m_cap, 1u) + sizeof(node_t) - 1) / sizeof(node_t);
+			m_currentAllocationSize = addressAllocatorStorageSize + other.m_cap;
+			m_reservedSpace = reinterpret_cast<void*>(allocator_traits_t::allocate(m_allocator, m_currentAllocationSize));
+			// If allocation failed, create a list with no capacity
+			m_cap = m_reservedSpace ? other.m_cap : 0;
+			if (!m_cap) return; // Allocation failed
+			// Offset the array start by the storage used by the address allocator
+			m_array = reinterpret_cast<node_t*>(reinterpret_cast<uint8_t*>(m_reservedSpace) + addressAllocatorStorageSize * sizeof(node_t));
+
+			if constexpr (std::is_trivially_copyable_v<Value>)
+			{
+				// Create new address allocator by copying state
+				m_addressAllocator = address_allocator_t(m_cap, other.m_addressAllocator, m_reservedSpace);
+				// Copy memory over
+				memcpy(m_array, other.m_array, m_cap * sizeof(node_t));
+				m_back = other.m_back;
+				m_begin = other.m_begin;
+			}
+			else
+			{
+				m_addressAllocator = address_allocator_t(m_reservedSpace, 0u, 0u, 1u, m_cap, 1u);
+				// Reverse iteration since we push from the front
+				for (auto it = other.crbegin(); it != other.crend(); it++)
+					pushFront(value_t(*it));
+			}
+		}
 
 		DoublyLinkedList& operator=(const DoublyLinkedList& other) = delete;
 
@@ -273,6 +308,16 @@ class DoublyLinkedList
 			}
 		}
 
+		// Iterator stuff
+		iterator begin();
+		iterator end();
+		const_iterator cbegin() const;
+		const_iterator cend() const;
+		std::reverse_iterator<iterator> rbegin();
+		std::reverse_iterator<iterator> rend();
+		std::reverse_iterator<const_iterator> crbegin() const;
+		std::reverse_iterator<const_iterator> crend() const;
+
 	private:
 		//allocate and get the address of the next free node
 		inline uint32_t reserveAddress()
@@ -339,14 +384,130 @@ class DoublyLinkedList
 		node_t* m_array;
 		uint32_t m_cap;
-		uint32_t m_back;
-		uint32_t m_begin;
+		uint32_t m_back = invalid_iterator;
+		uint32_t m_begin = invalid_iterator;
 
 		disposal_func_t m_dispose_f;
 };
 
+// ---------------------------------------------------- ITERATOR -----------------------------------------------------------
+
+// Satisfies std::bidirectional_iterator
+template<typename Value, class allocator>
+template<bool Mutable>
+class DoublyLinkedList<Value, allocator>::Iterator
+{
+		using base_iterable_t = DoublyLinkedList<Value, allocator>;
+		using iterable_t = std::conditional_t<Mutable, base_iterable_t, const base_iterable_t>;
+		friend class base_iterable_t;
+	public:
+		using value_type = std::conditional_t<Mutable, Value, const Value>;
+		using pointer = value_type*;
+		using reference = value_type&;
+		using difference_type = int32_t;
+
+		Iterator() = default;
+
+		// Prefix
+		Iterator& operator++()
+		{
+			m_current = m_iterable->get(m_current)->next;
+			return *this;
+		}
+
+		Iterator& operator--()
+		{
+			m_current = m_current != invalid_iterator ? m_iterable->get(m_current)->prev : m_iterable->m_back;
+			return *this;
+		}
+
+		// Postfix
+		Iterator operator++(int)
+		{
+			Iterator beforeIncrement = *this;
+			operator++();
+			return beforeIncrement;
+		}
+
+		Iterator operator--(int)
+		{
+			Iterator beforeDecrement = *this;
+			operator--();
+			return beforeDecrement;
+		}
+
+		// Comparison
+		bool operator==(const Iterator& rhs) const
+		{
+			return m_iterable == rhs.m_iterable && m_current == rhs.m_current;
+		}
+
+		// Deref
+		reference operator*() const
+		{
+			return m_iterable->get(m_current)->data;
+		}
+
+		pointer operator->() const
+		{
+			return &operator*();
+		}
+	private:
+		Iterator(iterable_t* const iterable, uint32_t idx) : m_iterable(iterable), m_current(idx) {}
+
+		iterable_t* m_iterable;
+		uint32_t m_current;
+};
+
+template<typename Value, class allocator>
+DoublyLinkedList<Value, allocator>::iterator DoublyLinkedList<Value, allocator>::begin()
+{
+	return iterator(this, m_begin);
+}
+
+template<typename Value, class allocator>
+DoublyLinkedList<Value, allocator>::const_iterator DoublyLinkedList<Value, allocator>::cbegin() const
+{
+	return const_iterator(this, m_begin);
+}
+
+template<typename Value, class allocator>
+DoublyLinkedList<Value, allocator>::iterator DoublyLinkedList<Value, allocator>::end()
+{
+	return iterator(this, invalid_iterator);
+}
+
+template<typename Value, class allocator>
+DoublyLinkedList<Value, allocator>::const_iterator DoublyLinkedList<Value, allocator>::cend() const
+{
+	return const_iterator(this, invalid_iterator);
+}
+
+template<typename Value, class allocator>
+std::reverse_iterator<typename DoublyLinkedList<Value, allocator>::iterator> DoublyLinkedList<Value, allocator>::rbegin()
+{
+	return std::reverse_iterator<iterator>(iterator(this, invalid_iterator));
+}
+
+template<typename Value, class allocator>
+std::reverse_iterator<typename DoublyLinkedList<Value, allocator>::const_iterator> DoublyLinkedList<Value, allocator>::crbegin() const
+{
+	return std::reverse_iterator<const_iterator>(const_iterator(this, invalid_iterator));
+}
+
+template<typename Value, class allocator>
+std::reverse_iterator<typename DoublyLinkedList<Value, allocator>::iterator> DoublyLinkedList<Value, allocator>::rend()
+{
+	return std::reverse_iterator<iterator>(iterator(this, m_begin));
+}
+
+template<typename Value, class allocator>
+std::reverse_iterator<typename DoublyLinkedList<Value, allocator>::const_iterator> DoublyLinkedList<Value, allocator>::crend() const
+{
+	return std::reverse_iterator<const_iterator>(const_iterator(this, m_begin));
+}
+
+} //namespace core
+} //namespace nbl
+
 #endif
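
With `begin`/`end` and the bidirectional `Iterator` added above, the list becomes range-for iterable and deep-copyable. A sketch under those assumptions (not part of the patch; the capacity constructor and `pushFront` already exist in the container):

    #include "nbl/core/containers/DoublyLinkedList.h"

    void listExample()
    {
        nbl::core::DoublyLinkedList<int> list(16u); // room for 16 nodes
        list.pushFront(1);
        list.pushFront(2);
        int sum = 0;
        for (const int& v : list) // front (most recently pushed) to back
            sum += v;
        for (auto it = list.crbegin(); it != list.crend(); it++) // back to front
            sum += *it;
        nbl::core::DoublyLinkedList<int> copy(list); // deep copy via the new explicit copy constructor
    }
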
diff --git a/include/nbl/core/containers/LRUCache.h b/include/nbl/core/containers/LRUCache.h
index 25554d60b2..e4d78c94e7 100644
--- a/include/nbl/core/containers/LRUCache.h
+++ b/include/nbl/core/containers/LRUCache.h
@@ -38,6 +38,8 @@ class LRUCacheBase
 		LRUCacheBase(const uint32_t capacity, MapHash&& _hash, MapEquals&& _equals, disposal_func_t&& df) : m_list(capacity, std::move(df)), m_hash(std::move(_hash)), m_equals(std::move(_equals)), searchedKey(nullptr) { }
 
+		LRUCacheBase(const LRUCacheBase& other) : m_list(other.m_list), m_hash(other.m_hash), m_equals(other.m_equals), searchedKey(nullptr) {}
+
 	public:
 		inline const Key& getReference(const uint32_t nodeAddr) const
 		{
@@ -221,6 +223,19 @@ class [[deprecated]] LRUCache : protected impl::LRUCacheBase
 		unordered_map<Key, uint32_t, MapHash, MapEquals> m_shortcut_map;
 };
 
+namespace impl
+{
+	template<typename EvictionCallback, typename Value>
+	concept LRUCacheValueEvictionCallback = std::invocable<EvictionCallback, const Value&>;
+
+	template<typename EvictionCallback, typename Key, typename Value>
+	concept LRUCacheKeyValueEvictionCallback = std::invocable<EvictionCallback, const Key&, const Value&>;
+
+	template<typename EvictionCallback, typename Key, typename Value>
+	concept LRUCacheInsertEvictionCallback = LRUCacheValueEvictionCallback<EvictionCallback, Value>
+		|| LRUCacheKeyValueEvictionCallback<EvictionCallback, Key, Value>;
+} //namespace impl
+
 // Key-Value Least Recently Used cache
 // Capacity can be increased at user's will
 // When the cache is full inserting will remove the least used entry
@@ -228,7 +243,7 @@ template<typename Key, typename Value, typename MapHash = std::hash<Key>, typena
 class ResizableLRUCache : protected impl::LRUCacheBase<Key, Value, MapHash, MapEquals, DoublyLinkedList<std::pair<Key, Value> > >, public core::Unmovable, public core::Uncopyable
 {
 	// typedefs
-	using list_t = DoublyLinkedList<std::pair<Key, Value> >;
+	using list_t = DoublyLinkedList<std::pair<Key, Value>>;
 	using base_t = impl::LRUCacheBase<Key, Value, MapHash, MapEquals, list_t>;
 	using this_t = ResizableLRUCache<Key, Value, MapHash, MapEquals>;
@@ -277,6 +292,10 @@ class ResizableLRUCache : protected impl::LRUCacheBase
 	}
 
+	inline ResizableLRUCache(const ResizableLRUCache& other) : base_t(other), m_capacity(other.m_capacity), m_shortcut_map(other.m_shortcut_map.cbegin(), other.m_shortcut_map.cend(), other.m_capacity >> 2, WrapHash{this}, WrapEquals{this})
+	{
+		m_shortcut_map.reserve(m_capacity);
+	}
+
 	inline void print(core::smart_refctd_ptr<system::ILogger> logger)
 	{
 		logger->log("Printing LRU cache contents");
@@ -323,7 +351,7 @@ class ResizableLRUCache : protected impl::LRUCacheBase
-	template<typename K, typename V, std::invocable<const Value&> EvictionCallback> requires std::is_constructible_v<Value, V> // && (std::is_same_v<Value, V> || std::is_assignable_v<Value, V>) // is_assignable_v returns false :(
+	template<typename K, typename V, typename EvictionCallback> requires std::is_constructible_v<Value, V> && impl::LRUCacheInsertEvictionCallback<EvictionCallback, Key, Value> // && (std::is_same_v<Value, V> || std::is_assignable_v<Value, V>) // is_assignable_v returns false :(
 	inline Value* insert(K&& k, V&& v, EvictionCallback&& evictCallback)
 	{
 		bool success;
@@ -336,10 +364,18 @@ class ResizableLRUCache : protected impl::LRUCacheBase
-			const bool overflow = m_shortcut_map.size() >= base_t::m_list.getCapacity();
+			const bool overflow = size() >= base_t::m_list.getCapacity();
 			if (overflow)
 			{
-				evictCallback(base_t::m_list.getBack()->data.second);
+				if constexpr (impl::LRUCacheValueEvictionCallback<EvictionCallback, Value>)
+				{
+					evictCallback(base_t::m_list.getBack()->data.second);
+				}
+				// LRUCacheKeyValueEvictionCallback
+				else
+				{
+					evictCallback(base_t::m_list.getBack()->data.first, base_t::m_list.getBack()->data.second);
+				}
 				m_shortcut_map.erase(base_t::m_list.getLastAddress());
 				base_t::m_list.popBack();
 			}
@@ -389,7 +425,7 @@ class ResizableLRUCache : protected impl::LRUCacheBase
+	inline const Key* getLastKey() const
+	{
+		if (size() > 0)
+			return &base_t::m_list.getBack()->data.first;
+		else
+			return nullptr;
+	}
+
+	inline size_t size() const { return m_shortcut_map.size(); }
+
+	inline bool empty() const { return size() <= 0ull; }
 
 	/**
 	* @brief Resizes the cache by extending its capacity so it can hold more elements. Returns a bool indicating if capacity was indeed increased.
@@ -427,6 +477,17 @@ class ResizableLRUCache : protected impl::LRUCacheBase
+	using iterator = typename list_t::iterator;
+	using const_iterator = typename list_t::const_iterator;
+
+	// Iterators go from most to least recently used: MRU -> LRU
+	iterator begin() { return base_t::m_list.begin(); }
+	iterator end() { return base_t::m_list.end(); }
+	const_iterator cbegin() const { return base_t::m_list.cbegin(); }
+	const_iterator cend() const { return base_t::m_list.cend(); }
+	std::reverse_iterator<iterator> rbegin() { return base_t::m_list.rbegin(); }
+	std::reverse_iterator<iterator> rend() { return base_t::m_list.rend(); }
+	std::reverse_iterator<const_iterator> crbegin() const { return base_t::m_list.crbegin(); }
+	std::reverse_iterator<const_iterator> crend() const { return base_t::m_list.crend(); }
+
 protected:
 	unordered_set<uint32_t, WrapHash, WrapEquals> m_shortcut_map;
 	uint32_t m_capacity;
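
The concepts above let `insert` accept either eviction-callback shape, and the new iterators expose entries from most to least recently used. A sketch (not part of the patch; the capacity constructor follows the existing `ResizableLRUCache` interface):

    #include "nbl/core/containers/LRUCache.h"
    #include <string>

    void cacheExample()
    {
        nbl::core::ResizableLRUCache<int, std::string> cache(2u);
        cache.insert(1, std::string("one"), [](const std::string& value) {});                 // value-only callback
        cache.insert(2, std::string("two"), [](const int& key, const std::string& value) {}); // key+value callback
        for (auto it = cache.cbegin(); it != cache.cend(); it++) // MRU -> LRU
        {
            const auto& kv = *it; // kv is a std::pair<int, std::string>
            (void)kv;
        }
    }
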
diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h
index 6e90c9f50c..e0d23ea8b1 100644
--- a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h
+++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h
@@ -28,11 +28,18 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
 		class DeferredFreeFunctor
 		{
 			public:
-				inline DeferredFreeFunctor(SubAllocatedDescriptorSet* composed, uint32_t binding, size_type count, const value_type* addresses)
-					: m_addresses(std::move(core::make_refctd_dynamic_array<core::smart_refctd_dynamic_array<value_type>>(count))),
-					m_binding(binding), m_composed(composed)
+				using ref_t = core::smart_refctd_ptr<const core::IReferenceCounted>;
+
+				template<typename T> requires std::is_base_of_v<core::IReferenceCounted, T>
+				inline DeferredFreeFunctor(SubAllocatedDescriptorSet* composed, uint32_t binding, size_type count, const value_type* addresses, const T*const *const objectsToHold)
+					: m_addresses(core::make_refctd_dynamic_array<core::smart_refctd_dynamic_array<value_type>>(count))
+					, m_objectsToHold(core::make_refctd_dynamic_array<core::smart_refctd_dynamic_array<ref_t>>(count))
+					, m_binding(binding)
+					, m_composed(composed)
 				{
 					memcpy(m_addresses->data(), addresses, count * sizeof(value_type));
+					for (size_t i=0u; i<count; i++)
+						(*m_objectsToHold)[i] = ref_t(objectsToHold ? objectsToHold[i] : nullptr);
 				}
 
 				core::smart_refctd_dynamic_array<value_type> m_addresses;
+				core::smart_refctd_dynamic_array<ref_t> m_objectsToHold;
 				SubAllocatedDescriptorSet* m_composed; // TODO: shouldn't be called `composed`, maybe `parent` or something
 				uint32_t m_binding;
 		};
@@ -209,9 +219,9 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
 				remainingFrees = cull_frees();
 			} while (remainingFrees > 0);
 
-			for (uint32_t i = 0; i < m_allocatableRanges.size(); i++)
+			for (auto& it : m_allocatableRanges)
 			{
-				auto& range = m_allocatableRanges[i];
+				auto& range = it.second;
 				if (range.reservedSize == 0)
 					continue;
 				assert(range.eventHandler->getTimelines().size() == 0);
@@ -355,10 +365,11 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
 		}
 
 		// defers based on the conservative estimation if `futureWait` needs to be waited on; if it doesn't, will call nullify descriptors internally immediately
-		inline void multi_deallocate(uint32_t binding, size_type count, const value_type* addr, const ISemaphore::SWaitInfo& futureWait) noexcept
+		template<typename T=core::IReferenceCounted>
+		inline void multi_deallocate(uint32_t binding, size_type count, const value_type* addr, const ISemaphore::SWaitInfo& futureWait, const T*const *const objectsToDrop=nullptr) noexcept
 		{
 			if (futureWait.semaphore)
-				multi_deallocate(binding, futureWait, DeferredFreeFunctor(this, binding, count, addr));
+				multi_deallocate(binding, futureWait, DeferredFreeFunctor(this, binding, count, addr, objectsToDrop));
 			else
 			{
 				core::vector<IGPUDescriptorSet::SDropDescriptorSetsParams> nulls(count);
@@ -376,10 +387,9 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
 			uint32_t frees = 0;
 			core::vector<IGPUDescriptorSet::SDropDescriptorSetsParams> nulls(m_totalDeferredFrees);
 			auto outNulls = nulls.data();
-			for (uint32_t i = 0; i < m_allocatableRanges.size(); i++)
+			for (auto& it : m_allocatableRanges)
 			{
-				auto& it = m_allocatableRanges[i];
-				frees += it.eventHandler->poll(outNulls).eventsLeft;
+				frees += it.second.eventHandler->poll(outNulls).eventsLeft;
 			}
 			getDevice()->nullifyDescriptors({nulls.data(),outNulls});
 			return frees;
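
With the extra `objectsToDrop` parameter above, a deferred free can keep the resources referenced by the freed descriptor slots alive until the semaphore wait completes. A sketch (not part of the patch; the set, semaphore, and view names are hypothetical):

    // assumes: subAllocDS is a SubAllocatedDescriptorSet, imageView an IGPUImageView,
    // and `sema` will be signalled to `signalValue` once the GPU is done with the slot
    const uint32_t binding = 0u;
    SubAllocatedDescriptorSet::value_type addresses[1] = { allocatedOffset };
    const nbl::video::IGPUImageView* views[1] = { imageView.get() };
    subAllocDS->multi_deallocate(binding, 1u, addresses, { .semaphore = sema.get(), .value = signalValue }, views);
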
diff --git a/include/nbl/video/utilities/IUtilities.h b/include/nbl/video/utilities/IUtilities.h
index 00776ba01d..af16859c08 100644
--- a/include/nbl/video/utilities/IUtilities.h
+++ b/include/nbl/video/utilities/IUtilities.h
@@ -19,26 +19,40 @@ namespace nbl::video
 class NBL_API2 IUtilities : public core::IReferenceCounted
 {
-    protected:
+protected:
        constexpr static inline uint32_t maxStreamingBufferAllocationAlignment = 64u*1024u; // if you need larger alignments then you're not right in the head
        constexpr static inline uint32_t minStreamingBufferAllocationSize = 1024u;
        constexpr static inline uint32_t OptimalCoalescedInvocationXferSize = sizeof(uint32_t);
 
-        uint32_t m_allocationAlignment = 0u;
-        uint32_t m_allocationAlignmentForBufferImageCopy = 0u;
+        IUtilities( core::smart_refctd_ptr<ILogicalDevice>&& device,
+            nbl::system::logger_opt_smart_ptr&& logger,
+            core::smart_refctd_ptr<StreamingTransientDataBufferMT<> >&& defaultUploadBuffer,
+            core::smart_refctd_ptr<StreamingTransientDataBufferMT<> >&& defaultDownloadBuffer,
+            uint32_t allocationAlignment,
+            uint32_t allocationAlignmentForBufferImageCopy)
+            : m_device(std::move(device))
+            , m_logger(nbl::system::logger_opt_smart_ptr(logger))
+            , m_defaultUploadBuffer(std::move(defaultUploadBuffer))
+            , m_defaultDownloadBuffer(std::move(defaultDownloadBuffer))
+            , m_allocationAlignment(allocationAlignment)
+            , m_allocationAlignmentForBufferImageCopy(allocationAlignmentForBufferImageCopy)
+        {
+            m_defaultDownloadBuffer->getBuffer()->setObjectDebugName(("Default Download Buffer of Utilities "+std::to_string(ptrdiff_t(this))).c_str());
+            m_defaultUploadBuffer->getBuffer()->setObjectDebugName(("Default Upload Buffer of Utilities "+std::to_string(ptrdiff_t(this))).c_str());
+        }
+
+        IUtilities() = delete;
 
-        nbl::system::logger_opt_smart_ptr m_logger;
+public:
 
-    public:
-        IUtilities(core::smart_refctd_ptr<ILogicalDevice>&& device, nbl::system::logger_opt_smart_ptr&& logger=nullptr, const uint32_t downstreamSize=0x4000000u, const uint32_t upstreamSize=0x4000000u)
-            : m_device(core::smart_refctd_ptr(device)), m_logger(nbl::system::logger_opt_smart_ptr(logger))
+        static core::smart_refctd_ptr<IUtilities> create(core::smart_refctd_ptr<ILogicalDevice>&& device, nbl::system::logger_opt_smart_ptr&& logger = nullptr, const uint32_t downstreamSize = 0x4000000u, const uint32_t upstreamSize = 0x4000000u)
         {
-            auto physicalDevice = m_device->getPhysicalDevice();
+            auto physicalDevice = device->getPhysicalDevice();
             const auto& limits = physicalDevice->getLimits();
 
             auto queueFamProps = physicalDevice->getQueueFamilyProperties();
             uint32_t minImageTransferGranularityVolume = 1u; // minImageTransferGranularity.width * height * depth
 
             for (auto& qf : queueFamProps)
             {
                 uint32_t volume = qf.minImageTransferGranularity.width*qf.minImageTransferGranularity.height*qf.minImageTransferGranularity.depth;
@@ -46,10 +60,9 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
                     minImageTransferGranularityVolume = volume;
             }
 
-            // host-mapped device memory needs to have this alignment in flush/invalidate calls, therefore this is the streaming buffer's "allocationAlignment".
-            m_allocationAlignment = limits.nonCoherentAtomSize;
-            m_allocationAlignmentForBufferImageCopy = core::max(limits.optimalBufferCopyOffsetAlignment,m_allocationAlignment);
-
+            const uint32_t allocationAlignment = limits.nonCoherentAtomSize;
+            const uint32_t allocationAlignmentForBufferImageCopy = core::max(limits.optimalBufferCopyOffsetAlignment,allocationAlignment);
+
             const uint32_t bufferOptimalTransferAtom = limits.maxResidentInvocations * OptimalCoalescedInvocationXferSize;
             const uint32_t maxImageOptimalTransferAtom = limits.maxResidentInvocations * asset::TexelBlockInfo(asset::EF_R64G64B64A64_SFLOAT).getBlockByteSize() * minImageTransferGranularityVolume;
             const uint32_t minImageOptimalTransferAtom = limits.maxResidentInvocations * asset::TexelBlockInfo(asset::EF_R8_UINT).getBlockByteSize();
@@ -57,16 +70,19 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
             const uint32_t minOptimalTransferAtom = core::min(bufferOptimalTransferAtom,minImageOptimalTransferAtom);
 
             // allocationAlignment <= minBlockSize <= minOptimalTransferAtom <= maxOptimalTransferAtom
-            assert(m_allocationAlignment <= minStreamingBufferAllocationSize);
-            assert(m_allocationAlignmentForBufferImageCopy <= minStreamingBufferAllocationSize);
-
-            assert(minStreamingBufferAllocationSize <= minOptimalTransferAtom);
-            assert(minOptimalTransferAtom <= maxOptimalTransferAtom);
-
-            assert(minStreamingBufferAllocationSize % m_allocationAlignment == 0u);
-            assert(minStreamingBufferAllocationSize % m_allocationAlignmentForBufferImageCopy == 0u);
+            const bool transferConstraintsSatisfied =
+                (allocationAlignment <= minStreamingBufferAllocationSize) &&
+                (allocationAlignmentForBufferImageCopy <= minStreamingBufferAllocationSize) &&
+                (minStreamingBufferAllocationSize <= minOptimalTransferAtom) &&
+                (minOptimalTransferAtom <= maxOptimalTransferAtom) &&
+                (minStreamingBufferAllocationSize % allocationAlignment == 0u) &&
+                (minStreamingBufferAllocationSize % allocationAlignmentForBufferImageCopy == 0u);
+
+            if (!transferConstraintsSatisfied)
+                return nullptr;
 
-            const auto& enabledFeatures = m_device->getEnabledFeatures();
+            const auto& enabledFeatures = device->getEnabledFeatures();
 
             IGPUBuffer::SCreationParams streamingBufferCreationParams = {};
             auto commonUsages = core::bitflag(IGPUBuffer::EUF_STORAGE_TEXEL_BUFFER_BIT)|IGPUBuffer::EUF_STORAGE_BUFFER_BIT|IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT;
@@ -74,7 +90,11 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
                 commonUsages |= IGPUBuffer::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT;
 
             core::bitflag allocateFlags(IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT);
+
+            core::smart_refctd_ptr<StreamingTransientDataBufferMT<> > defaultUploadBuffer = nullptr;
+            core::smart_refctd_ptr<StreamingTransientDataBufferMT<> > defaultDownloadBuffer = nullptr;
+
+            // Try Create Download Buffer
             {
                 IGPUBuffer::SCreationParams streamingBufferCreationParams = {};
                 streamingBufferCreationParams.size = downstreamSize;
                 streamingBufferCreationParams.usage = commonUsages|IGPUBuffer::EUF_TRANSFER_DST_BIT;
                 if (enabledFeatures.conditionalRendering)
                     streamingBufferCreationParams.usage |= IGPUBuffer::EUF_CONDITIONAL_RENDERING_BIT_EXT;
-                auto buffer = m_device->createBuffer(std::move(streamingBufferCreationParams));
+                auto buffer = device->createBuffer(std::move(streamingBufferCreationParams));
                 auto reqs = buffer->getMemoryReqs();
                 reqs.memoryTypeBits &= physicalDevice->getDownStreamingMemoryTypeBits();
 
-                auto memOffset = m_device->allocate(reqs, buffer.get(), allocateFlags);
-                auto mem = memOffset.memory;
+                auto deviceMemAllocation = device->allocate(reqs, buffer.get(), allocateFlags);
+
+                if (!deviceMemAllocation.isValid())
+                {
+                    // allocation failed
+                    return nullptr;
+                }
+
+                auto mem = deviceMemAllocation.memory;
 
                 core::bitflag access(IDeviceMemoryAllocation::EMCAF_NO_MAPPING_ACCESS);
                 const auto memProps = mem->getMemoryPropertyFlags();
@@ -98,9 +125,9 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
                 assert(access.value);
                 mem->map({0ull,reqs.size},access);
 
-                m_defaultDownloadBuffer = core::make_smart_refctd_ptr<StreamingTransientDataBufferMT<> >(asset::SBufferRange<IGPUBuffer>{0ull,downstreamSize,std::move(buffer)},maxStreamingBufferAllocationAlignment,minStreamingBufferAllocationSize);
-                m_defaultDownloadBuffer->getBuffer()->setObjectDebugName(("Default Download Buffer of Utilities "+std::to_string(ptrdiff_t(this))).c_str());
+                defaultDownloadBuffer = core::make_smart_refctd_ptr<StreamingTransientDataBufferMT<> >(asset::SBufferRange<IGPUBuffer>{0ull,downstreamSize,std::move(buffer)},maxStreamingBufferAllocationAlignment,minStreamingBufferAllocationSize);
             }
+            // Try Create Upload Buffer
             {
                 IGPUBuffer::SCreationParams streamingBufferCreationParams = {};
                 streamingBufferCreationParams.size = upstreamSize;
@@ -109,13 +136,19 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
                 streamingBufferCreationParams.usage = commonUsages|IGPUBuffer::EUF_TRANSFER_SRC_BIT;
                 if (enabledFeatures.accelerationStructure)
                     streamingBufferCreationParams.usage |= IGPUBuffer::EUF_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT;
                 if (enabledFeatures.rayTracingPipeline)
                     streamingBufferCreationParams.usage |= IGPUBuffer::EUF_SHADER_BINDING_TABLE_BIT;
-                auto buffer = m_device->createBuffer(std::move(streamingBufferCreationParams));
+                auto buffer = device->createBuffer(std::move(streamingBufferCreationParams));
                 auto reqs = buffer->getMemoryReqs();
                 reqs.memoryTypeBits &= physicalDevice->getUpStreamingMemoryTypeBits();
 
-                auto memOffset = m_device->allocate(reqs, buffer.get(), allocateFlags);
+                auto deviceMemAllocation = device->allocate(reqs, buffer.get(), allocateFlags);
+
+                if (!deviceMemAllocation.isValid())
+                {
+                    // allocation failed
+                    return nullptr;
+                }
 
-                auto mem = memOffset.memory;
+                auto mem = deviceMemAllocation.memory;
                 core::bitflag access(IDeviceMemoryAllocation::EMCAF_NO_MAPPING_ACCESS);
                 const auto memProps = mem->getMemoryPropertyFlags();
                 if (memProps.hasFlags(IDeviceMemoryAllocation::EMPF_HOST_READABLE_BIT))
@@ -125,9 +158,9 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
                 assert(access.value);
                 mem->map({0ull,reqs.size},access);
 
-                m_defaultUploadBuffer = core::make_smart_refctd_ptr<StreamingTransientDataBufferMT<> >(asset::SBufferRange<IGPUBuffer>{0ull,upstreamSize,std::move(buffer)},maxStreamingBufferAllocationAlignment,minStreamingBufferAllocationSize);
-                m_defaultUploadBuffer->getBuffer()->setObjectDebugName(("Default Upload Buffer of Utilities "+std::to_string(ptrdiff_t(this))).c_str());
+                defaultUploadBuffer = core::make_smart_refctd_ptr<StreamingTransientDataBufferMT<> >(asset::SBufferRange<IGPUBuffer>{0ull,upstreamSize,std::move(buffer)},maxStreamingBufferAllocationAlignment,minStreamingBufferAllocationSize);
             }
+
 #if 0 // TODO: port
             m_propertyPoolHandler = core::make_smart_refctd_ptr<CPropertyPoolHandler>(core::smart_refctd_ptr(m_device));
             // smaller workgroups fill occupancy gaps better, especially on new Nvidia GPUs, but we don't want too small workgroups on mobile
@@ -135,6 +168,9 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
             const auto scan_workgroup_size = core::max(core::roundDownToPoT(limits.maxWorkgroupSize[0]) >> 1u, 128u);
             m_scanner = core::make_smart_refctd_ptr<CScanner>(core::smart_refctd_ptr(m_device), scan_workgroup_size);
 #endif
+
+            return core::smart_refctd_ptr<IUtilities>(new IUtilities(std::move(device), std::move(logger), std::move(defaultUploadBuffer), std::move(defaultDownloadBuffer), allocationAlignment, allocationAlignmentForBufferImageCopy), core::dont_grab);
+        }
 
         inline ~IUtilities()
@@ -762,12 +798,15 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
             return retval;
         }
 
-        core::smart_refctd_ptr<ILogicalDevice> m_device;
+        nbl::system::logger_opt_smart_ptr m_logger;
 
         core::smart_refctd_ptr<StreamingTransientDataBufferMT<> > m_defaultDownloadBuffer;
         core::smart_refctd_ptr<StreamingTransientDataBufferMT<> > m_defaultUploadBuffer;
-
+
+        uint32_t m_allocationAlignment = 0u;
+        uint32_t m_allocationAlignmentForBufferImageCopy = 0u;
+
 #if 0 // TODO: port
         core::smart_refctd_ptr<CPropertyPoolHandler> m_propertyPoolHandler;
         core::smart_refctd_ptr<CScanner> m_scanner;
diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp
index bb2d6d6cb4..f41c80a684 100644
--- a/src/nbl/video/CVulkanLogicalDevice.cpp
+++ b/src/nbl/video/CVulkanLogicalDevice.cpp
@@ -773,7 +773,7 @@ void CVulkanLogicalDevice::updateDescriptorSets_impl(const SUpdateDescriptorSets
 void CVulkanLogicalDevice::nullifyDescriptors_impl(const SDropDescriptorSetsParams& params)
 {
     const auto& drops = params.drops;
-    if (getEnabledFeatures().nullDescriptor)
+    if (!getEnabledFeatures().nullDescriptor)
     {
         return;
     }
diff --git a/src/nbl/video/CVulkanPhysicalDevice.cpp b/src/nbl/video/CVulkanPhysicalDevice.cpp
index 3b7df3a9dd..d7343f9673 100644
--- a/src/nbl/video/CVulkanPhysicalDevice.cpp
+++ b/src/nbl/video/CVulkanPhysicalDevice.cpp
@@ -1564,7 +1564,8 @@ core::smart_refctd_ptr<ILogicalDevice> CVulkanPhysicalDevice::createLogicalDevic
 
             enableExtensionIfAvailable(VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME);
 
-            enableExtensionIfAvailable(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME);
+            VkPhysicalDeviceFragmentShaderBarycentricFeaturesKHR fragmentShaderBarycentricFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_BARYCENTRIC_FEATURES_KHR, nullptr };
+            enableExtensionIfAvailable(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME,&fragmentShaderBarycentricFeatures);
 
             VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR subgroupUniformControlFlowFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_UNIFORM_CONTROL_FLOW_FEATURES_KHR,nullptr };
             enableExtensionIfAvailable(VK_KHR_SHADER_SUBGROUP_UNIFORM_CONTROL_FLOW_EXTENSION_NAME,&subgroupUniformControlFlowFeatures);
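
Passing a feature struct alongside the extension name, as the hunk above now does for the barycentric extension, follows the standard Vulkan pNext-chain pattern; `enableExtensionIfAvailable` presumably links the struct into the feature query and device-creation chain. The plain-Vulkan equivalent of that query (sketch, not part of the patch; `vkPhysicalDevice` is a hypothetical handle):

    VkPhysicalDeviceFragmentShaderBarycentricFeaturesKHR baryFeatures = {};
    baryFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_BARYCENTRIC_FEATURES_KHR;

    VkPhysicalDeviceFeatures2 features2 = {};
    features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
    features2.pNext = &baryFeatures;

    // fills baryFeatures.fragmentShaderBarycentric with the device's actual support
    vkGetPhysicalDeviceFeatures2(vkPhysicalDevice, &features2);
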
diff --git a/src/nbl/video/IGPUDescriptorSet.cpp b/src/nbl/video/IGPUDescriptorSet.cpp
index 350e810bb0..0d0e87e338 100644
--- a/src/nbl/video/IGPUDescriptorSet.cpp
+++ b/src/nbl/video/IGPUDescriptorSet.cpp
@@ -174,10 +174,10 @@ void IGPUDescriptorSet::processWrite(const IGPUDescriptorSet::SWriteDescriptorSe
 
     for (auto j = 0; j < write.count; ++j)
     {
-        descriptors[j] = write.info[j].desc;
+        descriptors[j + write.arrayElement] = write.info[j].desc;
 
         if (mutableSamplers)
-            mutableSamplers[j] = write.info[j].info.combinedImageSampler.sampler;
+            mutableSamplers[j + write.arrayElement] = write.info[j].info.combinedImageSampler.sampler;
    }
 
    auto& bindingRedirect = m_layout->getDescriptorRedirect(validationResult.type);
    auto bindingCreateFlags = bindingRedirect.getCreateFlags(validationResult.descriptorRedirectBindingIndex);
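
The indexing fix above matters for partial updates that start at a non-zero `arrayElement`: previously such a write would overwrite elements `[0, count)` of the binding instead of `[arrayElement, arrayElement + count)`. A sketch of a write the old code mishandled (not part of the patch; the set, device, and info objects are hypothetical):

    nbl::video::IGPUDescriptorSet::SWriteDescriptorSet write = {};
    write.dstSet = descriptorSet.get();
    write.binding = 0u;
    write.arrayElement = 2u; // start at the third element of the binding's array
    write.count = 1u;
    write.info = &descriptorInfo;
    // with the fix, only element 2 of the binding is touched
    device->updateDescriptorSets({ &write, 1 }, {});
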