From c1cddc3ad8fac8030121230f7216d9df7f17d945 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sat, 22 Mar 2025 13:34:40 -0400 Subject: [PATCH 1/6] Add support for adding user-specified C/C++ source files to the joltc library --- build.zig | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/build.zig b/build.zig index b07370a..411ff00 100644 --- a/build.zig +++ b/build.zig @@ -37,6 +37,12 @@ pub fn build(b: *std.Build) void { ) orelse true, }; + const user_extensions = b.option( + []const std.Build.LazyPath, + "user_extensions", + "List of user source files to add to the joltc library", + ) orelse &.{}; + const options_step = b.addOptions(); inline for (std.meta.fields(@TypeOf(options))) |field| { options_step.addOption(field.type, field.name, @field(options, field.name)); @@ -233,6 +239,13 @@ pub fn build(b: *std.Build) void { .flags = c_flags, }); + for (user_extensions) |user_extension| { + joltc.addCSourceFile(.{ + .file = user_extension, + .flags = c_flags, + }); + } + if (target.result.abi != .msvc or optimize != .Debug) { joltc.addCSourceFiles(.{ .files = &.{ From 7017fa2cf54e31f88f28d9ab1860bcf4ced12122 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sat, 22 Mar 2025 18:24:38 -0400 Subject: [PATCH 2/6] Update Jolt to 600dae51122682afb1df96c392c4acb7f7e31836 --- build.zig | 37 +- libs/Jolt/AABBTree/AABBTreeBuilder.cpp | 123 +-- libs/Jolt/AABBTree/AABBTreeBuilder.h | 48 +- libs/Jolt/AABBTree/AABBTreeToBuffer.h | 165 ++-- .../NodeCodec/NodeCodecQuadTreeHalfFloat.h | 84 +- .../TriangleCodecIndexed8BitPackSOA4Flags.h | 217 +++-- libs/Jolt/ConfigurationString.h | 17 +- libs/Jolt/Core/Array.h | 179 +++- libs/Jolt/Core/BinaryHeap.h | 96 ++ libs/Jolt/Core/ByteBuffer.h | 2 +- libs/Jolt/Core/Color.h | 4 +- libs/Jolt/Core/Core.h | 93 +- libs/Jolt/Core/FPControlWord.h | 8 + libs/Jolt/Core/FPException.h | 34 +- libs/Jolt/Core/FPFlushDenormals.h | 4 +- libs/Jolt/Core/Factory.cpp | 3 + libs/Jolt/Core/FixedSizeFreeList.inl | 12 +- libs/Jolt/Core/HashCombine.h | 181 
+++- libs/Jolt/Core/HashTable.h | 872 ++++++++++++++++++ libs/Jolt/Core/IssueReporting.cpp | 4 - libs/Jolt/Core/JobSystem.h | 6 + libs/Jolt/Core/JobSystemThreadPool.cpp | 4 +- libs/Jolt/Core/JobSystemWithBarrier.cpp | 7 +- libs/Jolt/Core/LinearCurve.h | 4 +- libs/Jolt/Core/Memory.h | 6 +- libs/Jolt/Core/MutexArray.h | 2 +- libs/Jolt/Core/Profiler.cpp | 330 ++++++- libs/Jolt/Core/Profiler.inl | 3 +- libs/Jolt/Core/QuickSort.h | 8 +- libs/Jolt/Core/RTTI.h | 6 +- libs/Jolt/Core/Reference.h | 22 +- libs/Jolt/Core/Result.h | 26 +- libs/Jolt/Core/STLLocalAllocator.h | 170 ++++ libs/Jolt/Core/Semaphore.cpp | 72 +- libs/Jolt/Core/Semaphore.h | 55 +- libs/Jolt/Core/StaticArray.h | 46 +- libs/Jolt/Core/StreamIn.h | 3 +- libs/Jolt/Core/StridedPtr.h | 63 ++ libs/Jolt/Core/StringTools.h | 13 - libs/Jolt/Core/TempAllocator.h | 14 +- libs/Jolt/Core/TickCounter.cpp | 4 +- libs/Jolt/Core/TickCounter.h | 4 +- libs/Jolt/Core/UnorderedMap.h | 75 +- libs/Jolt/Core/UnorderedSet.h | 27 +- libs/Jolt/Geometry/AABox.h | 13 +- libs/Jolt/Geometry/ClipPoly.h | 4 +- libs/Jolt/Geometry/ConvexHullBuilder.cpp | 7 +- libs/Jolt/Geometry/ConvexHullBuilder2D.cpp | 2 +- libs/Jolt/Geometry/EPAConvexHullBuilder.h | 5 +- libs/Jolt/Geometry/EPAPenetrationDepth.h | 12 +- libs/Jolt/Geometry/Ellipse.h | 2 +- libs/Jolt/Geometry/GJKClosestPoint.h | 5 +- libs/Jolt/Geometry/IndexedTriangle.h | 31 +- libs/Jolt/Geometry/Indexify.cpp | 10 +- libs/Jolt/Geometry/OrientedBox.h | 2 +- libs/Jolt/Geometry/Plane.h | 15 + libs/Jolt/Geometry/RayAABox.h | 10 +- libs/Jolt/Geometry/RayAABox8.h | 76 -- libs/Jolt/Geometry/RaySphere.h | 4 +- libs/Jolt/Geometry/RayTriangle.h | 6 +- libs/Jolt/Geometry/RayTriangle8.h | 91 -- libs/Jolt/Geometry/Triangle.h | 6 +- libs/Jolt/Math/BVec16.h | 99 ++ libs/Jolt/Math/BVec16.inl | 177 ++++ libs/Jolt/Math/DMat44.h | 2 +- libs/Jolt/Math/DVec3.h | 13 +- libs/Jolt/Math/DVec3.inl | 21 +- libs/Jolt/Math/Double3.h | 4 +- libs/Jolt/Math/EigenValueSymmetric.h | 8 +- libs/Jolt/Math/Float2.h | 2 +- 
libs/Jolt/Math/Float3.h | 4 +- libs/Jolt/Math/Float4.h | 2 +- libs/Jolt/Math/GaussianElimination.h | 6 +- libs/Jolt/Math/HalfFloat.h | 4 + libs/Jolt/Math/Mat44.h | 2 +- libs/Jolt/Math/Math.h | 13 +- libs/Jolt/Math/MathTypes.h | 6 +- libs/Jolt/Math/Quat.h | 2 +- libs/Jolt/Math/UVec4.h | 6 +- libs/Jolt/Math/UVec4.inl | 13 +- libs/Jolt/Math/UVec8.h | 100 -- libs/Jolt/Math/UVec8.inl | 138 --- libs/Jolt/Math/Vec3.h | 11 +- libs/Jolt/Math/Vec3.inl | 47 +- libs/Jolt/Math/Vec4.h | 11 +- libs/Jolt/Math/Vec4.inl | 28 +- libs/Jolt/Math/Vec8.h | 112 --- libs/Jolt/Math/Vec8.inl | 148 --- libs/Jolt/Math/Vector.h | 2 +- libs/Jolt/ObjectStream/ObjectStreamIn.cpp | 2 +- libs/Jolt/ObjectStream/ObjectStreamOut.cpp | 24 +- libs/Jolt/ObjectStream/ObjectStreamOut.h | 7 +- .../ObjectStream/SerializableAttributeEnum.h | 4 +- .../ObjectStream/SerializableAttributeTyped.h | 4 +- libs/Jolt/ObjectStream/TypeDeclarations.cpp | 7 + libs/Jolt/ObjectStream/TypeDeclarations.h | 1 + libs/Jolt/Physics/Body/Body.cpp | 62 +- libs/Jolt/Physics/Body/Body.h | 104 ++- libs/Jolt/Physics/Body/Body.inl | 12 +- libs/Jolt/Physics/Body/BodyAccess.cpp | 18 - libs/Jolt/Physics/Body/BodyAccess.h | 35 +- libs/Jolt/Physics/Body/BodyCreationSettings.h | 2 +- libs/Jolt/Physics/Body/BodyID.h | 11 +- libs/Jolt/Physics/Body/BodyInterface.cpp | 64 +- libs/Jolt/Physics/Body/BodyInterface.h | 30 +- libs/Jolt/Physics/Body/BodyManager.cpp | 30 +- libs/Jolt/Physics/Body/BodyManager.h | 2 +- libs/Jolt/Physics/Body/MassProperties.h | 2 +- libs/Jolt/Physics/Body/MotionProperties.cpp | 4 +- libs/Jolt/Physics/Body/MotionProperties.h | 26 +- libs/Jolt/Physics/Body/MotionProperties.inl | 26 +- libs/Jolt/Physics/Body/MotionQuality.h | 2 +- libs/Jolt/Physics/Character/Character.cpp | 18 +- libs/Jolt/Physics/Character/Character.h | 6 +- libs/Jolt/Physics/Character/CharacterID.h | 98 ++ .../Physics/Character/CharacterVirtual.cpp | 347 ++++--- .../Jolt/Physics/Character/CharacterVirtual.h | 163 +++- 
.../Physics/Collision/BroadPhase/BroadPhase.h | 2 +- .../BroadPhase/BroadPhaseQuadTree.cpp | 30 +- .../Physics/Collision/BroadPhase/QuadTree.cpp | 128 ++- .../Physics/Collision/BroadPhase/QuadTree.h | 7 +- .../Collision/CastSphereVsTriangles.cpp | 2 +- .../Collision/CollideConvexVsTriangles.cpp | 15 +- libs/Jolt/Physics/Collision/CollideShape.h | 5 +- .../Collision/CollideShapeVsShapePerLeaf.h | 93 ++ .../Collision/CollideSoftBodyVertexIterator.h | 110 +++ .../CollideSoftBodyVerticesVsTriangles.h | 28 +- .../Physics/Collision/CollisionCollector.h | 6 +- .../Collision/CollisionCollectorImpl.h | 85 ++ .../Jolt/Physics/Collision/CollisionGroup.cpp | 2 + libs/Jolt/Physics/Collision/CollisionGroup.h | 5 +- libs/Jolt/Physics/Collision/ContactListener.h | 9 +- libs/Jolt/Physics/Collision/GroupFilter.h | 2 +- .../Jolt/Physics/Collision/GroupFilterTable.h | 2 +- .../Collision/InternalEdgeRemovingCollector.h | 66 +- .../Collision/ManifoldBetweenTwoFaces.cpp | 39 +- .../Collision/ManifoldBetweenTwoFaces.h | 4 +- .../Physics/Collision/NarrowPhaseQuery.cpp | 34 +- .../Jolt/Physics/Collision/NarrowPhaseQuery.h | 3 + .../Collision/ObjectLayerPairFilterTable.h | 2 +- libs/Jolt/Physics/Collision/PhysicsMaterial.h | 2 +- .../Physics/Collision/PhysicsMaterialSimple.h | 2 +- libs/Jolt/Physics/Collision/RayCast.h | 10 +- .../Jolt/Physics/Collision/Shape/BoxShape.cpp | 30 +- libs/Jolt/Physics/Collision/Shape/BoxShape.h | 4 +- .../Physics/Collision/Shape/CapsuleShape.cpp | 24 +- .../Physics/Collision/Shape/CapsuleShape.h | 3 +- .../Physics/Collision/Shape/CompoundShape.cpp | 27 +- .../Physics/Collision/Shape/CompoundShape.h | 15 +- .../Collision/Shape/ConvexHullShape.cpp | 23 +- .../Physics/Collision/Shape/ConvexHullShape.h | 6 +- .../Physics/Collision/Shape/ConvexShape.cpp | 33 +- .../Physics/Collision/Shape/ConvexShape.h | 2 +- .../Physics/Collision/Shape/CylinderShape.cpp | 83 +- .../Physics/Collision/Shape/CylinderShape.h | 4 +- .../Physics/Collision/Shape/DecoratedShape.h | 4 + 
.../Physics/Collision/Shape/EmptyShape.cpp | 65 ++ .../Jolt/Physics/Collision/Shape/EmptyShape.h | 75 ++ .../Collision/Shape/HeightFieldShape.cpp | 272 +++--- .../Collision/Shape/HeightFieldShape.h | 25 +- .../Physics/Collision/Shape/MeshShape.cpp | 110 ++- libs/Jolt/Physics/Collision/Shape/MeshShape.h | 24 +- .../Collision/Shape/MutableCompoundShape.cpp | 34 +- .../Collision/Shape/MutableCompoundShape.h | 11 +- .../Shape/OffsetCenterOfMassShape.cpp | 4 +- .../Collision/Shape/OffsetCenterOfMassShape.h | 4 +- .../Physics/Collision/Shape/PlaneShape.cpp | 541 +++++++++++ .../Jolt/Physics/Collision/Shape/PlaneShape.h | 147 +++ .../Shape/RotatedTranslatedShape.cpp | 4 +- .../Collision/Shape/RotatedTranslatedShape.h | 24 +- .../Physics/Collision/Shape/ScaleHelpers.h | 8 +- .../Physics/Collision/Shape/ScaledShape.cpp | 4 +- .../Physics/Collision/Shape/ScaledShape.h | 3 +- libs/Jolt/Physics/Collision/Shape/Shape.cpp | 12 +- libs/Jolt/Physics/Collision/Shape/Shape.h | 42 +- .../Physics/Collision/Shape/SphereShape.cpp | 21 +- .../Physics/Collision/Shape/SphereShape.h | 4 +- .../Collision/Shape/StaticCompoundShape.cpp | 6 +- .../Collision/Shape/StaticCompoundShape.h | 2 +- .../Physics/Collision/Shape/SubShapeIDPair.h | 17 +- .../Collision/Shape/TaperedCapsuleShape.cpp | 19 +- .../Shape/TaperedCapsuleShape.gliffy | 1 + .../Collision/Shape/TaperedCapsuleShape.h | 12 +- .../Collision/Shape/TaperedCylinderShape.cpp | 703 ++++++++++++++ .../Collision/Shape/TaperedCylinderShape.h | 132 +++ .../Physics/Collision/Shape/TriangleShape.cpp | 40 +- .../Physics/Collision/Shape/TriangleShape.h | 9 +- libs/Jolt/Physics/Collision/ShapeCast.h | 9 +- libs/Jolt/Physics/Collision/ShapeFilter.h | 3 +- libs/Jolt/Physics/Collision/SimShapeFilter.h | 40 + .../Physics/Collision/SimShapeFilterWrapper.h | 81 ++ .../Jolt/Physics/Collision/TransformedShape.h | 2 +- .../Jolt/Physics/Constraints/ConeConstraint.h | 2 +- libs/Jolt/Physics/Constraints/Constraint.h | 2 +- 
.../Physics/Constraints/ConstraintManager.h | 2 - .../Constraints/ContactConstraintManager.cpp | 266 ++++-- .../Constraints/ContactConstraintManager.h | 20 +- .../Physics/Constraints/DistanceConstraint.h | 2 +- .../Physics/Constraints/FixedConstraint.h | 2 +- .../Jolt/Physics/Constraints/GearConstraint.h | 2 +- .../Physics/Constraints/HingeConstraint.cpp | 2 +- .../Physics/Constraints/HingeConstraint.h | 2 +- libs/Jolt/Physics/Constraints/MotorSettings.h | 2 +- .../Jolt/Physics/Constraints/PathConstraint.h | 7 +- .../Physics/Constraints/PathConstraintPath.h | 2 +- .../Constraints/PathConstraintPathHermite.h | 2 +- .../Physics/Constraints/PointConstraint.h | 2 +- .../Physics/Constraints/PulleyConstraint.h | 2 +- .../Constraints/RackAndPinionConstraint.h | 2 +- .../Physics/Constraints/SixDOFConstraint.h | 2 +- .../Physics/Constraints/SliderConstraint.h | 2 +- .../Jolt/Physics/Constraints/SpringSettings.h | 2 +- .../Constraints/SwingTwistConstraint.h | 2 +- .../Physics/Constraints/TwoBodyConstraint.h | 2 +- libs/Jolt/Physics/LargeIslandSplitter.cpp | 11 +- libs/Jolt/Physics/PhysicsLock.cpp | 17 - libs/Jolt/Physics/PhysicsLock.h | 6 +- libs/Jolt/Physics/PhysicsScene.cpp | 2 +- libs/Jolt/Physics/PhysicsScene.h | 4 +- libs/Jolt/Physics/PhysicsSettings.h | 14 +- libs/Jolt/Physics/PhysicsStepListener.h | 12 +- libs/Jolt/Physics/PhysicsSystem.cpp | 220 +++-- libs/Jolt/Physics/PhysicsSystem.h | 64 +- libs/Jolt/Physics/Ragdoll/Ragdoll.h | 8 +- .../SoftBody/SoftBodyCreationSettings.h | 2 +- libs/Jolt/Physics/SoftBody/SoftBodyManifold.h | 17 +- .../SoftBody/SoftBodyMotionProperties.cpp | 384 +++++--- .../SoftBody/SoftBodyMotionProperties.h | 41 +- libs/Jolt/Physics/SoftBody/SoftBodyShape.cpp | 7 +- libs/Jolt/Physics/SoftBody/SoftBodyShape.h | 2 +- .../SoftBody/SoftBodySharedSettings.cpp | 202 +++- .../Physics/SoftBody/SoftBodySharedSettings.h | 10 +- .../Physics/SoftBody/SoftBodyUpdateContext.h | 7 +- libs/Jolt/Physics/SoftBody/SoftBodyVertex.h | 16 +- 
libs/Jolt/Physics/StateRecorder.h | 70 ++ libs/Jolt/Physics/StateRecorderImpl.h | 5 +- .../Physics/Vehicle/MotorcycleController.h | 2 +- .../Vehicle/TrackedVehicleController.cpp | 6 +- .../Vehicle/TrackedVehicleController.h | 6 +- .../Jolt/Physics/Vehicle/VehicleAntiRollBar.h | 4 +- .../Vehicle/VehicleCollisionTester.cpp | 4 +- .../Physics/Vehicle/VehicleConstraint.cpp | 59 +- libs/Jolt/Physics/Vehicle/VehicleConstraint.h | 14 +- libs/Jolt/Physics/Vehicle/VehicleController.h | 8 +- .../Physics/Vehicle/VehicleDifferential.h | 2 +- libs/Jolt/Physics/Vehicle/VehicleEngine.h | 2 +- libs/Jolt/Physics/Vehicle/VehicleTrack.h | 2 +- .../Physics/Vehicle/VehicleTransmission.h | 2 +- libs/Jolt/Physics/Vehicle/Wheel.h | 2 +- .../Vehicle/WheeledVehicleController.h | 4 +- libs/Jolt/RegisterTypes.cpp | 34 +- libs/Jolt/Renderer/DebugRenderer.cpp | 158 ++-- libs/Jolt/Renderer/DebugRenderer.h | 210 +++-- libs/Jolt/Skeleton/SkeletalAnimation.cpp | 55 ++ libs/Jolt/Skeleton/SkeletalAnimation.h | 22 +- libs/Jolt/Skeleton/Skeleton.h | 4 +- libs/Jolt/TriangleGrouper/TriangleGrouper.h | 27 - .../TriangleGrouperClosestCentroid.cpp | 95 -- .../TriangleGrouperClosestCentroid.h | 21 - .../TriangleGrouper/TriangleGrouperMorton.cpp | 49 - .../TriangleGrouper/TriangleGrouperMorton.h | 20 - .../TriangleSplitter/TriangleSplitter.cpp | 30 +- .../TriangleSplitterBinning.cpp | 71 +- .../TriangleSplitterFixedLeafSize.cpp | 170 ---- .../TriangleSplitterFixedLeafSize.h | 55 -- .../TriangleSplitterLongestAxis.cpp | 31 - .../TriangleSplitterLongestAxis.h | 28 - .../TriangleSplitter/TriangleSplitterMean.cpp | 11 +- .../TriangleSplitterMorton.cpp | 63 -- .../TriangleSplitter/TriangleSplitterMorton.h | 32 - libs/JoltC/JoltPhysicsC.h | 19 +- libs/JoltC/JoltPhysicsC_Extensions.cpp | 9 +- src/zphysics.zig | 32 +- 273 files changed, 8431 insertions(+), 3391 deletions(-) create mode 100644 libs/Jolt/Core/BinaryHeap.h create mode 100644 libs/Jolt/Core/HashTable.h create mode 100644 
libs/Jolt/Core/STLLocalAllocator.h create mode 100644 libs/Jolt/Core/StridedPtr.h delete mode 100644 libs/Jolt/Geometry/RayAABox8.h delete mode 100644 libs/Jolt/Geometry/RayTriangle8.h create mode 100644 libs/Jolt/Math/BVec16.h create mode 100644 libs/Jolt/Math/BVec16.inl delete mode 100644 libs/Jolt/Math/UVec8.h delete mode 100644 libs/Jolt/Math/UVec8.inl delete mode 100644 libs/Jolt/Math/Vec8.h delete mode 100644 libs/Jolt/Math/Vec8.inl delete mode 100644 libs/Jolt/Physics/Body/BodyAccess.cpp create mode 100644 libs/Jolt/Physics/Character/CharacterID.h create mode 100644 libs/Jolt/Physics/Collision/CollideShapeVsShapePerLeaf.h create mode 100644 libs/Jolt/Physics/Collision/CollideSoftBodyVertexIterator.h create mode 100644 libs/Jolt/Physics/Collision/Shape/EmptyShape.cpp create mode 100644 libs/Jolt/Physics/Collision/Shape/EmptyShape.h create mode 100644 libs/Jolt/Physics/Collision/Shape/PlaneShape.cpp create mode 100644 libs/Jolt/Physics/Collision/Shape/PlaneShape.h create mode 100644 libs/Jolt/Physics/Collision/Shape/TaperedCapsuleShape.gliffy create mode 100644 libs/Jolt/Physics/Collision/Shape/TaperedCylinderShape.cpp create mode 100644 libs/Jolt/Physics/Collision/Shape/TaperedCylinderShape.h create mode 100644 libs/Jolt/Physics/Collision/SimShapeFilter.h create mode 100644 libs/Jolt/Physics/Collision/SimShapeFilterWrapper.h delete mode 100644 libs/Jolt/Physics/PhysicsLock.cpp delete mode 100644 libs/Jolt/TriangleGrouper/TriangleGrouper.h delete mode 100644 libs/Jolt/TriangleGrouper/TriangleGrouperClosestCentroid.cpp delete mode 100644 libs/Jolt/TriangleGrouper/TriangleGrouperClosestCentroid.h delete mode 100644 libs/Jolt/TriangleGrouper/TriangleGrouperMorton.cpp delete mode 100644 libs/Jolt/TriangleGrouper/TriangleGrouperMorton.h delete mode 100644 libs/Jolt/TriangleSplitter/TriangleSplitterFixedLeafSize.cpp delete mode 100644 libs/Jolt/TriangleSplitter/TriangleSplitterFixedLeafSize.h delete mode 100644 
libs/Jolt/TriangleSplitter/TriangleSplitterLongestAxis.cpp delete mode 100644 libs/Jolt/TriangleSplitter/TriangleSplitterLongestAxis.h delete mode 100644 libs/Jolt/TriangleSplitter/TriangleSplitterMorton.cpp delete mode 100644 libs/Jolt/TriangleSplitter/TriangleSplitterMorton.h diff --git a/build.zig b/build.zig index 411ff00..03799d8 100644 --- a/build.zig +++ b/build.zig @@ -104,9 +104,9 @@ pub fn build(b: *std.Build) void { src_dir ++ "/Core/Color.cpp", src_dir ++ "/Core/Factory.cpp", src_dir ++ "/Core/IssueReporting.cpp", + src_dir ++ "/Core/JobSystemSingleThreaded.cpp", src_dir ++ "/Core/JobSystemThreadPool.cpp", src_dir ++ "/Core/JobSystemWithBarrier.cpp", - src_dir ++ "/Core/JobSystemSingleThreaded.cpp", src_dir ++ "/Core/LinearCurve.cpp", src_dir ++ "/Core/Memory.cpp", src_dir ++ "/Core/Profiler.cpp", @@ -129,16 +129,11 @@ pub fn build(b: *std.Build) void { src_dir ++ "/ObjectStream/SerializableObject.cpp", src_dir ++ "/ObjectStream/TypeDeclarations.cpp", src_dir ++ "/Physics/Body/Body.cpp", - src_dir ++ "/Physics/Body/BodyAccess.cpp", src_dir ++ "/Physics/Body/BodyCreationSettings.cpp", src_dir ++ "/Physics/Body/BodyInterface.cpp", src_dir ++ "/Physics/Body/BodyManager.cpp", src_dir ++ "/Physics/Body/MassProperties.cpp", src_dir ++ "/Physics/Body/MotionProperties.cpp", - src_dir ++ "/Physics/SoftBody/SoftBodyCreationSettings.cpp", - src_dir ++ "/Physics/SoftBody/SoftBodyMotionProperties.cpp", - src_dir ++ "/Physics/SoftBody/SoftBodyShape.cpp", - src_dir ++ "/Physics/SoftBody/SoftBodySharedSettings.cpp", src_dir ++ "/Physics/Character/Character.cpp", src_dir ++ "/Physics/Character/CharacterBase.cpp", src_dir ++ "/Physics/Character/CharacterVirtual.cpp", @@ -152,6 +147,7 @@ pub fn build(b: *std.Build) void { src_dir ++ "/Physics/Collision/CollideSphereVsTriangles.cpp", src_dir ++ "/Physics/Collision/CollisionDispatch.cpp", src_dir ++ "/Physics/Collision/CollisionGroup.cpp", + src_dir ++ "/Physics/Collision/EstimateCollisionResponse.cpp", src_dir ++ 
"/Physics/Collision/GroupFilter.cpp", src_dir ++ "/Physics/Collision/GroupFilterTable.cpp", src_dir ++ "/Physics/Collision/ManifoldBetweenTwoFaces.cpp", @@ -166,16 +162,19 @@ pub fn build(b: *std.Build) void { src_dir ++ "/Physics/Collision/Shape/ConvexShape.cpp", src_dir ++ "/Physics/Collision/Shape/CylinderShape.cpp", src_dir ++ "/Physics/Collision/Shape/DecoratedShape.cpp", + src_dir ++ "/Physics/Collision/Shape/EmptyShape.cpp", src_dir ++ "/Physics/Collision/Shape/HeightFieldShape.cpp", src_dir ++ "/Physics/Collision/Shape/MeshShape.cpp", src_dir ++ "/Physics/Collision/Shape/MutableCompoundShape.cpp", src_dir ++ "/Physics/Collision/Shape/OffsetCenterOfMassShape.cpp", + src_dir ++ "/Physics/Collision/Shape/PlaneShape.cpp", src_dir ++ "/Physics/Collision/Shape/RotatedTranslatedShape.cpp", src_dir ++ "/Physics/Collision/Shape/ScaledShape.cpp", src_dir ++ "/Physics/Collision/Shape/Shape.cpp", src_dir ++ "/Physics/Collision/Shape/SphereShape.cpp", src_dir ++ "/Physics/Collision/Shape/StaticCompoundShape.cpp", src_dir ++ "/Physics/Collision/Shape/TaperedCapsuleShape.cpp", + src_dir ++ "/Physics/Collision/Shape/TaperedCylinderShape.cpp", src_dir ++ "/Physics/Collision/Shape/TriangleShape.cpp", src_dir ++ "/Physics/Collision/TransformedShape.cpp", src_dir ++ "/Physics/Constraints/ConeConstraint.cpp", @@ -191,13 +190,13 @@ pub fn build(b: *std.Build) void { src_dir ++ "/Physics/Constraints/PathConstraintPath.cpp", src_dir ++ "/Physics/Constraints/PathConstraintPathHermite.cpp", src_dir ++ "/Physics/Constraints/PointConstraint.cpp", + src_dir ++ "/Physics/Constraints/PulleyConstraint.cpp", src_dir ++ "/Physics/Constraints/RackAndPinionConstraint.cpp", src_dir ++ "/Physics/Constraints/SixDOFConstraint.cpp", src_dir ++ "/Physics/Constraints/SliderConstraint.cpp", + src_dir ++ "/Physics/Constraints/SpringSettings.cpp", src_dir ++ "/Physics/Constraints/SwingTwistConstraint.cpp", src_dir ++ "/Physics/Constraints/TwoBodyConstraint.cpp", - src_dir ++ 
"/Physics/Constraints/PulleyConstraint.cpp", - src_dir ++ "/Physics/Constraints/SpringSettings.cpp", src_dir ++ "/Physics/DeterminismLog.cpp", src_dir ++ "/Physics/IslandBuilder.cpp", src_dir ++ "/Physics/LargeIslandSplitter.cpp", @@ -205,7 +204,12 @@ pub fn build(b: *std.Build) void { src_dir ++ "/Physics/PhysicsSystem.cpp", src_dir ++ "/Physics/PhysicsUpdateContext.cpp", src_dir ++ "/Physics/Ragdoll/Ragdoll.cpp", + src_dir ++ "/Physics/SoftBody/SoftBodyCreationSettings.cpp", + src_dir ++ "/Physics/SoftBody/SoftBodyMotionProperties.cpp", + src_dir ++ "/Physics/SoftBody/SoftBodyShape.cpp", + src_dir ++ "/Physics/SoftBody/SoftBodySharedSettings.cpp", src_dir ++ "/Physics/StateRecorderImpl.cpp", + src_dir ++ "/Physics/Vehicle/MotorcycleController.cpp", src_dir ++ "/Physics/Vehicle/TrackedVehicleController.cpp", src_dir ++ "/Physics/Vehicle/VehicleAntiRollBar.cpp", src_dir ++ "/Physics/Vehicle/VehicleCollisionTester.cpp", @@ -217,24 +221,18 @@ pub fn build(b: *std.Build) void { src_dir ++ "/Physics/Vehicle/VehicleTransmission.cpp", src_dir ++ "/Physics/Vehicle/Wheel.cpp", src_dir ++ "/Physics/Vehicle/WheeledVehicleController.cpp", - src_dir ++ "/Physics/Vehicle/MotorcycleController.cpp", src_dir ++ "/RegisterTypes.cpp", src_dir ++ "/Renderer/DebugRenderer.cpp", - src_dir ++ "/Renderer/DebugRendererSimple.cpp", src_dir ++ "/Renderer/DebugRendererPlayback.cpp", src_dir ++ "/Renderer/DebugRendererRecorder.cpp", + src_dir ++ "/Renderer/DebugRendererSimple.cpp", src_dir ++ "/Skeleton/SkeletalAnimation.cpp", src_dir ++ "/Skeleton/Skeleton.cpp", src_dir ++ "/Skeleton/SkeletonMapper.cpp", src_dir ++ "/Skeleton/SkeletonPose.cpp", - src_dir ++ "/TriangleGrouper/TriangleGrouperClosestCentroid.cpp", - src_dir ++ "/TriangleGrouper/TriangleGrouperMorton.cpp", src_dir ++ "/TriangleSplitter/TriangleSplitter.cpp", src_dir ++ "/TriangleSplitter/TriangleSplitterBinning.cpp", - src_dir ++ "/TriangleSplitter/TriangleSplitterFixedLeafSize.cpp", - src_dir ++ 
"/TriangleSplitter/TriangleSplitterLongestAxis.cpp", src_dir ++ "/TriangleSplitter/TriangleSplitterMean.cpp", - src_dir ++ "/TriangleSplitter/TriangleSplitterMorton.cpp", }, .flags = c_flags, }); @@ -246,15 +244,6 @@ pub fn build(b: *std.Build) void { }); } - if (target.result.abi != .msvc or optimize != .Debug) { - joltc.addCSourceFiles(.{ - .files = &.{ - src_dir ++ "/Physics/PhysicsLock.cpp", - }, - .flags = c_flags, - }); - } - const test_step = b.step("test", "Run zphysics tests"); const tests = b.addTest(.{ diff --git a/libs/Jolt/AABBTree/AABBTreeBuilder.cpp b/libs/Jolt/AABBTree/AABBTreeBuilder.cpp index e4b7ece..4024132 100644 --- a/libs/Jolt/AABBTree/AABBTreeBuilder.cpp +++ b/libs/Jolt/AABBTree/AABBTreeBuilder.cpp @@ -8,84 +8,72 @@ JPH_NAMESPACE_BEGIN -AABBTreeBuilder::Node::Node() -{ - mChild[0] = nullptr; - mChild[1] = nullptr; -} - -AABBTreeBuilder::Node::~Node() -{ - delete mChild[0]; - delete mChild[1]; -} - -uint AABBTreeBuilder::Node::GetMinDepth() const +uint AABBTreeBuilder::Node::GetMinDepth(const Array &inNodes) const { if (HasChildren()) { - uint left = mChild[0]->GetMinDepth(); - uint right = mChild[1]->GetMinDepth(); + uint left = inNodes[mChild[0]].GetMinDepth(inNodes); + uint right = inNodes[mChild[1]].GetMinDepth(inNodes); return min(left, right) + 1; } else return 1; } -uint AABBTreeBuilder::Node::GetMaxDepth() const +uint AABBTreeBuilder::Node::GetMaxDepth(const Array &inNodes) const { if (HasChildren()) { - uint left = mChild[0]->GetMaxDepth(); - uint right = mChild[1]->GetMaxDepth(); + uint left = inNodes[mChild[0]].GetMaxDepth(inNodes); + uint right = inNodes[mChild[1]].GetMaxDepth(inNodes); return max(left, right) + 1; } else return 1; } -uint AABBTreeBuilder::Node::GetNodeCount() const +uint AABBTreeBuilder::Node::GetNodeCount(const Array &inNodes) const { if (HasChildren()) - return mChild[0]->GetNodeCount() + mChild[1]->GetNodeCount() + 1; + return inNodes[mChild[0]].GetNodeCount(inNodes) + inNodes[mChild[1]].GetNodeCount(inNodes) 
+ 1; else return 1; } -uint AABBTreeBuilder::Node::GetLeafNodeCount() const +uint AABBTreeBuilder::Node::GetLeafNodeCount(const Array &inNodes) const { if (HasChildren()) - return mChild[0]->GetLeafNodeCount() + mChild[1]->GetLeafNodeCount(); + return inNodes[mChild[0]].GetLeafNodeCount(inNodes) + inNodes[mChild[1]].GetLeafNodeCount(inNodes); else return 1; } -uint AABBTreeBuilder::Node::GetTriangleCountInTree() const +uint AABBTreeBuilder::Node::GetTriangleCountInTree(const Array &inNodes) const { if (HasChildren()) - return mChild[0]->GetTriangleCountInTree() + mChild[1]->GetTriangleCountInTree(); + return inNodes[mChild[0]].GetTriangleCountInTree(inNodes) + inNodes[mChild[1]].GetTriangleCountInTree(inNodes); else return GetTriangleCount(); } -void AABBTreeBuilder::Node::GetTriangleCountPerNode(float &outAverage, uint &outMin, uint &outMax) const +void AABBTreeBuilder::Node::GetTriangleCountPerNode(const Array &inNodes, float &outAverage, uint &outMin, uint &outMax) const { outMin = INT_MAX; outMax = 0; outAverage = 0; uint avg_divisor = 0; - GetTriangleCountPerNodeInternal(outAverage, avg_divisor, outMin, outMax); + GetTriangleCountPerNodeInternal(inNodes, outAverage, avg_divisor, outMin, outMax); if (avg_divisor > 0) outAverage /= avg_divisor; } -float AABBTreeBuilder::Node::CalculateSAHCost(float inCostTraversal, float inCostLeaf) const +float AABBTreeBuilder::Node::CalculateSAHCost(const Array &inNodes, float inCostTraversal, float inCostLeaf) const { float surface_area = mBounds.GetSurfaceArea(); - return surface_area > 0.0f? CalculateSAHCostInternal(inCostTraversal / surface_area, inCostLeaf / surface_area) : 0.0f; + return surface_area > 0.0f? 
CalculateSAHCostInternal(inNodes, inCostTraversal / surface_area, inCostLeaf / surface_area) : 0.0f; } -void AABBTreeBuilder::Node::GetNChildren(uint inN, Array &outChildren) const +void AABBTreeBuilder::Node::GetNChildren(const Array &inNodes, uint inN, Array &outChildren) const { JPH_ASSERT(outChildren.empty()); @@ -94,8 +82,8 @@ void AABBTreeBuilder::Node::GetNChildren(uint inN, Array &outChild return; // Start with the children of this node - outChildren.push_back(mChild[0]); - outChildren.push_back(mChild[1]); + outChildren.push_back(&inNodes[mChild[0]]); + outChildren.push_back(&inNodes[mChild[1]]); size_t next = 0; bool all_triangles = true; @@ -116,8 +104,8 @@ void AABBTreeBuilder::Node::GetNChildren(uint inN, Array &outChild if (to_expand->HasChildren()) { outChildren.erase(outChildren.begin() + next); - outChildren.push_back(to_expand->mChild[0]); - outChildren.push_back(to_expand->mChild[1]); + outChildren.push_back(&inNodes[to_expand->mChild[0]]); + outChildren.push_back(&inNodes[to_expand->mChild[1]]); all_triangles = false; } else @@ -127,22 +115,22 @@ void AABBTreeBuilder::Node::GetNChildren(uint inN, Array &outChild } } -float AABBTreeBuilder::Node::CalculateSAHCostInternal(float inCostTraversalDivSurfaceArea, float inCostLeafDivSurfaceArea) const +float AABBTreeBuilder::Node::CalculateSAHCostInternal(const Array &inNodes, float inCostTraversalDivSurfaceArea, float inCostLeafDivSurfaceArea) const { if (HasChildren()) return inCostTraversalDivSurfaceArea * mBounds.GetSurfaceArea() - + mChild[0]->CalculateSAHCostInternal(inCostTraversalDivSurfaceArea, inCostLeafDivSurfaceArea) - + mChild[1]->CalculateSAHCostInternal(inCostTraversalDivSurfaceArea, inCostLeafDivSurfaceArea); + + inNodes[mChild[0]].CalculateSAHCostInternal(inNodes, inCostTraversalDivSurfaceArea, inCostLeafDivSurfaceArea) + + inNodes[mChild[1]].CalculateSAHCostInternal(inNodes, inCostTraversalDivSurfaceArea, inCostLeafDivSurfaceArea); else return inCostLeafDivSurfaceArea * 
mBounds.GetSurfaceArea() * GetTriangleCount(); } -void AABBTreeBuilder::Node::GetTriangleCountPerNodeInternal(float &outAverage, uint &outAverageDivisor, uint &outMin, uint &outMax) const +void AABBTreeBuilder::Node::GetTriangleCountPerNodeInternal(const Array &inNodes, float &outAverage, uint &outAverageDivisor, uint &outMin, uint &outMax) const { if (HasChildren()) { - mChild[0]->GetTriangleCountPerNodeInternal(outAverage, outAverageDivisor, outMin, outMax); - mChild[1]->GetTriangleCountPerNodeInternal(outAverage, outAverageDivisor, outMin, outMax); + inNodes[mChild[0]].GetTriangleCountPerNodeInternal(inNodes, outAverage, outAverageDivisor, outMin, outMax); + inNodes[mChild[1]].GetTriangleCountPerNodeInternal(inNodes, outAverage, outAverageDivisor, outMin, outMax); } else { @@ -162,28 +150,36 @@ AABBTreeBuilder::AABBTreeBuilder(TriangleSplitter &inSplitter, uint inMaxTriangl AABBTreeBuilder::Node *AABBTreeBuilder::Build(AABBTreeBuilderStats &outStats) { TriangleSplitter::Range initial = mTriangleSplitter.GetInitialRange(); - Node *root = BuildInternal(initial); + // Worst case for number of nodes: 1 leaf node per triangle. At each level above, the number of nodes is half that of the level below. + // This means that at most we'll be allocating 2x the number of triangles in nodes. 
+ mNodes.reserve(2 * initial.Count()); + mTriangles.reserve(initial.Count()); + + // Build the tree + Node &root = mNodes[BuildInternal(initial)]; + + // Collect stats float avg_triangles_per_leaf; uint min_triangles_per_leaf, max_triangles_per_leaf; - root->GetTriangleCountPerNode(avg_triangles_per_leaf, min_triangles_per_leaf, max_triangles_per_leaf); + root.GetTriangleCountPerNode(mNodes, avg_triangles_per_leaf, min_triangles_per_leaf, max_triangles_per_leaf); mTriangleSplitter.GetStats(outStats.mSplitterStats); - outStats.mSAHCost = root->CalculateSAHCost(1.0f, 1.0f); - outStats.mMinDepth = root->GetMinDepth(); - outStats.mMaxDepth = root->GetMaxDepth(); - outStats.mNodeCount = root->GetNodeCount(); - outStats.mLeafNodeCount = root->GetLeafNodeCount(); + outStats.mSAHCost = root.CalculateSAHCost(mNodes, 1.0f, 1.0f); + outStats.mMinDepth = root.GetMinDepth(mNodes); + outStats.mMaxDepth = root.GetMaxDepth(mNodes); + outStats.mNodeCount = root.GetNodeCount(mNodes); + outStats.mLeafNodeCount = root.GetLeafNodeCount(mNodes); outStats.mMaxTrianglesPerLeaf = mMaxTrianglesPerLeaf; outStats.mTreeMinTrianglesPerLeaf = min_triangles_per_leaf; outStats.mTreeMaxTrianglesPerLeaf = max_triangles_per_leaf; outStats.mTreeAvgTrianglesPerLeaf = avg_triangles_per_leaf; - return root; + return &root; } -AABBTreeBuilder::Node *AABBTreeBuilder::BuildInternal(const TriangleSplitter::Range &inTriangles) +uint AABBTreeBuilder::BuildInternal(const TriangleSplitter::Range &inTriangles) { // Check if there are too many triangles left if (inTriangles.Count() > mMaxTrianglesPerLeaf) @@ -214,26 +210,33 @@ AABBTreeBuilder::Node *AABBTreeBuilder::BuildInternal(const TriangleSplitter::Ra } // Recursively build - Node *node = new Node(); - node->mChild[0] = BuildInternal(left); - node->mChild[1] = BuildInternal(right); - node->mBounds = node->mChild[0]->mBounds; - node->mBounds.Encapsulate(node->mChild[1]->mBounds); - return node; + const uint node_index = (uint)mNodes.size(); + 
mNodes.push_back(Node()); + uint left_index = BuildInternal(left); + uint right_index = BuildInternal(right); + Node &node = mNodes[node_index]; + node.mChild[0] = left_index; + node.mChild[1] = right_index; + node.mBounds = mNodes[node.mChild[0]].mBounds; + node.mBounds.Encapsulate(mNodes[node.mChild[1]].mBounds); + return node_index; } // Create leaf node - Node *node = new Node(); - node->mTriangles.reserve(inTriangles.Count()); + const uint node_index = (uint)mNodes.size(); + mNodes.push_back(Node()); + Node &node = mNodes.back(); + node.mTrianglesBegin = (uint)mTriangles.size(); + node.mNumTriangles = inTriangles.mEnd - inTriangles.mBegin; + const VertexList &v = mTriangleSplitter.GetVertices(); for (uint i = inTriangles.mBegin; i < inTriangles.mEnd; ++i) { const IndexedTriangle &t = mTriangleSplitter.GetTriangle(i); - const VertexList &v = mTriangleSplitter.GetVertices(); - node->mTriangles.push_back(t); - node->mBounds.Encapsulate(v, t); + mTriangles.push_back(t); + node.mBounds.Encapsulate(v, t); } - return node; + return node_index; } JPH_NAMESPACE_END diff --git a/libs/Jolt/AABBTree/AABBTreeBuilder.h b/libs/Jolt/AABBTree/AABBTreeBuilder.h index dacae5e..3b0635c 100644 --- a/libs/Jolt/AABBTree/AABBTreeBuilder.h +++ b/libs/Jolt/AABBTree/AABBTreeBuilder.h @@ -36,62 +36,62 @@ class JPH_EXPORT AABBTreeBuilder { public: /// A node in the tree, contains the AABox for the tree and any child nodes or triangles - class Node : public NonCopyable + class Node { public: JPH_OVERRIDE_NEW_DELETE - /// Constructor - Node(); - ~Node(); + /// Indicates that there is no child + static constexpr uint cInvalidNodeIndex = ~uint(0); /// Get number of triangles in this node - inline uint GetTriangleCount() const { return uint(mTriangles.size()); } + inline uint GetTriangleCount() const { return mNumTriangles; } /// Check if this node has any children - inline bool HasChildren() const { return mChild[0] != nullptr || mChild[1] != nullptr; } + inline bool HasChildren() const { 
return mChild[0] != cInvalidNodeIndex || mChild[1] != cInvalidNodeIndex; } /// Min depth of tree - uint GetMinDepth() const; + uint GetMinDepth(const Array &inNodes) const; /// Max depth of tree - uint GetMaxDepth() const; + uint GetMaxDepth(const Array &inNodes) const; /// Number of nodes in tree - uint GetNodeCount() const; + uint GetNodeCount(const Array &inNodes) const; /// Number of leaf nodes in tree - uint GetLeafNodeCount() const; + uint GetLeafNodeCount(const Array &inNodes) const; /// Get triangle count in tree - uint GetTriangleCountInTree() const; + uint GetTriangleCountInTree(const Array &inNodes) const; /// Calculate min and max triangles per node - void GetTriangleCountPerNode(float &outAverage, uint &outMin, uint &outMax) const; + void GetTriangleCountPerNode(const Array &inNodes, float &outAverage, uint &outMin, uint &outMax) const; /// Calculate the total cost of the tree using the surface area heuristic - float CalculateSAHCost(float inCostTraversal, float inCostLeaf) const; + float CalculateSAHCost(const Array &inNodes, float inCostTraversal, float inCostLeaf) const; /// Recursively get children (breadth first) to get in total inN children (or less if there are no more) - void GetNChildren(uint inN, Array &outChildren) const; + void GetNChildren(const Array &inNodes, uint inN, Array &outChildren) const; /// Bounding box AABox mBounds; /// Triangles (if no child nodes) - IndexedTriangleList mTriangles; + uint mTrianglesBegin; // Index into mTriangles + uint mNumTriangles = 0; - /// Child nodes (if no triangles) - Node * mChild[2]; + /// Child node indices (if no triangles) + uint mChild[2] = { cInvalidNodeIndex, cInvalidNodeIndex }; private: friend class AABBTreeBuilder; /// Recursive helper function to calculate cost of the tree - float CalculateSAHCostInternal(float inCostTraversalDivSurfaceArea, float inCostLeafDivSurfaceArea) const; + float CalculateSAHCostInternal(const Array &inNodes, float inCostTraversalDivSurfaceArea, float 
inCostLeafDivSurfaceArea) const; /// Recursive helper function to calculate min and max triangles per node - void GetTriangleCountPerNodeInternal(float &outAverage, uint &outAverageDivisor, uint &outMin, uint &outMax) const; + void GetTriangleCountPerNodeInternal(const Array &inNodes, float &outAverage, uint &outAverageDivisor, uint &outMin, uint &outMax) const; }; /// Constructor @@ -100,11 +100,19 @@ class JPH_EXPORT AABBTreeBuilder /// Recursively build tree, returns the root node of the tree Node * Build(AABBTreeBuilderStats &outStats); + /// Get all nodes + const Array & GetNodes() const { return mNodes; } + + /// Get all triangles + const Array &GetTriangles() const { return mTriangles; } + private: - Node * BuildInternal(const TriangleSplitter::Range &inTriangles); + uint BuildInternal(const TriangleSplitter::Range &inTriangles); TriangleSplitter & mTriangleSplitter; const uint mMaxTrianglesPerLeaf; + Array mNodes; + Array mTriangles; }; JPH_NAMESPACE_END diff --git a/libs/Jolt/AABBTree/AABBTreeToBuffer.h b/libs/Jolt/AABBTree/AABBTreeToBuffer.h index 1fa9bb7..d50c750 100644 --- a/libs/Jolt/AABBTree/AABBTreeToBuffer.h +++ b/libs/Jolt/AABBTree/AABBTreeToBuffer.h @@ -8,14 +8,8 @@ #include #include -JPH_SUPPRESS_WARNINGS_STD_BEGIN -#include -JPH_SUPPRESS_WARNINGS_STD_END - JPH_NAMESPACE_BEGIN -template using Deque = std::deque>; - /// Conversion algorithm that converts an AABB tree to an optimized binary buffer template class AABBTreeToBuffer @@ -37,20 +31,89 @@ class AABBTreeToBuffer static const int TriangleHeaderSize = TriangleCodec::TriangleHeaderSize; /// Convert AABB tree. Returns false if failed. 
- bool Convert(const VertexList &inVertices, const AABBTreeBuilder::Node *inRoot, const char *&outError) + bool Convert(const Array &inTriangles, const Array &inNodes, const VertexList &inVertices, const AABBTreeBuilder::Node *inRoot, bool inStoreUserData, const char *&outError) { - const typename NodeCodec::EncodingContext node_ctx; + typename NodeCodec::EncodingContext node_ctx; typename TriangleCodec::EncodingContext tri_ctx(inVertices); - // Estimate the amount of memory required - uint tri_count = inRoot->GetTriangleCountInTree(); - uint node_count = inRoot->GetNodeCount(); - uint nodes_size = node_ctx.GetPessimisticMemoryEstimate(node_count); - uint total_size = HeaderSize + TriangleHeaderSize + nodes_size + tri_ctx.GetPessimisticMemoryEstimate(tri_count); - mTree.reserve(total_size); + // Child nodes out of loop so we don't constantly realloc it + Array child_nodes; + child_nodes.reserve(NumChildrenPerNode); + + // First calculate how big the tree is going to be. + // Since the tree can be huge for very large meshes, we don't want + // to reallocate the buffer as it may cause out of memory situations. + // This loop mimics the construction loop below. 
+ uint64 total_size = HeaderSize + TriangleHeaderSize; + size_t node_count = 1; // Start with root node + size_t to_process_max_size = 1; // Track size of queues so we can do a single reserve below + size_t to_process_triangles_max_size = 0; + { // A scope to free the memory associated with to_estimate and to_estimate_triangles + Array to_estimate; + Array to_estimate_triangles; + to_estimate.push_back(inRoot); + for (;;) + { + while (!to_estimate.empty()) + { + // Get the next node to process + const AABBTreeBuilder::Node *node = to_estimate.back(); + to_estimate.pop_back(); + + // Update total size + node_ctx.PrepareNodeAllocate(node, total_size); + + if (node->HasChildren()) + { + // Collect the first NumChildrenPerNode sub-nodes in the tree + child_nodes.clear(); // Won't free the memory + node->GetNChildren(inNodes, NumChildrenPerNode, child_nodes); + + // Increment the number of nodes we're going to store + node_count += child_nodes.size(); + + // Insert in reverse order so we estimate left child first when taking nodes from the back + for (int idx = int(child_nodes.size()) - 1; idx >= 0; --idx) + { + // Store triangles in separate list so we process them last + const AABBTreeBuilder::Node *child = child_nodes[idx]; + if (child->HasChildren()) + { + to_estimate.push_back(child); + to_process_max_size = max(to_estimate.size(), to_process_max_size); + } + else + { + to_estimate_triangles.push_back(child); + to_process_triangles_max_size = max(to_estimate_triangles.size(), to_process_triangles_max_size); + } + } + } + else + { + // Update total size + tri_ctx.PreparePack(&inTriangles[node->mTrianglesBegin], node->mNumTriangles, inStoreUserData, total_size); + } + } + + // If we've got triangles to estimate, loop again with just the triangles + if (to_estimate_triangles.empty()) + break; + else + to_estimate.swap(to_estimate_triangles); + } + } + + // Finalize the prepare stage for the triangle context + tri_ctx.FinalizePreparePack(total_size); - // Reset 
counters - mNodesSize = 0; + // Reserve the buffer + if (size_t(total_size) != total_size) + { + outError = "AABBTreeToBuffer: Out of memory!"; + return false; + } + mTree.reserve(size_t(total_size)); // Add headers NodeHeader *header = HeaderSize > 0? mTree.Allocate() : nullptr; @@ -61,19 +124,20 @@ class AABBTreeToBuffer const AABBTreeBuilder::Node * mNode = nullptr; // Node that this entry belongs to Vec3 mNodeBoundsMin; // Quantized node bounds Vec3 mNodeBoundsMax; - uint mNodeStart = uint(-1); // Start of node in mTree - uint mTriangleStart = uint(-1); // Start of the triangle data in mTree + size_t mNodeStart = size_t(-1); // Start of node in mTree + size_t mTriangleStart = size_t(-1); // Start of the triangle data in mTree + size_t mChildNodeStart[NumChildrenPerNode]; // Start of the children of the node in mTree + size_t mChildTrianglesStart[NumChildrenPerNode]; // Start of the triangle data in mTree + size_t * mParentChildNodeStart = nullptr; // Where to store mNodeStart (to patch mChildNodeStart of my parent) + size_t * mParentTrianglesStart = nullptr; // Where to store mTriangleStart (to patch mChildTrianglesStart of my parent) uint mNumChildren = 0; // Number of children - uint mChildNodeStart[NumChildrenPerNode]; // Start of the children of the node in mTree - uint mChildTrianglesStart[NumChildrenPerNode]; // Start of the triangle data in mTree - uint * mParentChildNodeStart = nullptr; // Where to store mNodeStart (to patch mChildNodeStart of my parent) - uint * mParentTrianglesStart = nullptr; // Where to store mTriangleStart (to patch mChildTrianglesStart of my parent) }; - Deque to_process; - Deque to_process_triangles; + Array to_process; + to_process.reserve(to_process_max_size); + Array to_process_triangles; + to_process_triangles.reserve(to_process_triangles_max_size); Array node_list; - node_list.reserve(node_count); // Needed to ensure that array is not reallocated, so we can keep pointers in the array NodeData root; @@ -83,10 +147,6 @@ class 
AABBTreeToBuffer node_list.push_back(root); to_process.push_back(&node_list.back()); - // Child nodes out of loop so we don't constantly realloc it - Array child_nodes; - child_nodes.reserve(NumChildrenPerNode); - for (;;) { while (!to_process.empty()) @@ -100,7 +160,7 @@ class AABBTreeToBuffer // Collect the first NumChildrenPerNode sub-nodes in the tree child_nodes.clear(); // Won't free the memory - node_data->mNode->GetNChildren(NumChildrenPerNode, child_nodes); + node_data->mNode->GetNChildren(inNodes, NumChildrenPerNode, child_nodes); node_data->mNumChildren = (uint)child_nodes.size(); // Fill in default child bounds @@ -118,37 +178,31 @@ class AABBTreeToBuffer } // Start a new node - uint old_size = (uint)mTree.size(); node_data->mNodeStart = node_ctx.NodeAllocate(node_data->mNode, node_data->mNodeBoundsMin, node_data->mNodeBoundsMax, child_nodes, child_bounds_min, child_bounds_max, mTree, outError); - if (node_data->mNodeStart == uint(-1)) + if (node_data->mNodeStart == size_t(-1)) return false; - mNodesSize += (uint)mTree.size() - old_size; if (node_data->mNode->HasChildren()) { // Insert in reverse order so we process left child first when taking nodes from the back for (int idx = int(child_nodes.size()) - 1; idx >= 0; --idx) { + const AABBTreeBuilder::Node *child_node = child_nodes[idx]; + // Due to quantization box could have become bigger, not smaller - JPH_ASSERT(AABox(child_bounds_min[idx], child_bounds_max[idx]).Contains(child_nodes[idx]->mBounds), "AABBTreeToBuffer: Bounding box became smaller!"); + JPH_ASSERT(AABox(child_bounds_min[idx], child_bounds_max[idx]).Contains(child_node->mBounds), "AABBTreeToBuffer: Bounding box became smaller!"); // Add child to list of nodes to be processed NodeData child; - child.mNode = child_nodes[idx]; + child.mNode = child_node; child.mNodeBoundsMin = child_bounds_min[idx]; child.mNodeBoundsMax = child_bounds_max[idx]; child.mParentChildNodeStart = &node_data->mChildNodeStart[idx]; child.mParentTrianglesStart = 
&node_data->mChildTrianglesStart[idx]; - NodeData *old = &node_list[0]; node_list.push_back(child); - if (old != &node_list[0]) - { - outError = "Internal Error: Array reallocated, memory corruption!"; - return false; - } // Store triangles in separate list so we process them last - if (node_list.back().mNode->HasChildren()) + if (child_node->HasChildren()) to_process.push_back(&node_list.back()); else to_process_triangles.push_back(&node_list.back()); @@ -157,8 +211,8 @@ class AABBTreeToBuffer else { // Add triangles - node_data->mTriangleStart = tri_ctx.Pack(node_data->mNode->mTriangles, mTree, outError); - if (node_data->mTriangleStart == uint(-1)) + node_data->mTriangleStart = tri_ctx.Pack(&inTriangles[node_data->mNode->mTrianglesBegin], node_data->mNode->mNumTriangles, inStoreUserData, mTree, outError); + if (node_data->mTriangleStart == size_t(-1)) return false; } @@ -177,6 +231,10 @@ class AABBTreeToBuffer to_process.swap(to_process_triangles); } + // Assert that our reservation was correct (we don't know if we swapped the arrays or not) + JPH_ASSERT(to_process_max_size == to_process.capacity() || to_process_triangles_max_size == to_process.capacity()); + JPH_ASSERT(to_process_max_size == to_process_triangles.capacity() || to_process_triangles_max_size == to_process_triangles.capacity()); + // Finalize all nodes for (NodeData &n : node_list) if (!node_ctx.NodeFinalize(n.mNode, n.mNodeStart, n.mNumChildren, n.mChildNodeStart, n.mChildTrianglesStart, mTree, outError)) @@ -185,26 +243,20 @@ class AABBTreeToBuffer // Finalize the triangles tri_ctx.Finalize(inVertices, triangle_header, mTree); - // Validate that we reserved enough memory - if (nodes_size < mNodesSize) + // Validate that our reservations were correct + if (node_count != node_list.size()) { - outError = "Internal Error: Not enough memory reserved for nodes!"; + outError = "Internal Error: Node memory estimate was incorrect, memory corruption!"; return false; } - if (total_size < (uint)mTree.size()) 
+ if (total_size != mTree.size()) { - outError = "Internal Error: Not enough memory reserved for triangles!"; + outError = "Internal Error: Tree memory estimate was incorrect, memory corruption!"; return false; } // Finalize the nodes - if (!node_ctx.Finalize(header, inRoot, node_list[0].mNodeStart, node_list[0].mTriangleStart, outError)) - return false; - - // Shrink the tree, this will invalidate the header and triangle_header variables - mTree.shrink_to_fit(); - - return true; + return node_ctx.Finalize(header, inRoot, node_list[0].mNodeStart, node_list[0].mTriangleStart, outError); } /// Get resulting data @@ -239,7 +291,6 @@ class AABBTreeToBuffer private: ByteBuffer mTree; ///< Resulting tree structure - uint mNodesSize; ///< Size in bytes of the nodes in the buffer }; JPH_NAMESPACE_END diff --git a/libs/Jolt/AABBTree/NodeCodec/NodeCodecQuadTreeHalfFloat.h b/libs/Jolt/AABBTree/NodeCodec/NodeCodecQuadTreeHalfFloat.h index e5376ea..c0feea7 100644 --- a/libs/Jolt/AABBTree/NodeCodec/NodeCodecQuadTreeHalfFloat.h +++ b/libs/Jolt/AABBTree/NodeCodec/NodeCodecQuadTreeHalfFloat.h @@ -10,7 +10,6 @@ JPH_NAMESPACE_BEGIN -template class NodeCodecQuadTreeHalfFloat { public: @@ -23,6 +22,8 @@ class NodeCodecQuadTreeHalfFloat Float3 mRootBoundsMin; Float3 mRootBoundsMax; uint32 mRootProperties; + uint8 mBlockIDBits; ///< Number of bits to address a triangle block + uint8 mPadding[3] = { 0 }; }; /// Size of the header (an empty struct is always > 0 bytes so this needs a separate variable) @@ -61,26 +62,30 @@ class NodeCodecQuadTreeHalfFloat class EncodingContext { public: - /// Get an upper bound on the amount of bytes needed for a node tree with inNodeCount nodes - uint GetPessimisticMemoryEstimate(uint inNodeCount) const + /// Mimics the size a call to NodeAllocate() would add to the buffer + void PrepareNodeAllocate(const AABBTreeBuilder::Node *inNode, uint64 &ioBufferSize) const { - return inNodeCount * (sizeof(Node) + Alignment - 1); + // We don't emit nodes for leafs + 
if (!inNode->HasChildren()) + return; + + // Add size of node + ioBufferSize += sizeof(Node); } /// Allocate a new node for inNode. /// Algorithm can modify the order of ioChildren to indicate in which order children should be compressed /// Algorithm can enlarge the bounding boxes of the children during compression and returns these in outChildBoundsMin, outChildBoundsMax /// inNodeBoundsMin, inNodeBoundsMax is the bounding box if inNode possibly widened by compressing the parent node - /// Returns uint(-1) on error and reports the error in outError - uint NodeAllocate(const AABBTreeBuilder::Node *inNode, Vec3Arg inNodeBoundsMin, Vec3Arg inNodeBoundsMax, Array &ioChildren, Vec3 outChildBoundsMin[NumChildrenPerNode], Vec3 outChildBoundsMax[NumChildrenPerNode], ByteBuffer &ioBuffer, const char *&outError) const + /// Returns size_t(-1) on error and reports the error in outError + size_t NodeAllocate(const AABBTreeBuilder::Node *inNode, Vec3Arg inNodeBoundsMin, Vec3Arg inNodeBoundsMax, Array &ioChildren, Vec3 outChildBoundsMin[NumChildrenPerNode], Vec3 outChildBoundsMax[NumChildrenPerNode], ByteBuffer &ioBuffer, const char *&outError) const { // We don't emit nodes for leafs if (!inNode->HasChildren()) - return (uint)ioBuffer.size(); + return ioBuffer.size(); - // Align the buffer - ioBuffer.Align(Alignment); - uint node_start = (uint)ioBuffer.size(); + // Remember the start of the node + size_t node_start = ioBuffer.size(); // Fill in bounds Node *node = ioBuffer.Allocate(); @@ -104,7 +109,7 @@ class NodeCodecQuadTreeHalfFloat if (this_node->GetTriangleCount() >= TRIANGLE_COUNT_MASK) { outError = "NodeCodecQuadTreeHalfFloat: Too many triangles"; - return uint(-1); + return size_t(-1); } } else @@ -133,7 +138,7 @@ class NodeCodecQuadTreeHalfFloat } /// Once all nodes have been added, this call finalizes all nodes by patching in the offsets of the child nodes (that were added after the node itself was added) - bool NodeFinalize(const AABBTreeBuilder::Node *inNode, 
uint inNodeStart, uint inNumChildren, const uint *inChildrenNodeStart, const uint *inChildrenTrianglesStart, ByteBuffer &ioBuffer, const char *&outError) const + bool NodeFinalize(const AABBTreeBuilder::Node *inNode, size_t inNodeStart, uint inNumChildren, const size_t *inChildrenNodeStart, const size_t *inChildrenTrianglesStart, ByteBuffer &ioBuffer, const char *&outError) { if (!inNode->HasChildren()) return true; @@ -141,46 +146,64 @@ class NodeCodecQuadTreeHalfFloat Node *node = ioBuffer.Get(inNodeStart); for (uint i = 0; i < inNumChildren; ++i) { - // If there are triangles, use the triangle offset otherwise use the node offset - uint offset = node->mNodeProperties[i] != 0? inChildrenTrianglesStart[i] : inChildrenNodeStart[i]; + size_t offset; + if (node->mNodeProperties[i] != 0) + { + // This is a triangle block + offset = inChildrenTrianglesStart[i]; + + // Store highest block with triangles so we can count the number of bits we need + mHighestTriangleBlock = max(mHighestTriangleBlock, offset); + } + else + { + // This is a node block + offset = inChildrenNodeStart[i]; + } + + // Store offset of next node / triangles if (offset & OFFSET_NON_SIGNIFICANT_MASK) { outError = "NodeCodecQuadTreeHalfFloat: Internal Error: Offset has non-significant bits set"; return false; } offset >>= OFFSET_NON_SIGNIFICANT_BITS; - if (offset & ~OFFSET_MASK) + if (offset > OFFSET_MASK) { outError = "NodeCodecQuadTreeHalfFloat: Offset too large. 
Too much data."; return false; } - - // Store offset of next node / triangles - node->mNodeProperties[i] |= offset; + node->mNodeProperties[i] |= uint32(offset); } return true; } /// Once all nodes have been finalized, this will finalize the header of the nodes - bool Finalize(Header *outHeader, const AABBTreeBuilder::Node *inRoot, uint inRootNodeStart, uint inRootTrianglesStart, const char *&outError) const + bool Finalize(Header *outHeader, const AABBTreeBuilder::Node *inRoot, size_t inRootNodeStart, size_t inRootTrianglesStart, const char *&outError) const { - uint offset = inRoot->HasChildren()? inRootNodeStart : inRootTrianglesStart; + // Check if we can address the root node + size_t offset = inRoot->HasChildren()? inRootNodeStart : inRootTrianglesStart; if (offset & OFFSET_NON_SIGNIFICANT_MASK) { outError = "NodeCodecQuadTreeHalfFloat: Internal Error: Offset has non-significant bits set"; return false; } offset >>= OFFSET_NON_SIGNIFICANT_BITS; - if (offset & ~OFFSET_MASK) + if (offset > OFFSET_MASK) { outError = "NodeCodecQuadTreeHalfFloat: Offset too large. Too much data."; return false; } + // If the root has triangles, we need to take that offset instead since the mHighestTriangleBlock will be zero + size_t highest_triangle_block = inRootTrianglesStart != size_t(-1)? 
inRootTrianglesStart : mHighestTriangleBlock; + highest_triangle_block >>= OFFSET_NON_SIGNIFICANT_BITS; + inRoot->mBounds.mMin.StoreFloat3(&outHeader->mRootBoundsMin); inRoot->mBounds.mMax.StoreFloat3(&outHeader->mRootBoundsMax); - outHeader->mRootProperties = offset + (inRoot->GetTriangleCount() << TRIANGLE_COUNT_SHIFT); + outHeader->mRootProperties = uint32(offset) + (inRoot->GetTriangleCount() << TRIANGLE_COUNT_SHIFT); + outHeader->mBlockIDBits = uint8(32 - CountLeadingZeros(uint32(highest_triangle_block))); if (inRoot->GetTriangleCount() >= TRIANGLE_COUNT_MASK) { outError = "NodeCodecQuadTreeHalfFloat: Too many triangles"; @@ -189,6 +212,9 @@ class NodeCodecQuadTreeHalfFloat return true; } + + private: + size_t mHighestTriangleBlock = 0; }; /// This class decodes and decompresses quad tree nodes @@ -196,9 +222,9 @@ class NodeCodecQuadTreeHalfFloat { public: /// Get the amount of bits needed to store an ID to a triangle block - inline static uint sTriangleBlockIDBits(const ByteBuffer &inTree) + inline static uint sTriangleBlockIDBits(const Header *inHeader) { - return 32 - CountLeadingZeros((uint32)inTree.size()) - OFFSET_NON_SIGNIFICANT_BITS; + return inHeader->mBlockIDBits; } /// Convert a triangle block ID to the start of the triangle buffer @@ -228,6 +254,15 @@ class NodeCodecQuadTreeHalfFloat const Node *node = reinterpret_cast(inBufferStart + (node_properties << OFFSET_NON_SIGNIFICANT_BITS)); // Unpack bounds + #ifdef JPH_CPU_BIG_ENDIAN + Vec4 bounds_minx = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMinX[0] + (node->mBoundsMinX[1] << 16), node->mBoundsMinX[2] + (node->mBoundsMinX[3] << 16), 0, 0)); + Vec4 bounds_miny = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMinY[0] + (node->mBoundsMinY[1] << 16), node->mBoundsMinY[2] + (node->mBoundsMinY[3] << 16), 0, 0)); + Vec4 bounds_minz = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMinZ[0] + (node->mBoundsMinZ[1] << 16), node->mBoundsMinZ[2] + (node->mBoundsMinZ[3] << 16), 0, 0)); + + Vec4 
bounds_maxx = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMaxX[0] + (node->mBoundsMaxX[1] << 16), node->mBoundsMaxX[2] + (node->mBoundsMaxX[3] << 16), 0, 0)); + Vec4 bounds_maxy = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMaxY[0] + (node->mBoundsMaxY[1] << 16), node->mBoundsMaxY[2] + (node->mBoundsMaxY[3] << 16), 0, 0)); + Vec4 bounds_maxz = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMaxZ[0] + (node->mBoundsMaxZ[1] << 16), node->mBoundsMaxZ[2] + (node->mBoundsMaxZ[3] << 16), 0, 0)); + #else UVec4 bounds_minxy = UVec4::sLoadInt4(reinterpret_cast(&node->mBoundsMinX[0])); Vec4 bounds_minx = HalfFloatConversion::ToFloat(bounds_minxy); Vec4 bounds_miny = HalfFloatConversion::ToFloat(bounds_minxy.Swizzle()); @@ -239,6 +274,7 @@ class NodeCodecQuadTreeHalfFloat UVec4 bounds_maxyz = UVec4::sLoadInt4(reinterpret_cast(&node->mBoundsMaxY[0])); Vec4 bounds_maxy = HalfFloatConversion::ToFloat(bounds_maxyz); Vec4 bounds_maxz = HalfFloatConversion::ToFloat(bounds_maxyz.Swizzle()); + #endif // Load properties for 4 children UVec4 properties = UVec4::sLoadInt4(&node->mNodeProperties[0]); diff --git a/libs/Jolt/AABBTree/TriangleCodec/TriangleCodecIndexed8BitPackSOA4Flags.h b/libs/Jolt/AABBTree/TriangleCodec/TriangleCodecIndexed8BitPackSOA4Flags.h index 7d29721..b3c33c5 100644 --- a/libs/Jolt/AABBTree/TriangleCodec/TriangleCodecIndexed8BitPackSOA4Flags.h +++ b/libs/Jolt/AABBTree/TriangleCodec/TriangleCodecIndexed8BitPackSOA4Flags.h @@ -13,6 +13,7 @@ JPH_NAMESPACE_BEGIN /// TriangleBlockHeader, /// TriangleBlock (4 triangles and their flags in 16 bytes), /// TriangleBlock... 
+/// [Optional] UserData (4 bytes per triangle) /// /// Vertices are stored: /// @@ -77,13 +78,24 @@ class TriangleCodecIndexed8BitPackSOA4Flags static_assert(sizeof(TriangleBlock) == 16, "Compiler added padding"); + enum ETriangleBlockHeaderFlags : uint32 + { + OFFSET_TO_VERTICES_BITS = 29, ///< Offset from current block to start of vertices in bytes + OFFSET_TO_VERTICES_MASK = (1 << OFFSET_TO_VERTICES_BITS) - 1, + OFFSET_NON_SIGNIFICANT_BITS = 2, ///< The offset from the current block to the start of the vertices must be a multiple of 4 bytes + OFFSET_NON_SIGNIFICANT_MASK = (1 << OFFSET_NON_SIGNIFICANT_BITS) - 1, + OFFSET_TO_USERDATA_BITS = 3, ///< When user data is stored, this is the number of blocks to skip to get to the user data (0 = no user data) + OFFSET_TO_USERDATA_MASK = (1 << OFFSET_TO_USERDATA_BITS) - 1, + }; + /// A triangle header, will be followed by one or more TriangleBlocks struct TriangleBlockHeader { - const VertexData * GetVertexData() const { return reinterpret_cast(reinterpret_cast(this) + mOffsetToVertices); } + const VertexData * GetVertexData() const { return reinterpret_cast(reinterpret_cast(this) + ((mFlags & OFFSET_TO_VERTICES_MASK) << OFFSET_NON_SIGNIFICANT_BITS)); } const TriangleBlock * GetTriangleBlock() const { return reinterpret_cast(reinterpret_cast(this) + sizeof(TriangleBlockHeader)); } + const uint32 * GetUserData() const { uint32 offset = mFlags >> OFFSET_TO_VERTICES_BITS; return offset == 0? 
nullptr : reinterpret_cast(GetTriangleBlock() + offset); } - uint32 mOffsetToVertices; ///< Offset from current block to start of vertices in bytes + uint32 mFlags; }; static_assert(sizeof(TriangleBlockHeader) == 4, "Compiler added padding"); @@ -122,44 +134,112 @@ class TriangleCodecIndexed8BitPackSOA4Flags class EncodingContext { public: + /// Indicates a vertex hasn't been seen yet in the triangle list + static constexpr uint32 cNotFound = 0xffffffff; + /// Construct the encoding context explicit EncodingContext(const VertexList &inVertices) : - mVertexMap(inVertices.size(), 0xffffffff) // Fill vertex map with 'not found' + mVertexMap(inVertices.size(), cNotFound) { - // Reserve for worst case to avoid allocating in the inner loop - mVertices.reserve(inVertices.size()); } - /// Get an upper bound on the amount of bytes needed to store inTriangleCount triangles - uint GetPessimisticMemoryEstimate(uint inTriangleCount) const + /// Mimics the size a call to Pack() would add to the buffer + void PreparePack(const IndexedTriangle *inTriangles, uint inNumTriangles, bool inStoreUserData, uint64 &ioBufferSize) { - // Worst case each triangle is alone in a block, none of the vertices are shared and we need to add 3 bytes to align the vertices - return inTriangleCount * (sizeof(TriangleBlockHeader) + sizeof(TriangleBlock) + 3 * sizeof(VertexData)) + 3; + // Add triangle block header + ioBufferSize += sizeof(TriangleBlockHeader); + + // Compute first vertex that this batch will use (ensuring there's enough room if none of the vertices are shared) + uint start_vertex = Clamp((int)mVertexCount - 256 + (int)inNumTriangles * 3, 0, (int)mVertexCount); + + // Pack vertices + uint padded_triangle_count = AlignUp(inNumTriangles, 4); + for (uint t = 0; t < padded_triangle_count; t += 4) + { + // Add triangle block header + ioBufferSize += sizeof(TriangleBlock); + + for (uint vertex_nr = 0; vertex_nr < 3; ++vertex_nr) + for (uint block_tri_idx = 0; block_tri_idx < 4; 
++block_tri_idx) + { + // Fetch vertex index. Create degenerate triangles for padding triangles. + bool triangle_available = t + block_tri_idx < inNumTriangles; + uint32 src_vertex_index = triangle_available? inTriangles[t + block_tri_idx].mIdx[vertex_nr] : inTriangles[inNumTriangles - 1].mIdx[0]; + + // Check if we've seen this vertex before and if it is in the range that we can encode + uint32 &vertex_index = mVertexMap[src_vertex_index]; + if (vertex_index == cNotFound || vertex_index < start_vertex) + { + // Add vertex + vertex_index = mVertexCount; + mVertexCount++; + } + } + } + + // Add user data + if (inStoreUserData) + ioBufferSize += inNumTriangles * sizeof(uint32); + } + + /// Mimics the size the Finalize() call would add to ioBufferSize + void FinalizePreparePack(uint64 &ioBufferSize) + { + // Remember where the vertices are going to start in the output buffer + JPH_ASSERT(IsAligned(ioBufferSize, 4)); + mVerticesStartIdx = size_t(ioBufferSize); + + // Add vertices to buffer + ioBufferSize += uint64(mVertexCount) * sizeof(VertexData); + + // Reserve the amount of memory we need for the vertices + mVertices.reserve(mVertexCount); + + // Set vertex map back to 'not found' + for (uint32 &v : mVertexMap) + v = cNotFound; } /// Pack the triangles in inContainer to ioBuffer. This stores the mMaterialIndex of a triangle in the 8 bit flags. - /// Returns uint(-1) on error. - uint Pack(const IndexedTriangleList &inTriangles, ByteBuffer &ioBuffer, const char *&outError) + /// Returns size_t(-1) on error. 
+ size_t Pack(const IndexedTriangle *inTriangles, uint inNumTriangles, bool inStoreUserData, ByteBuffer &ioBuffer, const char *&outError) { - // Determine position of triangles start - uint offset = (uint)ioBuffer.size(); + JPH_ASSERT(inNumTriangles > 0); - // Update stats - uint tri_count = (uint)inTriangles.size(); - mNumTriangles += tri_count; + // Determine position of triangles start + size_t triangle_block_start = ioBuffer.size(); // Allocate triangle block header TriangleBlockHeader *header = ioBuffer.Allocate(); // Compute first vertex that this batch will use (ensuring there's enough room if none of the vertices are shared) - uint start_vertex = Clamp((int)mVertices.size() - 256 + (int)tri_count * 3, 0, (int)mVertices.size()); + uint start_vertex = Clamp((int)mVertices.size() - 256 + (int)inNumTriangles * 3, 0, (int)mVertices.size()); + + // Store the start vertex offset relative to TriangleBlockHeader + size_t offset_to_vertices = mVerticesStartIdx - triangle_block_start + size_t(start_vertex) * sizeof(VertexData); + if (offset_to_vertices & OFFSET_NON_SIGNIFICANT_MASK) + { + outError = "TriangleCodecIndexed8BitPackSOA4Flags: Internal Error: Offset has non-significant bits set"; + return size_t(-1); + } + offset_to_vertices >>= OFFSET_NON_SIGNIFICANT_BITS; + if (offset_to_vertices > OFFSET_TO_VERTICES_MASK) + { + outError = "TriangleCodecIndexed8BitPackSOA4Flags: Offset to vertices doesn't fit. 
Too much data."; + return size_t(-1); + } + header->mFlags = uint32(offset_to_vertices); - // Store the start vertex offset, this will later be patched to give the delta offset relative to the triangle block - mOffsetsToPatch.push_back(uint((uint8 *)&header->mOffsetToVertices - &ioBuffer[0])); - header->mOffsetToVertices = start_vertex * sizeof(VertexData); + // When we store user data we need to store the offset to the user data in TriangleBlocks + uint padded_triangle_count = AlignUp(inNumTriangles, 4); + if (inStoreUserData) + { + uint32 num_blocks = padded_triangle_count >> 2; + JPH_ASSERT(num_blocks <= OFFSET_TO_USERDATA_MASK); + header->mFlags |= num_blocks << OFFSET_TO_VERTICES_BITS; + } // Pack vertices - uint padded_triangle_count = AlignUp(tri_count, 4); for (uint t = 0; t < padded_triangle_count; t += 4) { TriangleBlock *block = ioBuffer.Allocate(); @@ -167,12 +247,12 @@ class TriangleCodecIndexed8BitPackSOA4Flags for (uint block_tri_idx = 0; block_tri_idx < 4; ++block_tri_idx) { // Fetch vertex index. Create degenerate triangles for padding triangles. - bool triangle_available = t + block_tri_idx < tri_count; - uint32 src_vertex_index = triangle_available? inTriangles[t + block_tri_idx].mIdx[vertex_nr] : inTriangles[tri_count - 1].mIdx[0]; + bool triangle_available = t + block_tri_idx < inNumTriangles; + uint32 src_vertex_index = triangle_available? 
inTriangles[t + block_tri_idx].mIdx[vertex_nr] : inTriangles[inNumTriangles - 1].mIdx[0]; // Check if we've seen this vertex before and if it is in the range that we can encode uint32 &vertex_index = mVertexMap[src_vertex_index]; - if (vertex_index == 0xffffffff || vertex_index < start_vertex) + if (vertex_index == cNotFound || vertex_index < start_vertex) { // Add vertex vertex_index = (uint32)mVertices.size(); @@ -184,7 +264,7 @@ class TriangleCodecIndexed8BitPackSOA4Flags if (vertex_offset > 0xff) { outError = "TriangleCodecIndexed8BitPackSOA4Flags: Offset doesn't fit in 8 bit"; - return uint(-1); + return size_t(-1); } block->mIndices[vertex_nr][block_tri_idx] = (uint8)vertex_offset; @@ -193,29 +273,34 @@ class TriangleCodecIndexed8BitPackSOA4Flags if (flags > 0xff) { outError = "TriangleCodecIndexed8BitPackSOA4Flags: Material index doesn't fit in 8 bit"; - return uint(-1); + return size_t(-1); } block->mFlags[block_tri_idx] = (uint8)flags; } } - return offset; + // Store user data + if (inStoreUserData) + { + uint32 *user_data = ioBuffer.Allocate(inNumTriangles); + for (uint t = 0; t < inNumTriangles; ++t) + user_data[t] = inTriangles[t].mUserData; + } + + return triangle_block_start; } /// After all triangles have been packed, this finalizes the header and triangle buffer void Finalize(const VertexList &inVertices, TriangleHeader *ioHeader, ByteBuffer &ioBuffer) const { + // Assert that our reservations were correct + JPH_ASSERT(mVertices.size() == mVertexCount); + JPH_ASSERT(ioBuffer.size() == mVerticesStartIdx); + // Check if anything to do if (mVertices.empty()) return; - // Align buffer to 4 bytes - uint vertices_idx = (uint)ioBuffer.Align(4); - - // Patch the offsets - for (uint o : mOffsetsToPatch) - *ioBuffer.Get(o) += vertices_idx - o; - // Calculate bounding box AABox bounds; for (uint32 v : mVertices) @@ -243,17 +328,17 @@ class TriangleCodecIndexed8BitPackSOA4Flags private: using VertexMap = Array; - uint mNumTriangles = 0; - Array mVertices; ///< 
Output vertices as an index into the original vertex list (inVertices), sorted according to occurrence - VertexMap mVertexMap; ///< Maps from the original mesh vertex index (inVertices) to the index in our output vertices (mVertices) - Array mOffsetsToPatch; ///< Offsets to the vertex buffer that need to be patched in once all nodes have been packed + uint32 mVertexCount = 0; ///< Number of vertices calculated during PreparePack + size_t mVerticesStartIdx = 0; ///< Start of the vertices in the output buffer, calculated during PreparePack + Array mVertices; ///< Output vertices as an index into the original vertex list (inVertices), sorted according to occurrence + VertexMap mVertexMap; ///< Maps from the original mesh vertex index (inVertices) to the index in our output vertices (mVertices) }; /// This class is used to decode and decompress triangle data packed by the EncodingContext class DecodingContext { private: - /// Private helper functions to unpack the 1 vertex of 4 triangles (outX contains the x coordinate of triangle 0 .. 3 etc.) + /// Private helper function to unpack the 1 vertex of 4 triangles (outX contains the x coordinate of triangle 0 .. 3 etc.) 
JPH_INLINE void Unpack(const VertexData *inVertices, UVec4Arg inIndex, Vec4 &outX, Vec4 &outY, Vec4 &outZ) const { // Get compressed data @@ -271,6 +356,28 @@ class TriangleCodecIndexed8BitPackSOA4Flags outZ = Vec4::sFusedMultiplyAdd(zc.ToFloat(), mScaleZ, mOffsetZ); } + /// Private helper function to unpack 4 triangles from a triangle block + JPH_INLINE void Unpack(const TriangleBlock *inBlock, const VertexData *inVertices, Vec4 &outX1, Vec4 &outY1, Vec4 &outZ1, Vec4 &outX2, Vec4 &outY2, Vec4 &outZ2, Vec4 &outX3, Vec4 &outY3, Vec4 &outZ3) const + { + // Get the indices for the three vertices (reads 4 bytes extra, but these are the flags so that's ok) + UVec4 indices = UVec4::sLoadInt4(reinterpret_cast(&inBlock->mIndices[0])); + UVec4 iv1 = indices.Expand4Byte0(); + UVec4 iv2 = indices.Expand4Byte4(); + UVec4 iv3 = indices.Expand4Byte8(); + + #ifdef JPH_CPU_BIG_ENDIAN + // On big endian systems we need to reverse the bytes + iv1 = iv1.Swizzle(); + iv2 = iv2.Swizzle(); + iv3 = iv3.Swizzle(); + #endif + + // Decompress the triangle data + Unpack(inVertices, iv1, outX1, outY1, outZ1); + Unpack(inVertices, iv2, outX2, outY2, outZ2); + Unpack(inVertices, iv3, outX3, outY3, outZ3); + } + public: JPH_INLINE explicit DecodingContext(const TriangleHeader *inHeader) : mOffsetX(Vec4::sReplicate(inHeader->mOffset.x)), @@ -295,17 +402,9 @@ class TriangleCodecIndexed8BitPackSOA4Flags do { - // Get the indices for the three vertices (reads 4 bytes extra, but these are the flags so that's ok) - UVec4 indices = UVec4::sLoadInt4(reinterpret_cast(&t->mIndices[0])); - UVec4 iv1 = indices.Expand4Byte0(); - UVec4 iv2 = indices.Expand4Byte4(); - UVec4 iv3 = indices.Expand4Byte8(); - - // Decompress the triangle data + // Unpack the vertices for 4 triangles Vec4 v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z; - Unpack(vertices, iv1, v1x, v1y, v1z); - Unpack(vertices, iv2, v2x, v2y, v2z); - Unpack(vertices, iv3, v3x, v3y, v3z); + Unpack(t, vertices, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, 
v3z); // Transpose it so we get normal vectors Mat44 v1 = Mat44(v1x, v1y, v1z, Vec4::sZero()).Transposed(); @@ -340,17 +439,9 @@ class TriangleCodecIndexed8BitPackSOA4Flags UVec4 start_triangle_idx = UVec4::sZero(); do { - // Get the indices for the three vertices (reads 4 bytes extra, but these are the flags so that's ok) - UVec4 indices = UVec4::sLoadInt4(reinterpret_cast(&t->mIndices[0])); - UVec4 iv1 = indices.Expand4Byte0(); - UVec4 iv2 = indices.Expand4Byte4(); - UVec4 iv3 = indices.Expand4Byte8(); - - // Decompress the triangle data + // Unpack the vertices for 4 triangles Vec4 v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z; - Unpack(vertices, iv1, v1x, v1y, v1z); - Unpack(vertices, iv2, v2x, v2y, v2z); - Unpack(vertices, iv3, v3x, v3y, v3z); + Unpack(t, vertices, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z); // Perform ray vs triangle test Vec4 distance = RayTriangle4(inRayOrigin, inRayDirection, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z); @@ -409,6 +500,14 @@ class TriangleCodecIndexed8BitPackSOA4Flags outV3 = trans.GetAxisZ(); } + /// Get user data for a triangle + JPH_INLINE uint32 GetUserData(const void *inTriangleStart, uint32 inTriangleIdx) const + { + const TriangleBlockHeader *header = reinterpret_cast(inTriangleStart); + const uint32 *user_data = header->GetUserData(); + return user_data != nullptr? 
user_data[inTriangleIdx] : 0; + } + /// Get flags for entire triangle block JPH_INLINE static void sGetFlags(const void *inTriangleStart, uint32 inNumTriangles, uint8 *outTriangleFlags) { diff --git a/libs/Jolt/ConfigurationString.h b/libs/Jolt/ConfigurationString.h index 55e5d8c..1ff1969 100644 --- a/libs/Jolt/ConfigurationString.h +++ b/libs/Jolt/ConfigurationString.h @@ -14,8 +14,23 @@ inline const char *GetConfigurationString() "x86 " #elif defined(JPH_CPU_ARM) "ARM " -#elif defined(JPH_PLATFORM_WASM) +#elif defined(JPH_CPU_RISCV) + "RISC-V " +#elif defined(JPH_CPU_PPC) + "PowerPC " + #ifdef JPH_CPU_BIG_ENDIAN + "(Big Endian) " + #else + "(Little Endian) " + #endif +#elif defined(JPH_CPU_LOONGARCH) + "LoongArch " +#elif defined(JPH_CPU_E2K) + "E2K " +#elif defined(JPH_CPU_WASM) "WASM " +#else + #error Unknown CPU architecture #endif #if JPH_CPU_ADDRESS_BITS == 64 "64-bit " diff --git a/libs/Jolt/Core/Array.h b/libs/Jolt/Core/Array.h index ba6720e..b8dbce5 100644 --- a/libs/Jolt/Core/Array.h +++ b/libs/Jolt/Core/Array.h @@ -36,7 +36,9 @@ class [[nodiscard]] Array : private Allocator { public: using value_type = T; + using allocator_type = Allocator; using size_type = size_t; + using difference_type = typename Allocator::difference_type; using pointer = T *; using const_pointer = const T *; using reference = T &; @@ -45,6 +47,83 @@ class [[nodiscard]] Array : private Allocator using const_iterator = const T *; using iterator = T *; + /// An iterator that traverses the array in reverse order + class rev_it + { + public: + /// Constructor + rev_it() = default; + explicit rev_it(T *inValue) : mValue(inValue) { } + + /// Copying + rev_it(const rev_it &) = default; + rev_it & operator = (const rev_it &) = default; + + /// Comparison + bool operator == (const rev_it &inRHS) const { return mValue == inRHS.mValue; } + bool operator != (const rev_it &inRHS) const { return mValue != inRHS.mValue; } + + /// Arithmetics + rev_it & operator ++ () { --mValue; return *this; } + 
rev_it operator ++ (int) { return rev_it(mValue--); } + rev_it & operator -- () { ++mValue; return *this; } + rev_it operator -- (int) { return rev_it(mValue++); } + + rev_it operator + (int inValue) { return rev_it(mValue - inValue); } + rev_it operator - (int inValue) { return rev_it(mValue + inValue); } + + rev_it & operator += (int inValue) { mValue -= inValue; return *this; } + rev_it & operator -= (int inValue) { mValue += inValue; return *this; } + + /// Access + T & operator * () const { return *mValue; } + T & operator -> () const { return *mValue; } + + private: + T * mValue; + }; + + /// A const iterator that traverses the array in reverse order + class crev_it + { + public: + /// Constructor + crev_it() = default; + explicit crev_it(const T *inValue) : mValue(inValue) { } + + /// Copying + crev_it(const crev_it &) = default; + explicit crev_it(const rev_it &inValue) : mValue(inValue.mValue) { } + crev_it & operator = (const crev_it &) = default; + crev_it & operator = (const rev_it &inRHS) { mValue = inRHS.mValue; return *this; } + + /// Comparison + bool operator == (const crev_it &inRHS) const { return mValue == inRHS.mValue; } + bool operator != (const crev_it &inRHS) const { return mValue != inRHS.mValue; } + + /// Arithmetics + crev_it & operator ++ () { --mValue; return *this; } + crev_it operator ++ (int) { return crev_it(mValue--); } + crev_it & operator -- () { ++mValue; return *this; } + crev_it operator -- (int) { return crev_it(mValue++); } + + crev_it operator + (int inValue) { return crev_it(mValue - inValue); } + crev_it operator - (int inValue) { return crev_it(mValue + inValue); } + + crev_it & operator += (int inValue) { mValue -= inValue; return *this; } + crev_it & operator -= (int inValue) { mValue += inValue; return *this; } + + /// Access + const T & operator * () const { return *mValue; } + const T & operator -> () const { return *mValue; } + + private: + const T * mValue; + }; + + using reverse_iterator = rev_it; + using 
const_reverse_iterator = crev_it; + private: /// Move elements from one location to another inline void move(pointer inDestination, pointer inSource, size_type inCount) @@ -57,7 +136,7 @@ class [[nodiscard]] Array : private Allocator { for (T *destination_end = inDestination + inCount; inDestination < destination_end; ++inDestination, ++inSource) { - ::new (inDestination) T(std::move(*inSource)); + new (inDestination) T(std::move(*inSource)); inSource->~T(); } } @@ -65,7 +144,7 @@ class [[nodiscard]] Array : private Allocator { for (T *destination = inDestination + inCount - 1, *source = inSource + inCount - 1; destination >= inDestination; --destination, --source) { - ::new (destination) T(std::move(*source)); + new (destination) T(std::move(*source)); source->~T(); } } @@ -77,30 +156,30 @@ class [[nodiscard]] Array : private Allocator { JPH_ASSERT(inNewCapacity > 0 && inNewCapacity >= mSize); - pointer pointer; + pointer ptr; if constexpr (AllocatorHasReallocate::sValue) { // Reallocate data block - pointer = get_allocator().reallocate(mElements, mCapacity, inNewCapacity); + ptr = get_allocator().reallocate(mElements, mCapacity, inNewCapacity); } else { // Copy data to a new location - pointer = get_allocator().allocate(inNewCapacity); + ptr = get_allocator().allocate(inNewCapacity); if (mElements != nullptr) { - move(pointer, mElements, mSize); + move(ptr, mElements, mSize); get_allocator().deallocate(mElements, mCapacity); } } - mElements = pointer; + mElements = ptr; mCapacity = inNewCapacity; } /// Destruct elements [inStart, inEnd - 1] inline void destruct(size_type inStart, size_type inEnd) { - if constexpr (!is_trivially_destructible()) + if constexpr (!std::is_trivially_destructible()) if (inStart < inEnd) for (T *element = mElements + inStart, *element_end = mElements + inEnd; element < element_end; ++element) element->~T(); @@ -120,9 +199,9 @@ class [[nodiscard]] Array : private Allocator destruct(inNewSize, mSize); reserve(inNewSize); - if constexpr 
(!is_trivially_constructible()) + if constexpr (!std::is_trivially_constructible()) for (T *element = mElements + mSize, *element_end = mElements + inNewSize; element < element_end; ++element) - ::new (element) T; + new (element) T; mSize = inNewSize; } @@ -135,7 +214,7 @@ class [[nodiscard]] Array : private Allocator reserve(inNewSize); for (T *element = mElements + mSize, *element_end = mElements + inNewSize; element < element_end; ++element) - ::new (element) T(inValue); + new (element) T(inValue); mSize = inNewSize; } @@ -185,7 +264,7 @@ class [[nodiscard]] Array : private Allocator reserve(size_type(std::distance(inBegin, inEnd))); for (Iterator element = inBegin; element != inEnd; ++element) - ::new (&mElements[mSize++]) T(*element); + new (&mElements[mSize++]) T(*element); } /// Replace the contents of this array with inList @@ -195,7 +274,7 @@ class [[nodiscard]] Array : private Allocator reserve(size_type(inList.size())); for (const T &v : inList) - ::new (&mElements[mSize++]) T(v); + new (&mElements[mSize++]) T(v); } /// Default constructor @@ -279,7 +358,7 @@ class [[nodiscard]] Array : private Allocator grow(); T *element = mElements + mSize++; - ::new (element) T(inValue); + new (element) T(inValue); } inline void push_back(T &&inValue) @@ -287,7 +366,7 @@ class [[nodiscard]] Array : private Allocator grow(); T *element = mElements + mSize++; - ::new (element) T(std::move(inValue)); + new (element) T(std::move(inValue)); } /// Construct element at the back of the array @@ -297,7 +376,7 @@ class [[nodiscard]] Array : private Allocator grow(); T *element = mElements + mSize++; - ::new (element) T(std::forward(inValue)...); + new (element) T(std::forward(inValue)...); return *element; } @@ -363,7 +442,7 @@ class [[nodiscard]] Array : private Allocator move(element_end, element_begin, mSize - first_element); for (T *element = element_begin; element < element_end; ++element, ++inBegin) - ::new (element) T(*inBegin); + new (element) T(*inBegin); mSize += 
num_elements; } @@ -381,12 +460,12 @@ class [[nodiscard]] Array : private Allocator T *element = mElements + first_element; move(element + 1, element, mSize - first_element); - ::new (element) T(inValue); + new (element) T(inValue); mSize++; } /// Remove one element from the array - void erase(const_iterator inIter) + iterator erase(const_iterator inIter) { size_type p = size_type(inIter - begin()); JPH_ASSERT(p < mSize); @@ -394,10 +473,11 @@ class [[nodiscard]] Array : private Allocator if (p + 1 < mSize) move(mElements + p, mElements + p + 1, mSize - p - 1); --mSize; + return const_cast(inIter); } /// Remove multiple element from the array - void erase(const_iterator inBegin, const_iterator inEnd) + iterator erase(const_iterator inBegin, const_iterator inEnd) { size_type p = size_type(inBegin - begin()); size_type n = size_type(inEnd - inBegin); @@ -406,6 +486,7 @@ class [[nodiscard]] Array : private Allocator if (p + n < mSize) move(mElements + p, mElements + p + n, mSize - p - n); mSize -= n; + return const_cast(inBegin); } /// Iterators @@ -419,14 +500,34 @@ class [[nodiscard]] Array : private Allocator return mElements + mSize; } + inline crev_it rbegin() const + { + return crev_it(mElements + mSize - 1); + } + + inline crev_it rend() const + { + return crev_it(mElements - 1); + } + inline const_iterator cbegin() const { - return mElements; + return begin(); } inline const_iterator cend() const { - return mElements + mSize; + return end(); + } + + inline crev_it crbegin() const + { + return rbegin(); + } + + inline crev_it crend() const + { + return rend(); } inline iterator begin() @@ -439,6 +540,16 @@ class [[nodiscard]] Array : private Allocator return mElements + mSize; } + inline rev_it rbegin() + { + return rev_it(mElements + mSize - 1); + } + + inline rev_it rend() + { + return rev_it(mElements - 1); + } + inline const T * data() const { return mElements; @@ -560,6 +671,19 @@ class [[nodiscard]] Array : private Allocator return false; } + /// Get hash 
for this array + uint64 GetHash() const + { + // Hash length first + uint64 ret = Hash { } (uint32(size())); + + // Then hash elements + for (const T *element = mElements, *element_end = mElements + mSize; element < element_end; ++element) + HashCombine(ret, *element); + + return ret; + } + private: size_type mSize = 0; size_type mCapacity = 0; @@ -579,16 +703,7 @@ namespace std { size_t operator () (const JPH::Array &inRHS) const { - std::size_t ret = 0; - - // Hash length first - JPH::HashCombine(ret, inRHS.size()); - - // Then hash elements - for (const T &t : inRHS) - JPH::HashCombine(ret, t); - - return ret; + return std::size_t(inRHS.GetHash()); } }; } diff --git a/libs/Jolt/Core/BinaryHeap.h b/libs/Jolt/Core/BinaryHeap.h new file mode 100644 index 0000000..3c542e7 --- /dev/null +++ b/libs/Jolt/Core/BinaryHeap.h @@ -0,0 +1,96 @@ +// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics) +// SPDX-FileCopyrightText: 2024 Jorrit Rouwe +// SPDX-License-Identifier: MIT + +#pragma once + +JPH_NAMESPACE_BEGIN + +/// Push a new element into a binary max-heap. +/// [inBegin, inEnd - 1) must be a valid heap. Element inEnd - 1 will be inserted into the heap. The heap will be [inBegin, inEnd) after this call. +/// inPred is a function that returns true if the first element is less or equal than the second element. 
+/// See: https://en.wikipedia.org/wiki/Binary_heap +template <typename Iterator, typename Pred> +void BinaryHeapPush(Iterator inBegin, Iterator inEnd, Pred inPred) +{ + using diff_t = typename std::iterator_traits<Iterator>::difference_type; + using elem_t = typename std::iterator_traits<Iterator>::value_type; + + // New heap size + diff_t count = std::distance(inBegin, inEnd); + + // Start from the last element + diff_t current = count - 1; + while (current > 0) + { + // Get current element + elem_t &current_elem = *(inBegin + current); + + // Get parent element + diff_t parent = (current - 1) >> 1; + elem_t &parent_elem = *(inBegin + parent); + + // Sort them so that the parent is larger than the child + if (inPred(parent_elem, current_elem)) + { + std::swap(parent_elem, current_elem); + current = parent; + } + else + { + // When there's no change, we're done + break; + } + } +} + +/// Pop an element from a binary max-heap. +/// [inBegin, inEnd) must be a valid heap. The largest element will be removed from the heap. The heap will be [inBegin, inEnd - 1) after this call. +/// inPred is a function that returns true if the first element is less or equal than the second element. 
+/// See: https://en.wikipedia.org/wiki/Binary_heap +template <typename Iterator, typename Pred> +void BinaryHeapPop(Iterator inBegin, Iterator inEnd, Pred inPred) +{ + using diff_t = typename std::iterator_traits<Iterator>::difference_type; + + // Begin by moving the highest element to the end, this is the popped element + std::swap(*(inEnd - 1), *inBegin); + + // New heap size + diff_t count = std::distance(inBegin, inEnd) - 1; + + // Start from the root + diff_t largest = 0; + for (;;) + { + // Get first child + diff_t child = (largest << 1) + 1; + + // Check if we're beyond the end of the heap, if so the 2nd child is also beyond the end + if (child >= count) + break; + + // Remember the largest element from the previous iteration + diff_t prev_largest = largest; + + // Check if first child is bigger, if so select it + if (inPred(*(inBegin + largest), *(inBegin + child))) + largest = child; + + // Switch to the second child + ++child; + + // Check if second child is bigger, if so select it + if (child < count && inPred(*(inBegin + largest), *(inBegin + child))) + largest = child; + + // If there was no change, we're done + if (prev_largest == largest) + break; + + // Swap element + std::swap(*(inBegin + prev_largest), *(inBegin + largest)); + } +} + +JPH_NAMESPACE_END diff --git a/libs/Jolt/Core/ByteBuffer.h b/libs/Jolt/Core/ByteBuffer.h index 48d1970..610b151 100644 --- a/libs/Jolt/Core/ByteBuffer.h +++ b/libs/Jolt/Core/ByteBuffer.h @@ -41,7 +41,7 @@ class ByteBuffer : public ByteBufferVector // Construct elements for (Type *d = data, *d_end = data + inSize; d < d_end; ++d) - ::new (d) Type; + new (d) Type; // Return pointer return data; diff --git a/libs/Jolt/Core/Color.h b/libs/Jolt/Core/Color.h index a2e906b..7706ca8 100644 --- a/libs/Jolt/Core/Color.h +++ b/libs/Jolt/Core/Color.h @@ -12,7 +12,7 @@ class Color; using ColorArg = Color; /// Class that holds an RGBA color with 8-bits per component -class [[nodiscard]] JPH_EXPORT_GCC_BUG_WORKAROUND Color +class JPH_EXPORT_GCC_BUG_WORKAROUND [[nodiscard]] 
Color { public: /// Constructors @@ -79,6 +79,6 @@ class [[nodiscard]] JPH_EXPORT_GCC_BUG_WORKAROUND Color }; }; -static_assert(is_trivial(), "Is supposed to be a trivial type!"); +static_assert(std::is_trivial(), "Is supposed to be a trivial type!"); JPH_NAMESPACE_END diff --git a/libs/Jolt/Core/Core.h b/libs/Jolt/Core/Core.h index 5e0e267..5be68b1 100644 --- a/libs/Jolt/Core/Core.h +++ b/libs/Jolt/Core/Core.h @@ -6,8 +6,8 @@ // Jolt library version #define JPH_VERSION_MAJOR 5 -#define JPH_VERSION_MINOR 1 -#define JPH_VERSION_PATCH 0 +#define JPH_VERSION_MINOR 3 +#define JPH_VERSION_PATCH 1 // Determine which features the library was compiled with #ifdef JPH_DOUBLE_PRECISION @@ -83,8 +83,8 @@ #define JPH_PLATFORM_ANDROID #elif defined(__linux__) #define JPH_PLATFORM_LINUX -#elif defined(__FreeBSD__) - #define JPH_PLATFORM_FREEBSD +#elif defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) + #define JPH_PLATFORM_BSD #elif defined(__APPLE__) #include #if defined(TARGET_OS_IPHONE) && !TARGET_OS_IPHONE @@ -180,10 +180,26 @@ #define JPH_VECTOR_ALIGNMENT 8 // 32-bit ARM does not support aligning on the stack on 16 byte boundaries #define JPH_DVECTOR_ALIGNMENT 8 #endif +#elif defined(__riscv) + // RISC-V CPU architecture + #define JPH_CPU_RISCV + #if __riscv_xlen == 64 + #define JPH_CPU_ADDRESS_BITS 64 + #define JPH_VECTOR_ALIGNMENT 16 + #define JPH_DVECTOR_ALIGNMENT 32 + #else + #define JPH_CPU_ADDRESS_BITS 32 + #define JPH_VECTOR_ALIGNMENT 16 + #define JPH_DVECTOR_ALIGNMENT 8 + #endif #elif defined(JPH_PLATFORM_WASM) // WebAssembly CPU architecture #define JPH_CPU_WASM - #define JPH_CPU_ADDRESS_BITS 32 + #if defined(__wasm64__) + #define JPH_CPU_ADDRESS_BITS 64 + #else + #define JPH_CPU_ADDRESS_BITS 32 + #endif #define JPH_VECTOR_ALIGNMENT 16 #define JPH_DVECTOR_ALIGNMENT 32 #ifdef __wasm_simd128__ @@ -191,6 +207,29 @@ #define JPH_USE_SSE4_1 #define JPH_USE_SSE4_2 #endif +#elif defined(__powerpc__) || defined(__powerpc64__) + // PowerPC CPU architecture 
+ #define JPH_CPU_PPC + #if defined(__powerpc64__) + #define JPH_CPU_ADDRESS_BITS 64 + #else + #define JPH_CPU_ADDRESS_BITS 32 + #endif + #ifdef _BIG_ENDIAN + #define JPH_CPU_BIG_ENDIAN + #endif + #define JPH_VECTOR_ALIGNMENT 16 + #define JPH_DVECTOR_ALIGNMENT 8 +#elif defined(__loongarch__) + // LoongArch CPU architecture + #define JPH_CPU_LOONGARCH + #if defined(__loongarch64) + #define JPH_CPU_ADDRESS_BITS 64 + #else + #define JPH_CPU_ADDRESS_BITS 32 + #endif + #define JPH_VECTOR_ALIGNMENT 16 + #define JPH_DVECTOR_ALIGNMENT 8 #elif defined(__e2k__) // E2K CPU architecture (MCST Elbrus 2000) #define JPH_CPU_E2K @@ -210,7 +249,7 @@ #ifdef JPH_SHARED_LIBRARY #ifdef JPH_BUILD_SHARED_LIBRARY // While building the shared library, we must export these symbols - #ifdef JPH_PLATFORM_WINDOWS + #if defined(JPH_PLATFORM_WINDOWS) && !defined(JPH_COMPILER_MINGW) #define JPH_EXPORT __declspec(dllexport) #else #define JPH_EXPORT __attribute__ ((visibility ("default"))) @@ -221,7 +260,7 @@ #endif #else // When linking against Jolt, we must import these symbols - #ifdef JPH_PLATFORM_WINDOWS + #if defined(JPH_PLATFORM_WINDOWS) && !defined(JPH_COMPILER_MINGW) #define JPH_EXPORT __declspec(dllimport) #else #define JPH_EXPORT __attribute__ ((visibility ("default"))) @@ -308,6 +347,7 @@ JPH_CLANG_SUPPRESS_WARNING("-Wgnu-zero-variadic-macro-arguments") \ JPH_CLANG_SUPPRESS_WARNING("-Wdocumentation-unknown-command") \ JPH_CLANG_SUPPRESS_WARNING("-Wctad-maybe-unsupported") \ + JPH_CLANG_SUPPRESS_WARNING("-Wswitch-default") \ JPH_CLANG_13_PLUS_SUPPRESS_WARNING("-Wdeprecated-copy") \ JPH_CLANG_13_PLUS_SUPPRESS_WARNING("-Wdeprecated-copy-with-dtor") \ JPH_CLANG_16_PLUS_SUPPRESS_WARNING("-Wunsafe-buffer-usage") \ @@ -324,6 +364,7 @@ JPH_MSVC_SUPPRESS_WARNING(4514) /* 'X' : unreferenced inline function has been removed */ \ JPH_MSVC_SUPPRESS_WARNING(4710) /* 'X' : function not inlined */ \ JPH_MSVC_SUPPRESS_WARNING(4711) /* function 'X' selected for automatic inline expansion */ \ + 
JPH_MSVC_SUPPRESS_WARNING(4714) /* function 'X' marked as __forceinline not inlined */ \ JPH_MSVC_SUPPRESS_WARNING(4820) /* 'X': 'Y' bytes padding added after data member 'Z' */ \ JPH_MSVC_SUPPRESS_WARNING(4100) /* 'X' : unreferenced formal parameter */ \ JPH_MSVC_SUPPRESS_WARNING(4626) /* 'X' : assignment operator was implicitly defined as deleted because a base class assignment operator is inaccessible or deleted */ \ @@ -352,15 +393,15 @@ // Configuration for a popular game console. // This file is not distributed because it would violate an NDA. // Creating one should only be a couple of minutes of work if you have the documentation for the platform - // (you only need to define JPH_BREAKPOINT, JPH_PLATFORM_BLUE_GET_TICKS, JPH_PLATFORM_BLUE_MUTEX*, JPH_PLATFORM_BLUE_RWLOCK* and include the right header). + // (you only need to define JPH_BREAKPOINT, JPH_PLATFORM_BLUE_GET_TICKS, JPH_PLATFORM_BLUE_MUTEX*, JPH_PLATFORM_BLUE_RWLOCK*, JPH_PLATFORM_BLUE_SEMAPHORE* and include the right header). 
#include -#elif defined(JPH_PLATFORM_LINUX) || defined(JPH_PLATFORM_ANDROID) || defined(JPH_PLATFORM_MACOS) || defined(JPH_PLATFORM_IOS) || defined(JPH_PLATFORM_FREEBSD) +#elif defined(JPH_PLATFORM_LINUX) || defined(JPH_PLATFORM_ANDROID) || defined(JPH_PLATFORM_MACOS) || defined(JPH_PLATFORM_IOS) || defined(JPH_PLATFORM_BSD) #if defined(JPH_CPU_X86) #define JPH_BREAKPOINT __asm volatile ("int $0x3") - #elif defined(JPH_CPU_ARM) - #define JPH_BREAKPOINT __builtin_trap() - #elif defined(JPH_CPU_E2K) + #elif defined(JPH_CPU_ARM) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_E2K) || defined(JPH_CPU_PPC) || defined(JPH_CPU_LOONGARCH) #define JPH_BREAKPOINT __builtin_trap() + #else + #error Unknown CPU architecture #endif #elif defined(JPH_PLATFORM_WASM) #define JPH_BREAKPOINT do { } while (false) // Not supported @@ -390,7 +431,8 @@ JPH_MSVC_SUPPRESS_WARNING(4514) \ JPH_MSVC_SUPPRESS_WARNING(5262) \ JPH_MSVC_SUPPRESS_WARNING(5264) \ - JPH_MSVC_SUPPRESS_WARNING(4738) + JPH_MSVC_SUPPRESS_WARNING(4738) \ + JPH_MSVC_SUPPRESS_WARNING(5045) #define JPH_SUPPRESS_WARNINGS_STD_END \ JPH_SUPPRESS_WARNING_POP @@ -424,7 +466,6 @@ JPH_SUPPRESS_WARNINGS_STD_END JPH_NAMESPACE_BEGIN // Commonly used STL types -using std::pair; using std::min; using std::max; using std::abs; @@ -434,17 +475,11 @@ using std::floor; using std::trunc; using std::round; using std::fmod; -using std::swap; -using std::size; -using std::string; using std::string_view; using std::function; using std::numeric_limits; using std::isfinite; using std::isnan; -using std::is_trivial; -using std::is_trivially_constructible; -using std::is_trivially_destructible; using std::ostream; using std::istream; @@ -578,4 +613,22 @@ static_assert(sizeof(void *) == (JPH_CPU_ADDRESS_BITS == 64? 
8 : 4), "Invalid si #error Undefined #endif +// Check if Thread Sanitizer is enabled +#ifdef __has_feature + #if __has_feature(thread_sanitizer) + #define JPH_TSAN_ENABLED + #endif +#else + #ifdef __SANITIZE_THREAD__ + #define JPH_TSAN_ENABLED + #endif +#endif + +// Attribute to disable Thread Sanitizer for a particular function +#ifdef JPH_TSAN_ENABLED + #define JPH_TSAN_NO_SANITIZE __attribute__((no_sanitize("thread"))) +#else + #define JPH_TSAN_NO_SANITIZE +#endif + JPH_NAMESPACE_END diff --git a/libs/Jolt/Core/FPControlWord.h b/libs/Jolt/Core/FPControlWord.h index 0c8b3f1..9fceee4 100644 --- a/libs/Jolt/Core/FPControlWord.h +++ b/libs/Jolt/Core/FPControlWord.h @@ -126,6 +126,14 @@ class FPControlWord : public NonCopyable uint32 mPrevState; }; +#elif defined(JPH_CPU_RISCV) + +// RISC-V only implements manually checking if exceptions occurred by reading the fcsr register. It doesn't generate exceptions. + +#elif defined(JPH_CPU_PPC) || defined(JPH_CPU_LOONGARCH) + +// Not implemented right now + #else #error Unsupported CPU architecture diff --git a/libs/Jolt/Core/FPException.h b/libs/Jolt/Core/FPException.h index 3083f05..9e22f54 100644 --- a/libs/Jolt/Core/FPException.h +++ b/libs/Jolt/Core/FPException.h @@ -16,11 +16,12 @@ JPH_NAMESPACE_BEGIN class FPExceptionsEnable { }; class FPExceptionDisableInvalid { }; class FPExceptionDisableDivByZero { }; +class FPExceptionDisableOverflow { }; #elif defined(JPH_USE_SSE) -/// Enable floating point divide by zero exception and exceptions on invalid numbers -class FPExceptionsEnable : public FPControlWord<0, _MM_MASK_DIV_ZERO | _MM_MASK_INVALID> { }; +/// Enable floating point divide by zero exception, overflow exceptions and exceptions on invalid numbers +class FPExceptionsEnable : public FPControlWord<0, _MM_MASK_DIV_ZERO | _MM_MASK_INVALID | _MM_MASK_OVERFLOW> { }; /// Disable invalid floating point value exceptions class FPExceptionDisableInvalid : public FPControlWord<_MM_MASK_INVALID, _MM_MASK_INVALID> { }; @@ 
-28,10 +29,13 @@ class FPExceptionDisableInvalid : public FPControlWord<_MM_MASK_INVALID, _MM_MAS /// Disable division by zero floating point exceptions class FPExceptionDisableDivByZero : public FPControlWord<_MM_MASK_DIV_ZERO, _MM_MASK_DIV_ZERO> { }; +/// Disable floating point overflow exceptions +class FPExceptionDisableOverflow : public FPControlWord<_MM_MASK_OVERFLOW, _MM_MASK_OVERFLOW> { }; + #elif defined(JPH_CPU_ARM) && defined(JPH_COMPILER_MSVC) -/// Enable floating point divide by zero exception and exceptions on invalid numbers -class FPExceptionsEnable : public FPControlWord<0, _EM_INVALID | _EM_ZERODIVIDE> { }; +/// Enable floating point divide by zero exception, overflow exceptions and exceptions on invalid numbers +class FPExceptionsEnable : public FPControlWord<0, _EM_INVALID | _EM_ZERODIVIDE | _EM_OVERFLOW> { }; /// Disable invalid floating point value exceptions class FPExceptionDisableInvalid : public FPControlWord<_EM_INVALID, _EM_INVALID> { }; @@ -39,6 +43,9 @@ class FPExceptionDisableInvalid : public FPControlWord<_EM_INVALID, _EM_INVALID> /// Disable division by zero floating point exceptions class FPExceptionDisableDivByZero : public FPControlWord<_EM_ZERODIVIDE, _EM_ZERODIVIDE> { }; +/// Disable floating point overflow exceptions +class FPExceptionDisableOverflow : public FPControlWord<_EM_OVERFLOW, _EM_OVERFLOW> { }; + #elif defined(JPH_CPU_ARM) /// Invalid operation exception bit @@ -47,8 +54,11 @@ static constexpr uint64 FP_IOE = 1 << 8; /// Enable divide by zero exception bit static constexpr uint64 FP_DZE = 1 << 9; -/// Enable floating point divide by zero exception and exceptions on invalid numbers -class FPExceptionsEnable : public FPControlWord { }; +/// Enable floating point overflow bit +static constexpr uint64 FP_OFE = 1 << 10; + +/// Enable floating point divide by zero exception, overflow exceptions and exceptions on invalid numbers +class FPExceptionsEnable : public FPControlWord { }; /// Disable invalid floating point value 
exceptions class FPExceptionDisableInvalid : public FPControlWord<0, FP_IOE> { }; @@ -56,6 +66,17 @@ class FPExceptionDisableInvalid : public FPControlWord<0, FP_IOE> { }; /// Disable division by zero floating point exceptions class FPExceptionDisableDivByZero : public FPControlWord<0, FP_DZE> { }; +/// Disable floating point overflow exceptions +class FPExceptionDisableOverflow : public FPControlWord<0, FP_OFE> { }; + +#elif defined(JPH_CPU_RISCV) + +#error "RISC-V only implements manually checking if exceptions occurred by reading the fcsr register. It doesn't generate exceptions. JPH_FLOATING_POINT_EXCEPTIONS_ENABLED must be disabled." + +#elif defined(JPH_CPU_PPC) + +#error PowerPC floating point exception handling to be implemented. JPH_FLOATING_POINT_EXCEPTIONS_ENABLED must be disabled. + #else #error Unsupported CPU architecture @@ -68,6 +89,7 @@ class FPExceptionDisableDivByZero : public FPControlWord<0, FP_DZE> { }; class FPExceptionsEnable { }; class FPExceptionDisableInvalid { }; class FPExceptionDisableDivByZero { }; +class FPExceptionDisableOverflow { }; #endif diff --git a/libs/Jolt/Core/FPFlushDenormals.h b/libs/Jolt/Core/FPFlushDenormals.h index 672a19d..74a2c10 100644 --- a/libs/Jolt/Core/FPFlushDenormals.h +++ b/libs/Jolt/Core/FPFlushDenormals.h @@ -8,7 +8,7 @@ JPH_NAMESPACE_BEGIN -#if defined(JPH_CPU_WASM) +#if defined(JPH_CPU_WASM) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_PPC) || defined(JPH_CPU_LOONGARCH) // Not supported class FPFlushDenormals { }; @@ -21,6 +21,8 @@ class FPFlushDenormals : public FPControlWord<_MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_ #elif defined(JPH_CPU_ARM) && defined(JPH_COMPILER_MSVC) +/// Helper class that needs to be put on the stack to enable flushing denormals to zero +/// This can make floating point operations much faster when working with very small numbers class FPFlushDenormals : public FPControlWord<_DN_FLUSH, _MCW_DN> { }; #elif defined(JPH_CPU_ARM) diff --git a/libs/Jolt/Core/Factory.cpp 
b/libs/Jolt/Core/Factory.cpp index f5b9dac..1890c03 100644 --- a/libs/Jolt/Core/Factory.cpp +++ b/libs/Jolt/Core/Factory.cpp @@ -64,6 +64,9 @@ bool Factory::Register(const RTTI *inRTTI) bool Factory::Register(const RTTI **inRTTIs, uint inNumber) { + mClassHashMap.reserve(mClassHashMap.size() + inNumber); + mClassNameMap.reserve(mClassNameMap.size() + inNumber); + for (const RTTI **rtti = inRTTIs; rtti < inRTTIs + inNumber; ++rtti) if (!Register(*rtti)) return false; diff --git a/libs/Jolt/Core/FixedSizeFreeList.inl b/libs/Jolt/Core/FixedSizeFreeList.inl index dbaae43..3fe40b8 100644 --- a/libs/Jolt/Core/FixedSizeFreeList.inl +++ b/libs/Jolt/Core/FixedSizeFreeList.inl @@ -79,7 +79,7 @@ uint32 FixedSizeFreeList::ConstructObject(Parameters &&... inParameters) // Allocation successful JPH_IF_ENABLE_ASSERTS(mNumFreeObjects.fetch_sub(1, memory_order_relaxed);) ObjectStorage &storage = GetStorage(first_free); - ::new (&storage.mObject) Object(std::forward(inParameters)...); + new (&storage.mObject) Object(std::forward(inParameters)...); storage.mNextFreeObject.store(first_free, memory_order_release); return first_free; } @@ -97,7 +97,7 @@ uint32 FixedSizeFreeList::ConstructObject(Parameters &&... inParameters) // Allocation successful JPH_IF_ENABLE_ASSERTS(mNumFreeObjects.fetch_sub(1, memory_order_relaxed);) ObjectStorage &storage = GetStorage(first_free); - ::new (&storage.mObject) Object(std::forward(inParameters)...); + new (&storage.mObject) Object(std::forward(inParameters)...); storage.mNextFreeObject.store(first_free, memory_order_release); return first_free; } @@ -108,9 +108,13 @@ uint32 FixedSizeFreeList::ConstructObject(Parameters &&... 
inParameters) template void FixedSizeFreeList::AddObjectToBatch(Batch &ioBatch, uint32 inObjectIndex) { - JPH_ASSERT(GetStorage(inObjectIndex).mNextFreeObject.load(memory_order_relaxed) == inObjectIndex, "Trying to add a object to the batch that is already in a free list"); JPH_ASSERT(ioBatch.mNumObjects != uint32(-1), "Trying to reuse a batch that has already been freed"); + // Reset next index + atomic &next_free_object = GetStorage(inObjectIndex).mNextFreeObject; + JPH_ASSERT(next_free_object.load(memory_order_relaxed) == inObjectIndex, "Trying to add a object to the batch that is already in a free list"); + next_free_object.store(cInvalidObjectIndex, memory_order_release); + // Link object in batch to free if (ioBatch.mFirstObjectIndex == cInvalidObjectIndex) ioBatch.mFirstObjectIndex = inObjectIndex; @@ -126,7 +130,7 @@ void FixedSizeFreeList::DestructObjectBatch(Batch &ioBatch) if (ioBatch.mFirstObjectIndex != cInvalidObjectIndex) { // Call destructors - if constexpr (!is_trivially_destructible()) + if constexpr (!std::is_trivially_destructible()) { uint32 object_idx = ioBatch.mFirstObjectIndex; do diff --git a/libs/Jolt/Core/HashCombine.h b/libs/Jolt/Core/HashCombine.h index d3d79f6..ab62084 100644 --- a/libs/Jolt/Core/HashCombine.h +++ b/libs/Jolt/Core/HashCombine.h @@ -17,8 +17,21 @@ inline uint64 HashBytes(const void *inData, uint inSize, uint64 inSeed = 0xcbf29 uint64 hash = inSeed; for (const uint8 *data = reinterpret_cast(inData); data < reinterpret_cast(inData) + inSize; ++data) { - hash = hash ^ uint64(*data); - hash = hash * 0x100000001b3UL; + hash ^= uint64(*data); + hash *= 0x100000001b3UL; + } + return hash; +} + +/// Calculate the FNV-1a hash of inString. 
+/// @see https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function +constexpr uint64 HashString(const char *inString, uint64 inSeed = 0xcbf29ce484222325UL) +{ + uint64 hash = inSeed; + for (const char *c = inString; *c != 0; ++c) + { + hash ^= uint64(*c); + hash *= 0x100000001b3UL; } return hash; } @@ -40,13 +53,122 @@ inline uint64 Hash64(uint64 inValue) return hash; } -/// @brief Helper function that hashes a single value into ioSeed -/// Taken from: https://stackoverflow.com/questions/2590677/how-do-i-combine-hash-values-in-c0x +/// Fallback hash function that calls T::GetHash() +template +struct Hash +{ + uint64 operator () (const T &inValue) const + { + return inValue.GetHash(); + } +}; + +/// A hash function for floats +template <> +struct Hash +{ + uint64 operator () (float inValue) const + { + float value = inValue == 0.0f? 0.0f : inValue; // Convert -0.0f to 0.0f + return HashBytes(&value, sizeof(value)); + } +}; + +/// A hash function for doubles +template <> +struct Hash +{ + uint64 operator () (double inValue) const + { + double value = inValue == 0.0? 
0.0 : inValue; // Convert -0.0 to 0.0 + return HashBytes(&value, sizeof(value)); + } +}; + +/// A hash function for character pointers +template <> +struct Hash +{ + uint64 operator () (const char *inValue) const + { + return HashString(inValue); + } +}; + +/// A hash function for std::string_view +template <> +struct Hash +{ + uint64 operator () (const std::string_view &inValue) const + { + return HashBytes(inValue.data(), uint(inValue.size())); + } +}; + +/// A hash function for String +template <> +struct Hash +{ + uint64 operator () (const String &inValue) const + { + return HashBytes(inValue.data(), uint(inValue.size())); + } +}; + +/// A fallback function for generic pointers +template +struct Hash +{ + uint64 operator () (T *inValue) const + { + return HashBytes(&inValue, sizeof(inValue)); + } +}; + +/// Helper macro to define a hash function for trivial types +#define JPH_DEFINE_TRIVIAL_HASH(type) \ +template <> \ +struct Hash \ +{ \ + uint64 operator () (const type &inValue) const \ + { \ + return HashBytes(&inValue, sizeof(inValue)); \ + } \ +}; + +/// Commonly used types +JPH_DEFINE_TRIVIAL_HASH(char) +JPH_DEFINE_TRIVIAL_HASH(int) +JPH_DEFINE_TRIVIAL_HASH(uint32) +JPH_DEFINE_TRIVIAL_HASH(uint64) + +/// Helper function that hashes a single value into ioSeed +/// Based on https://github.com/jonmaiga/mx3 by Jon Maiga template -inline void HashCombineHelper(size_t &ioSeed, const T &inValue) +inline void HashCombine(uint64 &ioSeed, const T &inValue) { - std::hash hasher; - ioSeed ^= hasher(inValue) + 0x9e3779b9 + (ioSeed << 6) + (ioSeed >> 2); + constexpr uint64 c = 0xbea225f9eb34556dUL; + + uint64 h = ioSeed; + uint64 x = Hash { } (inValue); + + // See: https://github.com/jonmaiga/mx3/blob/master/mx3.h + // mix_stream(h, x) + x *= c; + x ^= x >> 39; + h += x * c; + h *= c; + + // mix(h) + h ^= h >> 32; + h *= c; + h ^= h >> 29; + h *= c; + h ^= h >> 32; + h *= c; + h ^= h >> 29; + + ioSeed = h; } /// Hash combiner to use a custom struct in an unordered map 
or set @@ -61,37 +183,52 @@ inline void HashCombineHelper(size_t &ioSeed, const T &inValue) /// }; /// /// JPH_MAKE_HASHABLE(SomeHashKey, t.key1, t.key2, t.key3) -template -inline void HashCombine(std::size_t &ioSeed, Values... inValues) +template +inline uint64 HashCombineArgs(const FirstValue &inFirstValue, Values... inValues) { - // Hash all values together using a fold expression - (HashCombineHelper(ioSeed, inValues), ...); -} + // Prime the seed by hashing the first value + uint64 seed = Hash { } (inFirstValue); -JPH_NAMESPACE_END + // Hash all remaining values together using a fold expression + (HashCombine(seed, inValues), ...); -JPH_SUPPRESS_WARNING_PUSH -JPH_CLANG_SUPPRESS_WARNING("-Wc++98-compat-pedantic") + return seed; +} #define JPH_MAKE_HASH_STRUCT(type, name, ...) \ struct [[nodiscard]] name \ { \ - std::size_t operator()(const type &t) const \ + ::JPH::uint64 operator()(const type &t) const \ { \ - std::size_t ret = 0; \ - ::JPH::HashCombine(ret, __VA_ARGS__); \ - return ret; \ + return ::JPH::HashCombineArgs(__VA_ARGS__); \ } \ }; -#define JPH_MAKE_HASHABLE(type, ...) \ +#define JPH_MAKE_STD_HASH(type) \ JPH_SUPPRESS_WARNING_PUSH \ JPH_SUPPRESS_WARNINGS \ namespace std \ { \ template<> \ - JPH_MAKE_HASH_STRUCT(type, hash, __VA_ARGS__) \ + struct [[nodiscard]] hash \ + { \ + size_t operator()(const type &t) const \ + { \ + return size_t(::JPH::Hash{ }(t)); \ + } \ + }; \ } \ JPH_SUPPRESS_WARNING_POP -JPH_SUPPRESS_WARNING_POP +#define JPH_MAKE_HASHABLE(type, ...) 
\ + JPH_SUPPRESS_WARNING_PUSH \ + JPH_SUPPRESS_WARNINGS \ + namespace JPH \ + { \ + template<> \ + JPH_MAKE_HASH_STRUCT(type, Hash, __VA_ARGS__) \ + } \ + JPH_SUPPRESS_WARNING_POP \ + JPH_MAKE_STD_HASH(type) + +JPH_NAMESPACE_END diff --git a/libs/Jolt/Core/HashTable.h b/libs/Jolt/Core/HashTable.h new file mode 100644 index 0000000..d2d766f --- /dev/null +++ b/libs/Jolt/Core/HashTable.h @@ -0,0 +1,872 @@ +// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics) +// SPDX-FileCopyrightText: 2024 Jorrit Rouwe +// SPDX-License-Identifier: MIT + +#pragma once + +#include + +JPH_NAMESPACE_BEGIN + +/// Helper class for implementing an UnorderedSet or UnorderedMap +/// Based on CppCon 2017: Matt Kulukundis "Designing a Fast, Efficient, Cache-friendly Hash Table, Step by Step" +/// See: https://www.youtube.com/watch?v=ncHmEUmJZf4 +template +class HashTable +{ +public: + /// Properties + using value_type = KeyValue; + using size_type = uint32; + using difference_type = ptrdiff_t; + +private: + /// Base class for iterators + template + class IteratorBase + { + public: + /// Properties + using difference_type = typename Table::difference_type; + using value_type = typename Table::value_type; + using iterator_category = std::forward_iterator_tag; + + /// Copy constructor + IteratorBase(const IteratorBase &inRHS) = default; + + /// Assignment operator + IteratorBase & operator = (const IteratorBase &inRHS) = default; + + /// Iterator at start of table + explicit IteratorBase(Table *inTable) : + mTable(inTable), + mIndex(0) + { + while (mIndex < mTable->mMaxSize && (mTable->mControl[mIndex] & cBucketUsed) == 0) + ++mIndex; + } + + /// Iterator at specific index + IteratorBase(Table *inTable, size_type inIndex) : + mTable(inTable), + mIndex(inIndex) + { + } + + /// Prefix increment + Iterator & operator ++ () + { + JPH_ASSERT(IsValid()); + + do + { + ++mIndex; + } + while (mIndex < mTable->mMaxSize && (mTable->mControl[mIndex] & cBucketUsed) == 0); + + return 
static_cast(*this); + } + + /// Postfix increment + Iterator operator ++ (int) + { + Iterator result(mTable, mIndex); + ++(*this); + return result; + } + + /// Access to key value pair + const KeyValue & operator * () const + { + JPH_ASSERT(IsValid()); + return mTable->mData[mIndex]; + } + + /// Access to key value pair + const KeyValue * operator -> () const + { + JPH_ASSERT(IsValid()); + return mTable->mData + mIndex; + } + + /// Equality operator + bool operator == (const Iterator &inRHS) const + { + return mIndex == inRHS.mIndex && mTable == inRHS.mTable; + } + + /// Inequality operator + bool operator != (const Iterator &inRHS) const + { + return !(*this == inRHS); + } + + /// Check that the iterator is valid + bool IsValid() const + { + return mIndex < mTable->mMaxSize + && (mTable->mControl[mIndex] & cBucketUsed) != 0; + } + + Table * mTable; + size_type mIndex; + }; + + /// Get the maximum number of elements that we can support given a number of buckets + static constexpr size_type sGetMaxLoad(size_type inBucketCount) + { + return uint32((cMaxLoadFactorNumerator * inBucketCount) / cMaxLoadFactorDenominator); + } + + /// Update the control value for a bucket + JPH_INLINE void SetControlValue(size_type inIndex, uint8 inValue) + { + JPH_ASSERT(inIndex < mMaxSize); + mControl[inIndex] = inValue; + + // Mirror the first 15 bytes to the 15 bytes beyond mMaxSize + // Note that this is equivalent to: + // if (inIndex < 15) + // mControl[inIndex + mMaxSize] = inValue + // else + // mControl[inIndex] = inValue + // Which performs a needless write if inIndex >= 15 but at least it is branch-less + mControl[((inIndex - 15) & (mMaxSize - 1)) + 15] = inValue; + } + + /// Get the index and control value for a particular key + JPH_INLINE void GetIndexAndControlValue(const Key &inKey, size_type &outIndex, uint8 &outControl) const + { + // Calculate hash + uint64 hash_value = Hash { } (inKey); + + // Split hash into index and control value + outIndex = size_type(hash_value >> 
7) & (mMaxSize - 1); + outControl = cBucketUsed | uint8(hash_value); + } + + /// Allocate space for the hash table + void AllocateTable(size_type inMaxSize) + { + JPH_ASSERT(mData == nullptr); + + mMaxSize = inMaxSize; + mLoadLeft = sGetMaxLoad(inMaxSize); + size_t required_size = size_t(mMaxSize) * (sizeof(KeyValue) + 1) + 15; // Add 15 bytes to mirror the first 15 bytes of the control values + if constexpr (cNeedsAlignedAllocate) + mData = reinterpret_cast(AlignedAllocate(required_size, alignof(KeyValue))); + else + mData = reinterpret_cast(Allocate(required_size)); + mControl = reinterpret_cast(mData + mMaxSize); + } + + /// Copy the contents of another hash table + void CopyTable(const HashTable &inRHS) + { + if (inRHS.empty()) + return; + + AllocateTable(inRHS.mMaxSize); + + // Copy control bytes + memcpy(mControl, inRHS.mControl, mMaxSize + 15); + + // Copy elements + uint index = 0; + for (const uint8 *control = mControl, *control_end = mControl + mMaxSize; control != control_end; ++control, ++index) + if (*control & cBucketUsed) + new (mData + index) KeyValue(inRHS.mData[index]); + mSize = inRHS.mSize; + } + + /// Grow the table to a new size + void GrowTable(size_type inNewMaxSize) + { + // Move the old table to a temporary structure + size_type old_max_size = mMaxSize; + KeyValue *old_data = mData; + const uint8 *old_control = mControl; + mData = nullptr; + mControl = nullptr; + mSize = 0; + mMaxSize = 0; + mLoadLeft = 0; + + // Allocate new table + AllocateTable(inNewMaxSize); + + // Reset all control bytes + memset(mControl, cBucketEmpty, mMaxSize + 15); + + if (old_data != nullptr) + { + // Copy all elements from the old table + for (size_type i = 0; i < old_max_size; ++i) + if (old_control[i] & cBucketUsed) + { + size_type index; + KeyValue *element = old_data + i; + JPH_IF_ENABLE_ASSERTS(bool inserted =) InsertKey(HashTableDetail::sGetKey(*element), index); + JPH_ASSERT(inserted); + new (mData + index) KeyValue(std::move(*element)); + 
element->~KeyValue(); + } + + // Free memory + if constexpr (cNeedsAlignedAllocate) + AlignedFree(old_data); + else + Free(old_data); + } + } + +protected: + /// Get an element by index + KeyValue & GetElement(size_type inIndex) const + { + return mData[inIndex]; + } + + /// Insert a key into the map, returns true if the element was inserted, false if it already existed. + /// outIndex is the index at which the element should be constructed / where it is located. + template + bool InsertKey(const Key &inKey, size_type &outIndex) + { + // Ensure we have enough space + if (mLoadLeft == 0) + { + // Should not be growing if we're already growing! + if constexpr (InsertAfterGrow) + JPH_ASSERT(false); + + // Decide if we need to clean up all tombstones or if we need to grow the map + size_type num_deleted = sGetMaxLoad(mMaxSize) - mSize; + if (num_deleted * cMaxDeletedElementsDenominator > mMaxSize * cMaxDeletedElementsNumerator) + rehash(0); + else + { + // Grow by a power of 2 + size_type new_max_size = max(mMaxSize << 1, 16); + if (new_max_size < mMaxSize) + { + JPH_ASSERT(false, "Overflow in hash table size, can't grow!"); + return false; + } + GrowTable(new_max_size); + } + } + + // Split hash into index and control value + size_type index; + uint8 control; + GetIndexAndControlValue(inKey, index, control); + + // Keeps track of the index of the first deleted bucket we found + constexpr size_type cNoDeleted = ~size_type(0); + size_type first_deleted_index = cNoDeleted; + + // Linear probing + KeyEqual equal; + size_type bucket_mask = mMaxSize - 1; + BVec16 control16 = BVec16::sReplicate(control); + BVec16 bucket_empty = BVec16::sZero(); + BVec16 bucket_deleted = BVec16::sReplicate(cBucketDeleted); + for (;;) + { + // Read 16 control values (note that we added 15 bytes at the end of the control values that mirror the first 15 bytes) + BVec16 control_bytes = BVec16::sLoadByte16(mControl + index); + + // Check if we must find the element before we can insert + if 
constexpr (!InsertAfterGrow) + { + // Check for the control value we're looking for + // Note that when deleting we can create empty buckets instead of deleted buckets. + // This means we must unconditionally check all buckets in this batch for equality + // (also beyond the first empty bucket). + uint32 control_equal = uint32(BVec16::sEquals(control_bytes, control16).GetTrues()); + + // Index within the 16 buckets + size_type local_index = index; + + // Loop while there's still buckets to process + while (control_equal != 0) + { + // Get the first equal bucket + uint first_equal = CountTrailingZeros(control_equal); + + // Skip to the bucket + local_index += first_equal; + + // Make sure that our index is not beyond the end of the table + local_index &= bucket_mask; + + // We found a bucket with same control value + if (equal(HashTableDetail::sGetKey(mData[local_index]), inKey)) + { + // Element already exists + outIndex = local_index; + return false; + } + + // Skip past this bucket + control_equal >>= first_equal + 1; + local_index++; + } + + // Check if we're still scanning for deleted buckets + if (first_deleted_index == cNoDeleted) + { + // Check if any buckets have been deleted, if so store the first one + uint32 control_deleted = uint32(BVec16::sEquals(control_bytes, bucket_deleted).GetTrues()); + if (control_deleted != 0) + first_deleted_index = index + CountTrailingZeros(control_deleted); + } + } + + // Check for empty buckets + uint32 control_empty = uint32(BVec16::sEquals(control_bytes, bucket_empty).GetTrues()); + if (control_empty != 0) + { + // If we found a deleted bucket, use it. + // It doesn't matter if it is before or after the first empty bucket we found + // since we will always be scanning in batches of 16 buckets. 
+ if (first_deleted_index == cNoDeleted || InsertAfterGrow) + { + index += CountTrailingZeros(control_empty); + --mLoadLeft; // Using an empty bucket decreases the load left + } + else + { + index = first_deleted_index; + } + + // Make sure that our index is not beyond the end of the table + index &= bucket_mask; + + // Update control byte + SetControlValue(index, control); + ++mSize; + + // Return index to newly allocated bucket + outIndex = index; + return true; + } + + // Move to next batch of 16 buckets + index = (index + 16) & bucket_mask; + } + } + +public: + /// Non-const iterator + class iterator : public IteratorBase + { + using Base = IteratorBase; + + public: + /// Properties + using reference = typename Base::value_type &; + using pointer = typename Base::value_type *; + + /// Constructors + explicit iterator(HashTable *inTable) : Base(inTable) { } + iterator(HashTable *inTable, size_type inIndex) : Base(inTable, inIndex) { } + iterator(const iterator &inIterator) : Base(inIterator) { } + + /// Assignment + iterator & operator = (const iterator &inRHS) { Base::operator = (inRHS); return *this; } + + using Base::operator *; + + /// Non-const access to key value pair + KeyValue & operator * () + { + JPH_ASSERT(this->IsValid()); + return this->mTable->mData[this->mIndex]; + } + + using Base::operator ->; + + /// Non-const access to key value pair + KeyValue * operator -> () + { + JPH_ASSERT(this->IsValid()); + return this->mTable->mData + this->mIndex; + } + }; + + /// Const iterator + class const_iterator : public IteratorBase + { + using Base = IteratorBase; + + public: + /// Properties + using reference = const typename Base::value_type &; + using pointer = const typename Base::value_type *; + + /// Constructors + explicit const_iterator(const HashTable *inTable) : Base(inTable) { } + const_iterator(const HashTable *inTable, size_type inIndex) : Base(inTable, inIndex) { } + const_iterator(const const_iterator &inRHS) : Base(inRHS) { } + 
const_iterator(const iterator &inIterator) : Base(inIterator.mTable, inIterator.mIndex) { } + + /// Assignment + const_iterator & operator = (const iterator &inRHS) { this->mTable = inRHS.mTable; this->mIndex = inRHS.mIndex; return *this; } + const_iterator & operator = (const const_iterator &inRHS) { Base::operator = (inRHS); return *this; } + }; + + /// Default constructor + HashTable() = default; + + /// Copy constructor + HashTable(const HashTable &inRHS) + { + CopyTable(inRHS); + } + + /// Move constructor + HashTable(HashTable &&ioRHS) noexcept : + mData(ioRHS.mData), + mControl(ioRHS.mControl), + mSize(ioRHS.mSize), + mMaxSize(ioRHS.mMaxSize), + mLoadLeft(ioRHS.mLoadLeft) + { + ioRHS.mData = nullptr; + ioRHS.mControl = nullptr; + ioRHS.mSize = 0; + ioRHS.mMaxSize = 0; + ioRHS.mLoadLeft = 0; + } + + /// Assignment operator + HashTable & operator = (const HashTable &inRHS) + { + if (this != &inRHS) + { + clear(); + + CopyTable(inRHS); + } + + return *this; + } + + /// Move assignment operator + HashTable & operator = (HashTable &&ioRHS) noexcept + { + if (this != &ioRHS) + { + clear(); + + mData = ioRHS.mData; + mControl = ioRHS.mControl; + mSize = ioRHS.mSize; + mMaxSize = ioRHS.mMaxSize; + mLoadLeft = ioRHS.mLoadLeft; + + ioRHS.mData = nullptr; + ioRHS.mControl = nullptr; + ioRHS.mSize = 0; + ioRHS.mMaxSize = 0; + ioRHS.mLoadLeft = 0; + } + + return *this; + } + + /// Destructor + ~HashTable() + { + clear(); + } + + /// Reserve memory for a certain number of elements + void reserve(size_type inMaxSize) + { + // Calculate max size based on load factor + size_type max_size = GetNextPowerOf2(max((cMaxLoadFactorDenominator * inMaxSize) / cMaxLoadFactorNumerator, 16)); + if (max_size <= mMaxSize) + return; + + GrowTable(max_size); + } + + /// Destroy the entire hash table + void clear() + { + // Delete all elements + if constexpr (!std::is_trivially_destructible()) + if (!empty()) + for (size_type i = 0; i < mMaxSize; ++i) + if (mControl[i] & cBucketUsed) + 
mData[i].~KeyValue(); + + if (mData != nullptr) + { + // Free memory + if constexpr (cNeedsAlignedAllocate) + AlignedFree(mData); + else + Free(mData); + + // Reset members + mData = nullptr; + mControl = nullptr; + mSize = 0; + mMaxSize = 0; + mLoadLeft = 0; + } + } + + /// Destroy the entire hash table but keeps the memory allocated + void ClearAndKeepMemory() + { + // Destruct elements + if constexpr (!std::is_trivially_destructible()) + if (!empty()) + for (size_type i = 0; i < mMaxSize; ++i) + if (mControl[i] & cBucketUsed) + mData[i].~KeyValue(); + mSize = 0; + + // If there are elements that are not marked cBucketEmpty, we reset them + size_type max_load = sGetMaxLoad(mMaxSize); + if (mLoadLeft != max_load) + { + // Reset all control bytes + memset(mControl, cBucketEmpty, mMaxSize + 15); + mLoadLeft = max_load; + } + } + + /// Iterator to first element + iterator begin() + { + return iterator(this); + } + + /// Iterator to one beyond last element + iterator end() + { + return iterator(this, mMaxSize); + } + + /// Iterator to first element + const_iterator begin() const + { + return const_iterator(this); + } + + /// Iterator to one beyond last element + const_iterator end() const + { + return const_iterator(this, mMaxSize); + } + + /// Iterator to first element + const_iterator cbegin() const + { + return const_iterator(this); + } + + /// Iterator to one beyond last element + const_iterator cend() const + { + return const_iterator(this, mMaxSize); + } + + /// Number of buckets in the table + size_type bucket_count() const + { + return mMaxSize; + } + + /// Max number of buckets that the table can have + constexpr size_type max_bucket_count() const + { + return size_type(1) << (sizeof(size_type) * 8 - 1); + } + + /// Check if there are no elements in the table + bool empty() const + { + return mSize == 0; + } + + /// Number of elements in the table + size_type size() const + { + return mSize; + } + + /// Max number of elements that the table can hold + 
constexpr size_type max_size() const + { + return size_type((uint64(max_bucket_count()) * cMaxLoadFactorNumerator) / cMaxLoadFactorDenominator); + } + + /// Get the max load factor for this table (max number of elements / number of buckets) + constexpr float max_load_factor() const + { + return float(cMaxLoadFactorNumerator) / float(cMaxLoadFactorDenominator); + } + + /// Insert a new element, returns iterator and if the element was inserted + std::pair insert(const value_type &inValue) + { + size_type index; + bool inserted = InsertKey(HashTableDetail::sGetKey(inValue), index); + if (inserted) + new (mData + index) KeyValue(inValue); + return std::make_pair(iterator(this, index), inserted); + } + + /// Find an element, returns iterator to element or end() if not found + const_iterator find(const Key &inKey) const + { + // Check if we have any data + if (empty()) + return cend(); + + // Split hash into index and control value + size_type index; + uint8 control; + GetIndexAndControlValue(inKey, index, control); + + // Linear probing + KeyEqual equal; + size_type bucket_mask = mMaxSize - 1; + BVec16 control16 = BVec16::sReplicate(control); + BVec16 bucket_empty = BVec16::sZero(); + for (;;) + { + // Read 16 control values + // (note that we added 15 bytes at the end of the control values that mirror the first 15 bytes) + BVec16 control_bytes = BVec16::sLoadByte16(mControl + index); + + // Check for the control value we're looking for + // Note that when deleting we can create empty buckets instead of deleted buckets. + // This means we must unconditionally check all buckets in this batch for equality + // (also beyond the first empty bucket). 
+ uint32 control_equal = uint32(BVec16::sEquals(control_bytes, control16).GetTrues()); + + // Index within the 16 buckets + size_type local_index = index; + + // Loop while there's still buckets to process + while (control_equal != 0) + { + // Get the first equal bucket + uint first_equal = CountTrailingZeros(control_equal); + + // Skip to the bucket + local_index += first_equal; + + // Make sure that our index is not beyond the end of the table + local_index &= bucket_mask; + + // We found a bucket with same control value + if (equal(HashTableDetail::sGetKey(mData[local_index]), inKey)) + { + // Element found + return const_iterator(this, local_index); + } + + // Skip past this bucket + control_equal >>= first_equal + 1; + local_index++; + } + + // Check for empty buckets + uint32 control_empty = uint32(BVec16::sEquals(control_bytes, bucket_empty).GetTrues()); + if (control_empty != 0) + { + // An empty bucket was found, we didn't find the element + return cend(); + } + + // Move to next batch of 16 buckets + index = (index + 16) & bucket_mask; + } + } + + /// @brief Erase an element by iterator + void erase(const const_iterator &inIterator) + { + JPH_ASSERT(inIterator.IsValid()); + + // Read 16 control values before and after the current index + // (note that we added 15 bytes at the end of the control values that mirror the first 15 bytes) + BVec16 control_bytes_before = BVec16::sLoadByte16(mControl + ((inIterator.mIndex - 16) & (mMaxSize - 1))); + BVec16 control_bytes_after = BVec16::sLoadByte16(mControl + inIterator.mIndex); + BVec16 bucket_empty = BVec16::sZero(); + uint32 control_empty_before = uint32(BVec16::sEquals(control_bytes_before, bucket_empty).GetTrues()); + uint32 control_empty_after = uint32(BVec16::sEquals(control_bytes_after, bucket_empty).GetTrues()); + + // If (this index including) there exist 16 consecutive non-empty slots (represented by a bit being 0) then + // a probe looking for some element needs to continue probing so we cannot mark 
the bucket as empty + // but must mark it as deleted instead. + // Note that we use: CountLeadingZeros(uint16) = CountLeadingZeros(uint32) - 16. + uint8 control_value = CountLeadingZeros(control_empty_before) - 16 + CountTrailingZeros(control_empty_after) < 16? cBucketEmpty : cBucketDeleted; + + // Mark the bucket as empty/deleted + SetControlValue(inIterator.mIndex, control_value); + + // Destruct the element + mData[inIterator.mIndex].~KeyValue(); + + // If we marked the bucket as empty we can increase the load left + if (control_value == cBucketEmpty) + ++mLoadLeft; + + // Decrease size + --mSize; + } + + /// @brief Erase an element by key + size_type erase(const Key &inKey) + { + const_iterator it = find(inKey); + if (it == cend()) + return 0; + + erase(it); + return 1; + } + + /// Swap the contents of two hash tables + void swap(HashTable &ioRHS) noexcept + { + std::swap(mData, ioRHS.mData); + std::swap(mControl, ioRHS.mControl); + std::swap(mSize, ioRHS.mSize); + std::swap(mMaxSize, ioRHS.mMaxSize); + std::swap(mLoadLeft, ioRHS.mLoadLeft); + } + + /// In place re-hashing of all elements in the table. Removes all cBucketDeleted elements + /// The std version takes a bucket count, but we just re-hash to the same size. 
+ void rehash(size_type) + { + // Update the control value for all buckets + for (size_type i = 0; i < mMaxSize; ++i) + { + uint8 &control = mControl[i]; + switch (control) + { + case cBucketDeleted: + // Deleted buckets become empty + control = cBucketEmpty; + break; + case cBucketEmpty: + // Remains empty + break; + default: + // Mark all occupied as deleted, to indicate it needs to move to the correct place + control = cBucketDeleted; + break; + } + } + + // Replicate control values to the last 15 entries + for (size_type i = 0; i < 15; ++i) + mControl[mMaxSize + i] = mControl[i]; + + // Loop over all elements that have been 'deleted' and move them to their new spot + BVec16 bucket_used = BVec16::sReplicate(cBucketUsed); + size_type bucket_mask = mMaxSize - 1; + uint32 probe_mask = bucket_mask & ~uint32(0b1111); // Mask out lower 4 bits because we test 16 buckets at a time + for (size_type src = 0; src < mMaxSize; ++src) + if (mControl[src] == cBucketDeleted) + for (;;) + { + // Split hash into index and control value + size_type src_index; + uint8 src_control; + GetIndexAndControlValue(HashTableDetail::sGetKey(mData[src]), src_index, src_control); + + // Linear probing + size_type dst = src_index; + for (;;) + { + // Check if any buckets are free + BVec16 control_bytes = BVec16::sLoadByte16(mControl + dst); + uint32 control_free = uint32(BVec16::sAnd(control_bytes, bucket_used).GetTrues()) ^ 0xffff; + if (control_free != 0) + { + // Select this bucket as destination + dst += CountTrailingZeros(control_free); + dst &= bucket_mask; + break; + } + + // Move to next batch of 16 buckets + dst = (dst + 16) & bucket_mask; + } + + // Check if we stay in the same probe group + if (((dst - src_index) & probe_mask) == ((src - src_index) & probe_mask)) + { + // We stay in the same group, we can stay where we are + SetControlValue(src, src_control); + break; + } + else if (mControl[dst] == cBucketEmpty) + { + // There's an empty bucket, move us there + SetControlValue(dst, 
src_control); + SetControlValue(src, cBucketEmpty); + new (mData + dst) KeyValue(std::move(mData[src])); + mData[src].~KeyValue(); + break; + } + else + { + // There's an element in the bucket we want to move to, swap them + JPH_ASSERT(mControl[dst] == cBucketDeleted); + SetControlValue(dst, src_control); + std::swap(mData[src], mData[dst]); + // Iterate again with the same source bucket + } + } + + // Reinitialize load left + mLoadLeft = sGetMaxLoad(mMaxSize) - mSize; + } + +private: + /// If this allocator needs to fall back to aligned allocations because the type requires it + static constexpr bool cNeedsAlignedAllocate = alignof(KeyValue) > (JPH_CPU_ADDRESS_BITS == 32? 8 : 16); + + /// Max load factor is cMaxLoadFactorNumerator / cMaxLoadFactorDenominator + static constexpr uint64 cMaxLoadFactorNumerator = 7; + static constexpr uint64 cMaxLoadFactorDenominator = 8; + + /// If we can recover this fraction of deleted elements, we'll reshuffle the buckets in place rather than growing the table + static constexpr uint64 cMaxDeletedElementsNumerator = 1; + static constexpr uint64 cMaxDeletedElementsDenominator = 8; + + /// Values that the control bytes can have + static constexpr uint8 cBucketEmpty = 0; + static constexpr uint8 cBucketDeleted = 0x7f; + static constexpr uint8 cBucketUsed = 0x80; // Lowest 7 bits are lowest 7 bits of the hash value + + /// The buckets, an array of size mMaxSize + KeyValue * mData = nullptr; + + /// Control bytes, an array of size mMaxSize + 15 + uint8 * mControl = nullptr; + + /// Number of elements in the table + size_type mSize = 0; + + /// Max number of elements that can be stored in the table + size_type mMaxSize = 0; + + /// Number of elements we can add to the table before we need to grow + size_type mLoadLeft = 0; +}; + +JPH_NAMESPACE_END diff --git a/libs/Jolt/Core/IssueReporting.cpp b/libs/Jolt/Core/IssueReporting.cpp index e4efe12..ff32448 100644 --- a/libs/Jolt/Core/IssueReporting.cpp +++ b/libs/Jolt/Core/IssueReporting.cpp 
@@ -4,10 +4,6 @@ #include -JPH_SUPPRESS_WARNINGS_STD_BEGIN -#include -JPH_SUPPRESS_WARNINGS_STD_END - JPH_NAMESPACE_BEGIN static void DummyTrace([[maybe_unused]] const char *inFMT, ...) diff --git a/libs/Jolt/Core/JobSystem.h b/libs/Jolt/Core/JobSystem.h index 301e4ce..1bd621a 100644 --- a/libs/Jolt/Core/JobSystem.h +++ b/libs/Jolt/Core/JobSystem.h @@ -195,6 +195,7 @@ class JPH_EXPORT JobSystem : public NonCopyable } inline void Release() { + #ifndef JPH_TSAN_ENABLED // Releasing a reference must use release semantics... if (mReferenceCount.fetch_sub(1, memory_order_release) == 1) { @@ -202,6 +203,11 @@ class JPH_EXPORT JobSystem : public NonCopyable atomic_thread_fence(memory_order_acquire); mJobSystem->FreeJob(this); } + #else + // But under TSAN, we cannot use atomic_thread_fence, so we use an acq_rel operation unconditionally instead + if (mReferenceCount.fetch_sub(1, memory_order_acq_rel) == 1) + mJobSystem->FreeJob(this); + #endif } /// Add to the dependency counter. diff --git a/libs/Jolt/Core/JobSystemThreadPool.cpp b/libs/Jolt/Core/JobSystemThreadPool.cpp index 04da37a..6d4dc10 100644 --- a/libs/Jolt/Core/JobSystemThreadPool.cpp +++ b/libs/Jolt/Core/JobSystemThreadPool.cpp @@ -11,7 +11,9 @@ #ifdef JPH_PLATFORM_WINDOWS JPH_SUPPRESS_WARNING_PUSH JPH_MSVC_SUPPRESS_WARNING(5039) // winbase.h(13179): warning C5039: 'TpSetCallbackCleanupGroup': pointer or reference to potentially throwing function passed to 'extern "C"' function under -EHc. Undefined behavior may occur if this function throws an exception. 
- #define WIN32_LEAN_AND_MEAN + #ifndef WIN32_LEAN_AND_MEAN + #define WIN32_LEAN_AND_MEAN + #endif #ifndef JPH_COMPILER_MINGW #include #else diff --git a/libs/Jolt/Core/JobSystemWithBarrier.cpp b/libs/Jolt/Core/JobSystemWithBarrier.cpp index eaaabdb..3d4c726 100644 --- a/libs/Jolt/Core/JobSystemWithBarrier.cpp +++ b/libs/Jolt/Core/JobSystemWithBarrier.cpp @@ -146,8 +146,11 @@ void JobSystemWithBarrier::BarrierImpl::Wait() } while (has_executed); } - // Wait for another thread to wake us when either there is more work to do or when all jobs have completed - int num_to_acquire = max(1, mSemaphore.GetValue()); // When there have been multiple releases, we acquire them all at the same time to avoid needlessly spinning on executing jobs + // Wait for another thread to wake us when either there is more work to do or when all jobs have completed. + // When there have been multiple releases, we acquire them all at the same time to avoid needlessly spinning on executing jobs. + // Note that using GetValue is inherently unsafe since we can read a stale value, but this is not an issue here as this is the only + // place where we acquire the semaphore. Other threads only release it, so we can only read a value that is lower or equal to the actual value. 
+ int num_to_acquire = max(1, mSemaphore.GetValue()); mSemaphore.Acquire(num_to_acquire); mNumToAcquire -= num_to_acquire; } diff --git a/libs/Jolt/Core/LinearCurve.h b/libs/Jolt/Core/LinearCurve.h index ed77341..8700144 100644 --- a/libs/Jolt/Core/LinearCurve.h +++ b/libs/Jolt/Core/LinearCurve.h @@ -15,15 +15,15 @@ class StreamIn; // A set of points (x, y) that form a linear curve class JPH_EXPORT LinearCurve { -public: JPH_DECLARE_SERIALIZABLE_NON_VIRTUAL(JPH_EXPORT, LinearCurve) +public: /// A point on the curve class Point { - public: JPH_DECLARE_SERIALIZABLE_NON_VIRTUAL(JPH_EXPORT, Point) + public: float mX = 0.0f; float mY = 0.0f; }; diff --git a/libs/Jolt/Core/Memory.h b/libs/Jolt/Core/Memory.h index d5b7c6a..b5f318d 100644 --- a/libs/Jolt/Core/Memory.h +++ b/libs/Jolt/Core/Memory.h @@ -36,7 +36,11 @@ JPH_EXPORT void RegisterDefaultAllocator(); JPH_INLINE void *operator new (size_t inCount, std::align_val_t inAlignment) { return JPH::AlignedAllocate(inCount, static_cast(inAlignment)); } \ JPH_INLINE void operator delete (void *inPointer, [[maybe_unused]] std::align_val_t inAlignment) noexcept { JPH::AlignedFree(inPointer); } \ JPH_INLINE void *operator new[] (size_t inCount, std::align_val_t inAlignment) { return JPH::AlignedAllocate(inCount, static_cast(inAlignment)); } \ - JPH_INLINE void operator delete[] (void *inPointer, [[maybe_unused]] std::align_val_t inAlignment) noexcept { JPH::AlignedFree(inPointer); } + JPH_INLINE void operator delete[] (void *inPointer, [[maybe_unused]] std::align_val_t inAlignment) noexcept { JPH::AlignedFree(inPointer); } \ + JPH_INLINE void *operator new ([[maybe_unused]] size_t inCount, void *inPointer) noexcept { return inPointer; } \ + JPH_INLINE void operator delete ([[maybe_unused]] void *inPointer, [[maybe_unused]] void *inPlace) noexcept { /* Do nothing */ } \ + JPH_INLINE void *operator new[] ([[maybe_unused]] size_t inCount, void *inPointer) noexcept { return inPointer; } \ + JPH_INLINE void operator delete[] 
([[maybe_unused]] void *inPointer, [[maybe_unused]] void *inPlace) noexcept { /* Do nothing */ } #else diff --git a/libs/Jolt/Core/MutexArray.h b/libs/Jolt/Core/MutexArray.h index f8fdd82..3a0b558 100644 --- a/libs/Jolt/Core/MutexArray.h +++ b/libs/Jolt/Core/MutexArray.h @@ -45,7 +45,7 @@ class MutexArray : public NonCopyable /// Convert an object index to a mutex index inline uint32 GetMutexIndex(uint32 inObjectIndex) const { - std::hash hasher; + Hash hasher; return hasher(inObjectIndex) & (mNumMutexes - 1); } diff --git a/libs/Jolt/Core/Profiler.cpp b/libs/Jolt/Core/Profiler.cpp index 4b2b908..6536607 100644 --- a/libs/Jolt/Core/Profiler.cpp +++ b/libs/Jolt/Core/Profiler.cpp @@ -60,6 +60,9 @@ uint64 Profiler::GetProcessorTicksPerSecond() const return (ticks - mReferenceTick) * 1000000000ULL / std::chrono::duration_cast(time - mReferenceTime).count(); } +// This function assumes that none of the threads are active while we're dumping the profile, +// otherwise there will be a race condition on mCurrentSample and the profile data. +JPH_TSAN_NO_SANITIZE void Profiler::NextFrame() { std::lock_guard lock(mLock); @@ -219,8 +222,331 @@ void Profiler::DumpChart(const char *inTag, const Threads &inThreads, const KeyT Profile Chart - - + +