Skip to content

Meshlet BVH Culling #19318

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 24 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 3 additions & 11 deletions crates/bevy_pbr/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ pub mod graph {

use crate::{deferred::DeferredPbrLightingPlugin, graph::NodePbr};
use bevy_app::prelude::*;
use bevy_asset::{load_internal_asset, weak_handle, AssetApp, AssetPath, Assets, Handle};
use bevy_asset::{AssetApp, AssetPath, Assets, Handle};
use bevy_core_pipeline::core_3d::graph::{Core3d, Node3d};
use bevy_ecs::prelude::*;
use bevy_image::Image;
Expand All @@ -135,7 +135,7 @@ use bevy_render::{
extract_resource::ExtractResourcePlugin,
load_shader_library,
render_graph::RenderGraph,
render_resource::{Shader, ShaderRef},
render_resource::ShaderRef,
sync_component::SyncComponentPlugin,
view::VisibilitySystems,
ExtractSchedule, Render, RenderApp, RenderDebugFlags, RenderSystems,
Expand All @@ -149,9 +149,6 @@ fn shader_ref(path: PathBuf) -> ShaderRef {
ShaderRef::Path(AssetPath::from_path_buf(path).with_source("embedded"))
}

const MESHLET_VISIBILITY_BUFFER_RESOLVE_SHADER_HANDLE: Handle<Shader> =
weak_handle!("69187376-3dea-4d0f-b3f5-185bde63d6a2");

pub const TONEMAPPING_LUT_TEXTURE_BINDING_INDEX: u32 = 26;
pub const TONEMAPPING_LUT_SAMPLER_BINDING_INDEX: u32 = 27;

Expand Down Expand Up @@ -205,12 +202,7 @@ impl Plugin for PbrPlugin {
load_shader_library!(app, "render/view_transformations.wgsl");

// Setup dummy shaders for when MeshletPlugin is not used to prevent shader import errors.
load_internal_asset!(
app,
MESHLET_VISIBILITY_BUFFER_RESOLVE_SHADER_HANDLE,
"meshlet/dummy_visibility_buffer_resolve.wgsl",
Shader::from_wgsl
);
load_shader_library!(app, "meshlet/dummy_visibility_buffer_resolve.wgsl");

app.register_asset_reflect::<StandardMaterial>()
.register_type::<AmbientLight>()
Expand Down
90 changes: 63 additions & 27 deletions crates/bevy_pbr/src/meshlet/asset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ use bevy_asset::{
};
use bevy_math::{Vec2, Vec3};
use bevy_reflect::TypePath;
use bevy_render::render_resource::ShaderType;
use bevy_tasks::block_on;
use bytemuck::{Pod, Zeroable};
use half::f16;
use lz4_flex::frame::{FrameDecoder, FrameEncoder};
use std::io::{Read, Write};
use thiserror::Error;
Expand All @@ -17,7 +17,7 @@ use thiserror::Error;
const MESHLET_MESH_ASSET_MAGIC: u64 = 1717551717668;

/// The current version of the [`MeshletMesh`] asset format.
pub const MESHLET_MESH_ASSET_VERSION: u64 = 1;
pub const MESHLET_MESH_ASSET_VERSION: u64 = 2;

/// A mesh that has been pre-processed into multiple small clusters of triangles called meshlets.
///
Expand Down Expand Up @@ -47,12 +47,32 @@ pub struct MeshletMesh {
pub(crate) vertex_uvs: Arc<[Vec2]>,
/// Triangle indices for meshlets.
pub(crate) indices: Arc<[u8]>,
/// The BVH8 used for culling and LOD selection of the meshlets. The root is at index 0.
pub(crate) bvh: Arc<[BvhNode]>,
/// The list of meshlets making up this mesh.
pub(crate) meshlets: Arc<[Meshlet]>,
/// Spherical bounding volumes.
pub(crate) meshlet_bounding_spheres: Arc<[MeshletBoundingSpheres]>,
/// Meshlet group and parent group simplification errors.
pub(crate) meshlet_simplification_errors: Arc<[MeshletSimplificationError]>,
pub(crate) meshlet_cull_data: Arc<[MeshletCullData]>,
/// The tight AABB of the meshlet mesh, used for frustum and occlusion culling at the instance
/// level.
pub(crate) aabb: MeshletAabb,
/// The depth of the culling BVH, used to determine the number of dispatches at runtime.
pub(crate) bvh_depth: u32,
}

/// A single BVH8 node in the BVH used for culling and LOD selection of a [`MeshletMesh`].
#[derive(Copy, Clone, Default, Pod, Zeroable)]
#[repr(C)]
pub struct BvhNode {
/// The tight AABBs of this node's children, used for frustum and occlusion during BVH
/// traversal.
pub aabbs: [MeshletAabbErrorOffset; 8],
/// The LOD bounding spheres of this node's children, used for LOD selection during BVH
/// traversal.
pub lod_bounds: [MeshletBoundingSphere; 8],
/// If `u8::MAX`, it indicates that the child of each children is a BVH node, otherwise it is the number of meshlets in the group.
pub child_counts: [u8; 8],
pub _padding: [u32; 2],
}

/// A single meshlet within a [`MeshletMesh`].
Expand Down Expand Up @@ -91,31 +111,37 @@ pub struct Meshlet {
/// Bounding spheres used for culling and choosing level of detail for a [`Meshlet`].
#[derive(Copy, Clone, Pod, Zeroable)]
#[repr(C)]
pub struct MeshletBoundingSpheres {
/// Bounding sphere used for frustum and occlusion culling for this meshlet.
pub culling_sphere: MeshletBoundingSphere,
pub struct MeshletCullData {
/// Tight bounding box, used for frustum and occlusion culling for this meshlet.
pub aabb: MeshletAabbErrorOffset,
/// Bounding sphere used for determining if this meshlet's group is at the correct level of detail for a given view.
pub lod_group_sphere: MeshletBoundingSphere,
/// Bounding sphere used for determining if this meshlet's parent group is at the correct level of detail for a given view.
pub lod_parent_group_sphere: MeshletBoundingSphere,
}

/// A spherical bounding volume used for a [`Meshlet`].
#[derive(Copy, Clone, Pod, Zeroable)]
/// An axis-aligned bounding box used for a [`Meshlet`].
#[derive(Copy, Clone, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct MeshletBoundingSphere {
pub struct MeshletAabb {
pub center: Vec3,
pub radius: f32,
pub half_extent: Vec3,
}

/// Simplification error used for choosing level of detail for a [`Meshlet`].
#[derive(Copy, Clone, Pod, Zeroable)]
// An axis-aligned bounding box used for a [`Meshlet`].
#[derive(Copy, Clone, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct MeshletSimplificationError {
/// Simplification error used for determining if this meshlet's group is at the correct level of detail for a given view.
pub group_error: f16,
/// Simplification error used for determining if this meshlet's parent group is at the correct level of detail for a given view.
pub parent_group_error: f16,
pub struct MeshletAabbErrorOffset {
pub center: Vec3,
pub error: f32,
pub half_extent: Vec3,
pub child_offset: u32,
}

/// A spherical bounding volume used for a [`Meshlet`].
#[derive(Copy, Clone, Default, Pod, Zeroable)]
#[repr(C)]
pub struct MeshletBoundingSphere {
pub center: Vec3,
pub radius: f32,
}

/// An [`AssetSaver`] for `.meshlet_mesh` [`MeshletMesh`] assets.
Expand Down Expand Up @@ -149,9 +175,11 @@ impl AssetSaver for MeshletMeshSaver {
write_slice(&asset.vertex_normals, &mut writer)?;
write_slice(&asset.vertex_uvs, &mut writer)?;
write_slice(&asset.indices, &mut writer)?;
write_slice(&asset.bvh, &mut writer)?;
write_slice(&asset.meshlets, &mut writer)?;
write_slice(&asset.meshlet_bounding_spheres, &mut writer)?;
write_slice(&asset.meshlet_simplification_errors, &mut writer)?;
write_slice(&asset.meshlet_cull_data, &mut writer)?;
writer.write_all(bytemuck::bytes_of(&asset.aabb))?;
writer.write_all(bytemuck::bytes_of(&asset.bvh_depth))?;
writer.finish()?;

Ok(())
Expand Down Expand Up @@ -190,18 +218,26 @@ impl AssetLoader for MeshletMeshLoader {
let vertex_normals = read_slice(reader)?;
let vertex_uvs = read_slice(reader)?;
let indices = read_slice(reader)?;
let bvh = read_slice(reader)?;
let meshlets = read_slice(reader)?;
let meshlet_bounding_spheres = read_slice(reader)?;
let meshlet_simplification_errors = read_slice(reader)?;
let meshlet_cull_data = read_slice(reader)?;
let mut bytes = [0u8; 24];
reader.read_exact(&mut bytes)?;
let aabb = bytemuck::cast(bytes);
let mut bytes = [0u8; 4];
reader.read_exact(&mut bytes)?;
let bvh_depth = u32::from_le_bytes(bytes);

Ok(MeshletMesh {
vertex_positions,
vertex_normals,
vertex_uvs,
indices,
bvh,
meshlets,
meshlet_bounding_spheres,
meshlet_simplification_errors,
meshlet_cull_data,
aabb,
bvh_depth,
})
}

Expand Down
110 changes: 110 additions & 0 deletions crates/bevy_pbr/src/meshlet/cull_bvh.wgsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#import bevy_pbr::meshlet_bindings::{
InstancedOffset,
get_aabb,
get_aabb_error,
get_aabb_child_offset,
constants,
meshlet_bvh_nodes,
meshlet_bvh_cull_count_read,
meshlet_bvh_cull_count_write,
meshlet_bvh_cull_dispatch,
meshlet_bvh_cull_queue,
meshlet_meshlet_cull_count_early,
meshlet_meshlet_cull_count_late,
meshlet_meshlet_cull_dispatch_early,
meshlet_meshlet_cull_dispatch_late,
meshlet_meshlet_cull_queue,
meshlet_second_pass_bvh_count,
meshlet_second_pass_bvh_dispatch,
meshlet_second_pass_bvh_queue,
}
#import bevy_pbr::meshlet_cull_shared::{
lod_error_is_imperceptible,
aabb_in_frustum,
should_occlusion_cull_aabb,
}

@compute
@workgroup_size(128, 1, 1) // 8 threads per node
fn cull_bvh(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
// Calculate the queue ID for this thread
let dispatch_id = global_invocation_id.x;
var node = dispatch_id >> 3u;
let subnode = dispatch_id & 7u;
if node >= meshlet_bvh_cull_count_read { return; }

node = select(node, constants.rightmost_slot - node, constants.read_from_front == 0u);
let instanced_offset = meshlet_bvh_cull_queue[node];
let instance_id = instanced_offset.instance_id;
let bvh_node = &meshlet_bvh_nodes[instanced_offset.offset];

var aabb_error_offset = (*bvh_node).aabbs[subnode];
let aabb = get_aabb(&aabb_error_offset);
let parent_error = get_aabb_error(&aabb_error_offset);
let lod_sphere = (*bvh_node).lod_bounds[subnode];

let parent_is_imperceptible = lod_error_is_imperceptible(lod_sphere, parent_error, instance_id);
// Error and frustum cull, in both passes
if parent_is_imperceptible || !aabb_in_frustum(aabb, instance_id) { return; }

let child_offset = get_aabb_child_offset(&aabb_error_offset);
let index = subnode >> 2u;
let bit_offset = subnode & 3u;
let packed_child_count = (*bvh_node).child_counts[index];
let child_count = extractBits(packed_child_count, bit_offset * 8u, 8u);
var value = InstancedOffset(instance_id, child_offset);

// If we pass, try occlusion culling
// If this node was occluded, push it's children to the second pass to check against this frame's HZB
if should_occlusion_cull_aabb(aabb, instance_id) {
#ifdef MESHLET_FIRST_CULLING_PASS
if child_count == 255u {
let id = atomicAdd(&meshlet_second_pass_bvh_count, 1u);
meshlet_second_pass_bvh_queue[id] = value;
if ((id & 15u) == 0u) {
atomicAdd(&meshlet_second_pass_bvh_dispatch.x, 1u);
}
} else {
let base = atomicAdd(&meshlet_meshlet_cull_count_late, child_count);
let start = constants.rightmost_slot - base;
for (var i = start; i < start - child_count; i--) {
meshlet_meshlet_cull_queue[i] = value;
value.offset += 1u;
}
let req = (base + child_count + 127u) >> 7u;
atomicMax(&meshlet_meshlet_cull_dispatch_late.x, req);
}
#endif
return;
}

// If we pass, push the children to the next BVH cull
if child_count == 255u {
let id = atomicAdd(&meshlet_bvh_cull_count_write, 1u);
let index = select(constants.rightmost_slot - id, id, constants.read_from_front == 0u);
meshlet_bvh_cull_queue[index] = value;
if ((id & 15u) == 0u) {
atomicAdd(&meshlet_bvh_cull_dispatch.x, 1u);
}
} else {
#ifdef MESHLET_FIRST_CULLING_PASS
let base = atomicAdd(&meshlet_meshlet_cull_count_early, child_count);
let end = base + child_count;
for (var i = base; i < end; i++) {
meshlet_meshlet_cull_queue[i] = value;
value.offset += 1u;
}
let req = (end + 127u) >> 7u;
atomicMax(&meshlet_meshlet_cull_dispatch_early.x, req);
#else
let base = atomicAdd(&meshlet_meshlet_cull_count_late, child_count);
let start = constants.rightmost_slot - base;
for (var i = start; i < start - child_count; i--) {
meshlet_meshlet_cull_queue[i] = value;
value.offset += 1u;
}
let req = (base + child_count + 127u) >> 7u;
atomicMax(&meshlet_meshlet_cull_dispatch_late.x, req);
#endif
}
}
Loading