From 5575da22d3f844ba5af0abe925e2890dfb9e60e3 Mon Sep 17 00:00:00 2001 From: Rye Date: Sun, 14 Jun 2026 03:33:04 -0400 Subject: [PATCH 1/8] newview: cache GLTF material binds, dedup probe occlusion pushGLTFBatch rebound the full LLFetchedGLTFMaterial (up to 5 textures + ~7 uniforms) for every draw. Thread a (material, media-texture) cache through the GLTF push paths and skip the rebind when unchanged, and sort PBR faces by GLTF material in CompareBatchBreaker so same-material draws are adjacent and actually coalesce. The media-override texture stays in the cache key; renderAlphaObjects invalidates the cache where the simple pool rebinds texture units. Also drop the duplicate hero-probe occlusion pass in doOcclusion -- the first block already issues both reflection and hero queries. Co-Authored-By: Claude Opus 4.8 (1M context) --- indra/newview/lldrawpool.cpp | 31 +++++++++++++++------ indra/newview/lldrawpool.h | 7 +++-- indra/newview/llgltfmaterialpreviewmgr.cpp | 4 ++- indra/newview/llvovolume.cpp | 10 +++++++ indra/newview/pipeline.cpp | 32 +++++++--------------- 5 files changed, 50 insertions(+), 34 deletions(-) diff --git a/indra/newview/lldrawpool.cpp b/indra/newview/lldrawpool.cpp index 3eca6059ed0..56a3baed344 100644 --- a/indra/newview/lldrawpool.cpp +++ b/indra/newview/lldrawpool.cpp @@ -786,6 +786,8 @@ void LLRenderPass::pushGLTFBatches(U32 type, bool textured) void LLRenderPass::pushGLTFBatches(U32 type) { LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWPOOL; + LLFetchedGLTFMaterial* lastMat = nullptr; + LLViewerTexture* lastTex = nullptr; auto* begin = gPipeline.beginRenderMap(type); auto* end = gPipeline.endRenderMap(type); for (LLCullResult::drawinfo_iterator i = begin; i != end; ) @@ -794,7 +796,7 @@ void LLRenderPass::pushGLTFBatches(U32 type) LLDrawInfo& params = **i; LLCullResult::increment_iterator(i, end); - pushGLTFBatch(params); + pushGLTFBatch(params, lastMat, lastTex); } } @@ -814,16 +816,25 @@ void LLRenderPass::pushUntexturedGLTFBatches(U32 type) } 
// static -void LLRenderPass::pushGLTFBatch(LLDrawInfo& params) +void LLRenderPass::pushGLTFBatch(LLDrawInfo& params, LLFetchedGLTFMaterial*& lastMat, LLViewerTexture*& lastTex) { - auto& mat = params.mGLTFMaterial; + LLFetchedGLTFMaterial* mat = params.mGLTFMaterial.get(); - if (mat.notNull()) + if (mat) { - mat->bind(params.mTexture); + // params.mTexture is the media override (bind() applies it to base color + // and emissive), so it is part of the cache key -- otherwise media faces + // sharing a material would render with a stale base texture. + LLViewerTexture* tex = params.mTexture.get(); + if (mat != lastMat || tex != lastTex) + { + mat->bind(params.mTexture); + lastMat = mat; + lastTex = tex; + } } - LLGLDisable cull_face(mat.notNull() && mat->mDoubleSided ? GL_CULL_FACE : 0); + LLGLDisable cull_face(mat && mat->mDoubleSided ? GL_CULL_FACE : 0); setup_texture_matrix(params); @@ -866,6 +877,8 @@ void LLRenderPass::pushRiggedGLTFBatches(U32 type) const LLVOAvatar* lastAvatar = nullptr; U64 lastMeshId = 0; bool skipLastSkin = false; + LLFetchedGLTFMaterial* lastMat = nullptr; + LLViewerTexture* lastTex = nullptr; auto* begin = gPipeline.beginRenderMap(type); auto* end = gPipeline.endRenderMap(type); @@ -875,7 +888,7 @@ void LLRenderPass::pushRiggedGLTFBatches(U32 type) LLDrawInfo& params = **i; LLCullResult::increment_iterator(i, end); - pushRiggedGLTFBatch(params, lastAvatar, lastMeshId, skipLastSkin); + pushRiggedGLTFBatch(params, lastAvatar, lastMeshId, skipLastSkin, lastMat, lastTex); } } @@ -900,11 +913,11 @@ void LLRenderPass::pushUntexturedRiggedGLTFBatches(U32 type) // static -void LLRenderPass::pushRiggedGLTFBatch(LLDrawInfo& params, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin) +void LLRenderPass::pushRiggedGLTFBatch(LLDrawInfo& params, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin, LLFetchedGLTFMaterial*& lastMat, LLViewerTexture*& lastTex) { if (uploadMatrixPalette(params.mAvatar, params.mSkinInfo, 
lastAvatar, lastMeshId, skipLastSkin)) { - pushGLTFBatch(params); + pushGLTFBatch(params, lastMat, lastTex); } } diff --git a/indra/newview/lldrawpool.h b/indra/newview/lldrawpool.h index 46696fc4a4c..c645565f06c 100644 --- a/indra/newview/lldrawpool.h +++ b/indra/newview/lldrawpool.h @@ -40,6 +40,7 @@ class LLDrawInfo; class LLVOAvatar; class LLGLSLShader; class LLMeshSkinInfo; +class LLFetchedGLTFMaterial; class LLDrawPool { @@ -376,8 +377,10 @@ class LLRenderPass : public LLDrawPool void pushUntexturedRiggedGLTFBatches(U32 type); // push a single GLTF draw call - static void pushGLTFBatch(LLDrawInfo& params); - static void pushRiggedGLTFBatch(LLDrawInfo& params, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin); + // lastMat/lastTex track the most recently bound material+media texture so + // consecutive draws sharing a material skip the redundant LLFetchedGLTFMaterial::bind + static void pushGLTFBatch(LLDrawInfo& params, LLFetchedGLTFMaterial*& lastMat, LLViewerTexture*& lastTex); + static void pushRiggedGLTFBatch(LLDrawInfo& params, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin, LLFetchedGLTFMaterial*& lastMat, LLViewerTexture*& lastTex); static void pushUntexturedGLTFBatch(LLDrawInfo& params); static void pushUntexturedRiggedGLTFBatch(LLDrawInfo& params, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin); diff --git a/indra/newview/llgltfmaterialpreviewmgr.cpp b/indra/newview/llgltfmaterialpreviewmgr.cpp index c49b7519828..11499b15fb5 100644 --- a/indra/newview/llgltfmaterialpreviewmgr.cpp +++ b/indra/newview/llgltfmaterialpreviewmgr.cpp @@ -510,9 +510,11 @@ bool LLGLTFPreviewTexture::render() gPipeline.bindDeferredShader(shader); fixup_shader_constants(shader); + LLFetchedGLTFMaterial* lastMat = nullptr; + LLViewerTexture* lastTex = nullptr; for (PreviewSpherePart& part : preview_sphere) { - LLRenderPass::pushGLTFBatch(*part->mDrawInfo); + LLRenderPass::pushGLTFBatch(*part->mDrawInfo, lastMat, lastTex); 
} gPipeline.unbindDeferredShader(shader); diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp index 8a386156144..a1580dc7112 100644 --- a/indra/newview/llvovolume.cpp +++ b/indra/newview/llvovolume.cpp @@ -6248,6 +6248,16 @@ struct CompareBatchBreaker const LLTextureEntry* lte = lhs->getTextureEntry(); const LLTextureEntry* rte = rhs->getTextureEntry(); + // Group faces sharing a GLTF material so the PBR push loop can skip + // redundant LLFetchedGLTFMaterial::bind calls (see LLRenderPass::pushGLTFBatch). + // Non-PBR faces have a null render material, so this is a no-op for them. + const LLGLTFMaterial* lgltf = lte->getGLTFRenderMaterial(); + const LLGLTFMaterial* rgltf = rte->getGLTFRenderMaterial(); + if (lgltf != rgltf) + { + return lgltf < rgltf; + } + if (lte->getBumpmap() != rte->getBumpmap()) { return lte->getBumpmap() < rte->getBumpmap(); diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp index 48af355dc10..7119ea43a01 100644 --- a/indra/newview/pipeline.cpp +++ b/indra/newview/pipeline.cpp @@ -2953,26 +2953,6 @@ void LLPipeline::doOcclusion(LLCamera& camera) gGL.setColorMask(true, true); } - if (sReflectionProbesEnabled && sUseOcclusion > 1 && !LLPipeline::sShadowRender && !gCubeSnapshot) - { - gGL.setColorMask(false, false); - LLGLDepthTest depth(GL_TRUE, GL_FALSE); - LLGLDisable cull(GL_CULL_FACE); - - gOcclusionCubeProgram.bind(); - - if (mCubeVB.isNull()) - { //cube VB will be used for issuing occlusion queries - mCubeVB = ll_create_cube_vb(LLVertexBuffer::MAP_VERTEX); - } - mCubeVB->setBuffer(); - - mHeroProbeManager.doOcclusion(); - gOcclusionCubeProgram.unbind(); - - gGL.setColorMask(true, true); - } - if (LLPipeline::sUseOcclusion > 1 && (sCull->hasOcclusionGroups() || LLVOCachePartition::sNeedsOcclusionCheck)) { @@ -7091,6 +7071,10 @@ void LLPipeline::renderAlphaObjects(bool rigged) const LLVOAvatar* lastAvatarGLTF = nullptr; U64 lastMeshIdGLTF = 0; bool skipLastSkinGLTF; + // GLTF material bind cache; 
invalidated in the non-GLTF branches below since + // mSimplePool->pushBatch rebinds texture units and would clobber the material + LLFetchedGLTFMaterial* lastMatGLTF = nullptr; + LLViewerTexture* lastTexGLTF = nullptr; auto* begin = gPipeline.beginRenderMap(type); auto* end = gPipeline.endRenderMap(type); @@ -7114,7 +7098,7 @@ void LLPipeline::renderAlphaObjects(bool rigged) LLGLSLShader::sCurBoundShaderPtr->uniform1i(LLShaderMgr::SUN_UP_FACTOR, sun_up); LLGLSLShader::sCurBoundShaderPtr->uniform1f(LLShaderMgr::DEFERRED_SHADOW_TARGET_WIDTH, (float)target_width); LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(ALPHA_BLEND_CUTOFF); - LLRenderPass::pushRiggedGLTFBatch(*pparams, lastAvatarGLTF, lastMeshIdGLTF, skipLastSkinGLTF); + LLRenderPass::pushRiggedGLTFBatch(*pparams, lastAvatarGLTF, lastMeshIdGLTF, skipLastSkinGLTF, lastMatGLTF, lastTexGLTF); } else { @@ -7122,6 +7106,8 @@ void LLPipeline::renderAlphaObjects(bool rigged) LLGLSLShader::sCurBoundShaderPtr->uniform1i(LLShaderMgr::SUN_UP_FACTOR, sun_up); LLGLSLShader::sCurBoundShaderPtr->uniform1f(LLShaderMgr::DEFERRED_SHADOW_TARGET_WIDTH, (float)target_width); LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(ALPHA_BLEND_CUTOFF); + lastMatGLTF = nullptr; // pushBatch clobbers texture units + lastTexGLTF = nullptr; if (mSimplePool->uploadMatrixPalette(pparams->mAvatar, pparams->mSkinInfo, lastAvatar, lastMeshId, skipLastSkin)) { mSimplePool->pushBatch(*pparams, true, true); @@ -7136,7 +7122,7 @@ void LLPipeline::renderAlphaObjects(bool rigged) LLGLSLShader::sCurBoundShaderPtr->uniform1i(LLShaderMgr::SUN_UP_FACTOR, sun_up); LLGLSLShader::sCurBoundShaderPtr->uniform1f(LLShaderMgr::DEFERRED_SHADOW_TARGET_WIDTH, (float)target_width); LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(ALPHA_BLEND_CUTOFF); - LLRenderPass::pushGLTFBatch(*pparams); + LLRenderPass::pushGLTFBatch(*pparams, lastMatGLTF, lastTexGLTF); } else { @@ -7144,6 +7130,8 @@ void LLPipeline::renderAlphaObjects(bool rigged) 
LLGLSLShader::sCurBoundShaderPtr->uniform1i(LLShaderMgr::SUN_UP_FACTOR, sun_up); LLGLSLShader::sCurBoundShaderPtr->uniform1f(LLShaderMgr::DEFERRED_SHADOW_TARGET_WIDTH, (float)target_width); LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(ALPHA_BLEND_CUTOFF); + lastMatGLTF = nullptr; // pushBatch clobbers texture units + lastTexGLTF = nullptr; mSimplePool->pushBatch(*pparams, true, true); } } From fba6f909a965d30281b38fbb055ea4449e586eaa Mon Sep 17 00:00:00 2001 From: Rye Date: Sun, 14 Jun 2026 03:33:20 -0400 Subject: [PATCH 2/8] newview: add RenderAvatarShadowDetail to skip costly avatar shadow passes The avatar alpha-blend shadow pass is the most expensive and least visually important avatar shadow pass, and it is rendered across every cascade. RenderAvatarShadowDetail (default 2 = unchanged) lets it (and optionally the alpha-mask pass) be skipped, speeding up crowd scenes. Co-Authored-By: Claude Opus 4.8 (1M context) --- indra/newview/app_settings/settings_alchemy.xml | 11 +++++++++++ indra/newview/lldrawpoolavatar.cpp | 12 ++++++++++++ 2 files changed, 23 insertions(+) diff --git a/indra/newview/app_settings/settings_alchemy.xml b/indra/newview/app_settings/settings_alchemy.xml index b323c0f9012..138195c4218 100644 --- a/indra/newview/app_settings/settings_alchemy.xml +++ b/indra/newview/app_settings/settings_alchemy.xml @@ -1158,6 +1158,17 @@ Value 0 + RenderAvatarShadowDetail + + Comment + Which avatar passes cast shadows. 0 = opaque only, 1 = opaque + alpha mask, 2 = full (also alpha blend). Lower values speed up crowd scenes by skipping the expensive alpha-blend avatar shadow pass across all cascades. 
+ Persist + 1 + Type + S32 + Value + 2 + RenderBloomHDR Comment diff --git a/indra/newview/lldrawpoolavatar.cpp b/indra/newview/lldrawpoolavatar.cpp index 89d85dfa2a4..1cca08c20bc 100644 --- a/indra/newview/lldrawpoolavatar.cpp +++ b/indra/newview/lldrawpoolavatar.cpp @@ -397,6 +397,18 @@ void LLDrawPoolAvatar::renderShadow(S32 pass) return; } + // Optionally skip the costlier avatar shadow passes (alpha blend is the most + // expensive and least visually important; alpha mask next). Default 2 = full. + static LLCachedControl avatar_shadow_detail(gSavedSettings, "RenderAvatarShadowDetail", 2); + if (pass == SHADOW_PASS_AVATAR_ALPHA_BLEND && avatar_shadow_detail() < 2) + { + return; + } + if (pass == SHADOW_PASS_AVATAR_ALPHA_MASK && avatar_shadow_detail() < 1) + { + return; + } + LLDrawPoolAvatar::sShadowPass = pass; if (pass == SHADOW_PASS_AVATAR_OPAQUE) From 01ae61f6bbe7b5ede41f3912e439e65955284b2b Mon Sep 17 00:00:00 2001 From: Rye Date: Sun, 14 Jun 2026 08:08:36 -0400 Subject: [PATCH 3/8] newview: add RenderShadowCullMode 1 to share the sun shadow cull across cascades Sun shadows cull and sort the scene once per cascade -- 4 octree walks + state sorts per frame. RenderShadowCullMode 1 does the octree cull once against a frustum spanning all cascades, then each cascade re-buckets the union's visible groups by its own frustum (bucketShadowCull, using the same AABBInFrustumObjectBounds test the per-cascade cull uses) and sorts only its own render map. Each cascade still renders only its slice, so it is GPU-neutral versus per-cascade culling while saving 3 of 4 octree walks -- a win on CPU-bound targets. Default 0 = unchanged. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../newview/app_settings/settings_alchemy.xml | 11 ++ indra/newview/pipeline.cpp | 158 +++++++++++++++++- indra/newview/pipeline.h | 2 +- 3 files changed, 167 insertions(+), 4 deletions(-) diff --git a/indra/newview/app_settings/settings_alchemy.xml b/indra/newview/app_settings/settings_alchemy.xml index 138195c4218..306457b7fbe 100644 --- a/indra/newview/app_settings/settings_alchemy.xml +++ b/indra/newview/app_settings/settings_alchemy.xml @@ -1169,6 +1169,17 @@ Value 2 + RenderShadowCullMode + + Comment + How sun shadow cascades are culled. 0 = cull and sort each cascade separately (default). 1 = cull once against a frustum spanning all cascades, then re-bucket and sort that shared result per cascade (less CPU per frame; individual drawables and bridges are drawn into every cascade). Experimental. + Persist + 1 + Type + S32 + Value + 0 + RenderBloomHDR Comment diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp index 7119ea43a01..e16c2c405ec 100644 --- a/indra/newview/pipeline.cpp +++ b/indra/newview/pipeline.cpp @@ -10391,7 +10391,7 @@ static LLTrace::BlockTimerStatHandle FTM_SHADOW_ALPHA_TREE("Alpha Tree"); static LLTrace::BlockTimerStatHandle FTM_SHADOW_ALPHA_GRASS("Alpha Grass"); static LLTrace::BlockTimerStatHandle FTM_SHADOW_FULLBRIGHT_ALPHA_MASKED("Fullbright Alpha Masked"); -void LLPipeline::renderShadow(const glm::mat4& view, const glm::mat4& proj, LLCamera& shadow_cam, LLCullResult& result, bool depth_clamp) +void LLPipeline::renderShadow(const glm::mat4& view, const glm::mat4& proj, LLCamera& shadow_cam, LLCullResult& result, bool depth_clamp, bool do_cull) { LL_PROFILE_ZONE_SCOPED_CATEGORY_PIPELINE; //LL_RECORD_BLOCK_TIME(FTM_SHADOW_RENDER); LL_PROFILE_GPU_ZONE("renderShadow"); @@ -10427,7 +10427,13 @@ void LLPipeline::renderShadow(const glm::mat4& view, const glm::mat4& proj, LLCa LLGLDepthTest depth_test(GL_TRUE, GL_TRUE, GL_LESS); - updateCull(shadow_cam, result); + // In RenderShadowCullMode 1, do_cull is false: generateSunShadow did
the single union + // octree cull and pre-filtered `result` to this cascade's frustum (bucketShadowCull), + // so skip the per-cascade octree walk and only sort/build this cascade's render map. + if (do_cull) + { + updateCull(shadow_cam, result); + } stateSort(shadow_cam, result); @@ -10825,6 +10831,49 @@ class LLDisableOcclusionCulling } }; +// Re-bucket a shared sun-shadow cull (produced by a single union octree walk) down to one +// cascade: copy the union's visible/drawable groups whose object bounds intersect this +// cascade's frustum into `dst` (the same AABBInFrustumObjectBounds test the per-cascade +// cull uses, so the geometry matches mode 0 exactly), then pass the small individual- +// drawable and bridge lists through unfiltered. stateSort then builds the cascade's render +// map from `dst`. Lets RenderShadowCullMode 1 share one octree walk across all cascades. +static void bucketShadowCull(LLCullResult& src, LLCamera& cam, LLCullResult& dst) +{ + dst.clear(); + + for (LLCullResult::sg_iterator i = src.beginVisibleGroups(), end = src.endVisibleGroups(); i != end; ++i) + { + LLSpatialGroup* group = *i; + if (!group->isDead() && + cam.AABBInFrustum(group->getObjectBounds()[0], group->getObjectBounds()[1]) > 0) + { + dst.pushVisibleGroup(group); + } + } + + for (LLCullResult::sg_iterator i = src.beginDrawableGroups(), end = src.endDrawableGroups(); i != end; ++i) + { + LLSpatialGroup* group = *i; + if (!group->isDead() && + cam.AABBInFrustum(group->getObjectBounds()[0], group->getObjectBounds()[1]) > 0) + { + dst.pushDrawableGroup(group); + } + } + + // Individual drawables and spatial bridges (attachments/animesh) are few; pass them + // through unfiltered -- conservative (they render into every cascade) but correct. 
+ for (LLCullResult::drawable_iterator i = src.beginVisibleList(), end = src.endVisibleList(); i != end; ++i) + { + dst.pushDrawable(*i); + } + + for (LLCullResult::bridge_iterator i = src.beginVisibleBridge(), end = src.endVisibleBridge(); i != end; ++i) + { + dst.pushBridge(*i); + } +} + void LLPipeline::generateSunShadow(LLCamera& camera) { if (!sRenderDeferred || RenderShadowDetail <= 0) @@ -11058,6 +11107,105 @@ void LLPipeline::generateSunShadow(LLCamera& camera) } else { + // RenderShadowCullMode 1: do the expensive octree cull ONCE against a frustum + // spanning every sun cascade, then have each cascade cheaply re-bucket the union's + // visible groups by its own frustum (bucketShadowCull) and build its own render + // map. Saves 3 of 4 octree walks per frame while each cascade still renders only + // its own slice -- GPU-neutral vs. per-cascade culling, so it helps CPU-bound + // targets without regressing GPU-bound ones. Disabled in cube snapshots. Default 0. + static LLCachedControl sShadowCullMode(gSavedSettings, "RenderShadowCullMode", 0); + bool have_union_cull = false; + static LLCullResult sUnionShadowResult; + if (sShadowCullMode() == 1 && !gCubeSnapshot) + { + // updateFrustumPlanes below seeds the frustum corners from the *current* GL + // matrices, and earlier setup in this function leaves them in a non-main-view + // state. Restore the saved (main-view) matrices first, as the cascade loop + // does each iteration, so the corner directions used below are correct. 
+ set_current_modelview(saved_view); + set_current_projection(saved_proj); + + LLCamera ucam = camera; + ucam.setFar(16.f); + LLViewerCamera::updateFrustumPlanes(ucam, false, false, true); + + LLVector3 ueye = camera.getOrigin(); + LLVector3* ufrust = ucam.mAgentFrustum; + LLVector3 upn = ucam.getAtAxis(); + for (U32 i = 0; i < 4; i++) + { + LLVector3 delta = ufrust[i+4]-ueye; + delta += (ufrust[i+4]-ufrust[(i+2)%4+4])*0.05f; + delta.normVec(); + F32 dp = delta*upn; + ufrust[i] = ueye + (delta*dist[0]*0.75f)/dp; + ufrust[i+4] = ueye + (delta*dist[4]*1.25f)/dp; + } + + { + glm::mat4 uview = look(camera.getOrigin(), lightDir, -up); + + // AABB the 8 full-range frustum corners directly in light space. ufrust + // spans [dist[0], dist[4]] (built above), so this box is a guaranteed + // superset of every cascade. getVisiblePointCloud is NOT usable here: the + // far corners sit past the view far plane, so it clips the cloud down to + // the 4 near corners and the union collapses to a dot at the camera. + LLVector3 mn(mul_mat4_vec3(uview, glm::vec3(ufrust[0]))); + LLVector3 mx = mn; + for (U32 i = 1; i < 8; i++) + { + LLVector3 p(mul_mat4_vec3(uview, glm::vec3(ufrust[i]))); + update_min_max(mn, mx, p); + } + + LLVector3 ucenter = (mn+mx)*0.5f; + + // Conservative ortho light-space projection bounding the whole point + // cloud. updateFrustumPlanes derives the cull frustum from the *current* + // GL modelview/projection, so set them here. Ortho is looser than the + // per-cascade perspective fit, so the result is a superset of every + // cascade frustum -- no dropped casters. + // + // Pad the depth range: with the sun near-overhead the light-space + // footprint is nearly planar (znear ~= zfar), which makes glm::ortho + // singular and updateFrustumPlanes unproject to NaN frustum corners -- + // shadows then drop and flip with camera angle. 
The near plane is + // replaced by shadow_near_clip below and the far only needs to clear the + // receivers, so widening the depth range is always safe. + F32 zpad = llmax(mx.mV[0] - mn.mV[0], mx.mV[1] - mn.mV[1]) * 0.5f + 1.f; + glm::mat4 uproj = glm::ortho(mn.mV[0], mx.mV[0], mn.mV[1], mx.mV[1], -mx.mV[2] - zpad, -mn.mV[2] + zpad); + + ucam.setOriginAndLookAt(ueye, up, ucenter); + ucam.setOrigin(0, 0, 0); + + LLViewerCamera::sCurCameraID = LLViewerCamera::CAMERA_SUN_SHADOW0; + set_current_modelview(uview); + set_current_projection(uproj); + LLViewerCamera::updateFrustumPlanes(ucam, false, false, true); + ucam.getAgentPlane(LLCamera::AGENT_PLANE_NEAR).set(shadow_near_clip); + + bool saved_shadow_render = LLPipeline::sShadowRender; + U32 saved_occlusion = sUseOcclusion; + LLPipeline::sShadowRender = true; + // Disable occlusion culling for the shadow cull exactly as renderShadow + // does: occlusion queries are main-camera and previous-frame based, so + // leaving them on wrongly culls casters hidden from the main view (their + // shadows still show) and flickers as the queries resolve frame to frame. + sUseOcclusion = 0; + // One octree walk for the whole sun shadow. No stateSort here -- each + // cascade re-buckets these visible groups and sorts its own render map. + updateCull(ucam, sUnionShadowResult); + sUseOcclusion = saved_occlusion; + LLPipeline::sShadowRender = saved_shadow_render; + + // restore main matrices (the cascade loop sets its own each iteration) + set_current_modelview(saved_view); + set_current_projection(saved_proj); + + have_union_cull = true; + } + } + for (S32 j = 0; j < (gCubeSnapshot ? 
2 : 4); j++) { if (!hasRenderDebugMask(RENDER_DEBUG_SHADOW_FRUSTA) && !gCubeSnapshot) @@ -11418,7 +11566,11 @@ void LLPipeline::generateSunShadow(LLCamera& camera) { static LLCullResult result[4]; - renderShadow(view[j], proj[j], shadow_cam, result[j], true); + if (have_union_cull) + { // re-bucket the shared union cull down to this cascade's frustum + bucketShadowCull(sUnionShadowResult, shadow_cam, result[j]); + } + renderShadow(view[j], proj[j], shadow_cam, result[j], true, !have_union_cull); } mRT->shadow[j].flush(); diff --git a/indra/newview/pipeline.h b/indra/newview/pipeline.h index 665b016ef03..f7cc8dd10c3 100644 --- a/indra/newview/pipeline.h +++ b/indra/newview/pipeline.h @@ -353,7 +353,7 @@ class LLPipeline void renderHighlight(const LLViewerObject* obj, F32 fade); - void renderShadow(const glm::mat4& view, const glm::mat4& proj, LLCamera& camera, LLCullResult& result, bool depth_clamp); + void renderShadow(const glm::mat4& view, const glm::mat4& proj, LLCamera& camera, LLCullResult& result, bool depth_clamp, bool do_cull = true); void renderSelectedFaces(const LLColor4& color); void renderHighlights(); void renderDebug(); From 5a6e1378514367328dae5714bfc2070d32880423 Mon Sep 17 00:00:00 2001 From: Rye Date: Sun, 14 Jun 2026 15:29:46 -0400 Subject: [PATCH 4/8] newview: skip deferred shader bind for empty material passes The deferred pass loop binds a shader per pass with no emptiness check, and the materials pool exposes 24 passes (12 legacy material types x 2 rigged). Modern PBR/simple scenes use none of them, wasting up to 24 bindDeferredShader calls per frame. Add LLDrawPoolMaterials::isPassEmpty (consulted by begin/render/endDeferredPass so the bind/unbind stays balanced) to skip passes whose render map is empty. Visually a no-op -- empty passes had nothing to draw -- so it is pure CPU/GL-call reduction that helps weak hardware on the common modern-content case. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- indra/newview/lldrawpoolmaterials.cpp | 69 ++++++++++++++++++--------- indra/newview/lldrawpoolmaterials.h | 5 ++ 2 files changed, 52 insertions(+), 22 deletions(-) diff --git a/indra/newview/lldrawpoolmaterials.cpp b/indra/newview/lldrawpoolmaterials.cpp index e7ec2022d25..b3307858116 100644 --- a/indra/newview/lldrawpoolmaterials.cpp +++ b/indra/newview/lldrawpoolmaterials.cpp @@ -50,10 +50,46 @@ S32 LLDrawPoolMaterials::getNumDeferredPasses() return 12*2; } +// Render-map pass type for each non-rigged material pass; rigged passes use type + 1. +// Kept in sync with the shader index table in beginDeferredPass and the type list in +// renderDeferred (which now indexes this same array). +static const U32 sMaterialPassType[] = +{ + LLRenderPass::PASS_MATERIAL, + LLRenderPass::PASS_MATERIAL_ALPHA_MASK, + LLRenderPass::PASS_MATERIAL_ALPHA_EMISSIVE, + LLRenderPass::PASS_SPECMAP, + LLRenderPass::PASS_SPECMAP_MASK, + LLRenderPass::PASS_SPECMAP_EMISSIVE, + LLRenderPass::PASS_NORMMAP, + LLRenderPass::PASS_NORMMAP_MASK, + LLRenderPass::PASS_NORMMAP_EMISSIVE, + LLRenderPass::PASS_NORMSPEC, + LLRenderPass::PASS_NORMSPEC_MASK, + LLRenderPass::PASS_NORMSPEC_EMISSIVE, +}; + +bool LLDrawPoolMaterials::isPassEmpty(S32 pass) +{ + bool rigged = false; + if (pass >= 12) + { + rigged = true; + pass -= 12; + } + U32 type = sMaterialPassType[pass] + (rigged ? 
1 : 0); + return gPipeline.beginRenderMap(type) == gPipeline.endRenderMap(type); +} + void LLDrawPoolMaterials::beginDeferredPass(S32 pass) { LL_PROFILE_ZONE_SCOPED_CATEGORY_MATERIAL; + if (isPassEmpty(pass)) + { // nothing to draw this pass -- skip the (costly) deferred shader bind + return; + } + bool rigged = false; if (pass >= 12) { @@ -97,7 +133,10 @@ void LLDrawPoolMaterials::endDeferredPass(S32 pass) { LL_PROFILE_ZONE_SCOPED_CATEGORY_MATERIAL; - mShader->unbind(); + if (!isPassEmpty(pass)) + { // only unbind if beginDeferredPass actually bound a shader for this pass + mShader->unbind(); + } LLRenderPass::endRenderPass(pass); } @@ -105,25 +144,11 @@ void LLDrawPoolMaterials::endDeferredPass(S32 pass) void LLDrawPoolMaterials::renderDeferred(S32 pass) { LL_PROFILE_ZONE_SCOPED_CATEGORY_MATERIAL; - static const U32 type_list[] = - { - LLRenderPass::PASS_MATERIAL, - //LLRenderPass::PASS_MATERIAL_ALPHA, - LLRenderPass::PASS_MATERIAL_ALPHA_MASK, - LLRenderPass::PASS_MATERIAL_ALPHA_EMISSIVE, - LLRenderPass::PASS_SPECMAP, - //LLRenderPass::PASS_SPECMAP_BLEND, - LLRenderPass::PASS_SPECMAP_MASK, - LLRenderPass::PASS_SPECMAP_EMISSIVE, - LLRenderPass::PASS_NORMMAP, - //LLRenderPass::PASS_NORMMAP_BLEND, - LLRenderPass::PASS_NORMMAP_MASK, - LLRenderPass::PASS_NORMMAP_EMISSIVE, - LLRenderPass::PASS_NORMSPEC, - //LLRenderPass::PASS_NORMSPEC_BLEND, - LLRenderPass::PASS_NORMSPEC_MASK, - LLRenderPass::PASS_NORMSPEC_EMISSIVE, - }; + + if (isPassEmpty(pass)) + { // beginDeferredPass skipped the bind for this empty pass; nothing to draw + return; + } bool rigged = false; if (pass >= 12) @@ -132,9 +157,9 @@ void LLDrawPoolMaterials::renderDeferred(S32 pass) pass -= 12; } - llassert(pass < sizeof(type_list)/sizeof(U32)); + llassert(pass < sizeof(sMaterialPassType)/sizeof(U32)); - U32 type = type_list[pass]; + U32 type = sMaterialPassType[pass]; if (rigged) { type += 1; diff --git a/indra/newview/lldrawpoolmaterials.h b/indra/newview/lldrawpoolmaterials.h index 
345697ffd15..5b10a6cd4f4 100644 --- a/indra/newview/lldrawpoolmaterials.h +++ b/indra/newview/lldrawpoolmaterials.h @@ -41,6 +41,11 @@ class LLGLSLShader; class LLDrawPoolMaterials : public LLRenderPass { LLGLSLShader *mShader; + + // True when this pass's render map is empty (no geometry). begin/render/end all + // consult it so an empty pass skips the deferred shader bind/unbind entirely -- + // modern PBR/simple scenes leave most of the 12 legacy material passes empty. + bool isPassEmpty(S32 pass); public: LLDrawPoolMaterials(); From b56963c01a14500540074dae7745c3552294cf17 Mon Sep 17 00:00:00 2001 From: Rye Date: Sun, 14 Jun 2026 16:19:38 -0400 Subject: [PATCH 5/8] newview: skip empty bump passes and cache bump-map binds The bump pool's deferred render bound gDeferredBumpProgram and re-bound the bump-image texture for every draw, even with no bump geometry (the common case in modern PBR scenes). Skip each static/rigged pass when its render map is empty, and cache the alpha-mask cutoff + bump-image bind across runs of faces that share them (faces are sorted bumpmap-then-texture). bindBumpMap's only side effect (addTextureStats) is max-based on the source texture, so skipping a repeat is a no-op there. Visually unchanged -- pure CPU/GL-call reduction. Co-Authored-By: Claude Opus 4.8 (1M context) --- indra/newview/lldrawpoolbump.cpp | 35 ++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/indra/newview/lldrawpoolbump.cpp b/indra/newview/lldrawpoolbump.cpp index a07eeb1bedd..b1d009afac2 100644 --- a/indra/newview/lldrawpoolbump.cpp +++ b/indra/newview/lldrawpoolbump.cpp @@ -546,28 +546,51 @@ void LLDrawPoolBump::renderDeferred(S32 pass) for (int i = 0; i < 2; ++i) { bool rigged = i == 1; + + U32 type = rigged ? 
LLRenderPass::PASS_BUMP_RIGGED : LLRenderPass::PASS_BUMP; + LLCullResult::drawinfo_iterator begin = gPipeline.beginRenderMap(type); + LLCullResult::drawinfo_iterator end = gPipeline.endRenderMap(type); + if (begin == end) + { // no bump geometry in this pass -- skip the shader bind and texture setup + continue; + } + gDeferredBumpProgram.bind(rigged); diffuse_channel = LLGLSLShader::sCurBoundShaderPtr->enableTexture(LLViewerShaderMgr::DIFFUSE_MAP); bump_channel = LLGLSLShader::sCurBoundShaderPtr->enableTexture(LLViewerShaderMgr::BUMP_MAP); gGL.getTexUnit(diffuse_channel)->unbind(LLTexUnit::TT_TEXTURE); gGL.getTexUnit(bump_channel)->unbind(LLTexUnit::TT_TEXTURE); - U32 type = rigged ? LLRenderPass::PASS_BUMP_RIGGED : LLRenderPass::PASS_BUMP; - LLCullResult::drawinfo_iterator begin = gPipeline.beginRenderMap(type); - LLCullResult::drawinfo_iterator end = gPipeline.endRenderMap(type); - const LLVOAvatar* lastAvatar = nullptr; U64 lastMeshId = 0; bool skipLastSkin = false; + // Faces are sorted by bumpmap then texture, so the alpha-mask cutoff and the + // bump-image bind (an image lookup + texture bind) repeat across runs of faces. + // Skip them when unchanged. (bindBumpMap's only side effect, addTextureStats, is + // max-based on the source texture, so skipping a repeat is a no-op there too.) 
+ U8 lastBump = 255; + LLViewerTexture* lastBumpTex = nullptr; + F32 lastAlpha = -1.f; + for (LLCullResult::drawinfo_iterator i = begin; i != end; ) { LLDrawInfo& params = **i; LLCullResult::increment_iterator(i, end); - LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(params.mAlphaMaskCutoff); - LLDrawPoolBump::bindBumpMap(params, bump_channel); + if (params.mAlphaMaskCutoff != lastAlpha) + { + lastAlpha = params.mAlphaMaskCutoff; + LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(lastAlpha); + } + + if (params.mBump != lastBump || params.mTexture.get() != lastBumpTex) + { + lastBump = params.mBump; + lastBumpTex = params.mTexture.get(); + LLDrawPoolBump::bindBumpMap(params, bump_channel); + } if (rigged) { From 5688d3b22c344c907280c5c4f5ab779e5a53ea99 Mon Sep 17 00:00:00 2001 From: Rye Date: Sun, 14 Jun 2026 23:44:19 -0400 Subject: [PATCH 6/8] llrender: add LLRenderTarget::copyContents, replace shader copy passes with blits Add LLRenderTarget::copyContents (FBO->FBO glBlitFramebuffer, with a glCopyTexSubImage2D fallback when depth/stencil layouts differ) and the static copyContentsToFramebuffer helper. Use it to replace the gCopyProgram/gCopyDepthProgram fullscreen-triangle copy passes with direct framebuffer blits: - water distortion depth copy (LLDrawPoolWater::beginPostDeferredPass) - screen-space reflection scene copy (copyScreenSpaceReflections) - atmospherics / water haze depth copy (doAtmospherics, doWaterHaze) - auto-exposure history copy (generateExposure) Removes the now-unused gCopyDepthProgram (shader load + extern declaration) and the stray gCopyProgram extern in llviewertexture.cpp. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- indra/llrender/llrendertarget.cpp | 77 +++++++++++++++++++++++++++++ indra/llrender/llrendertarget.h | 6 +++ indra/newview/lldrawpoolwater.cpp | 16 +----- indra/newview/llviewershadermgr.cpp | 12 ----- indra/newview/llviewershadermgr.h | 1 - indra/newview/llviewertexture.cpp | 2 - indra/newview/pipeline.cpp | 67 ++++--------------------- 7 files changed, 95 insertions(+), 86 deletions(-) diff --git a/indra/llrender/llrendertarget.cpp b/indra/llrender/llrendertarget.cpp index c3945220a67..edfaa2a8d28 100644 --- a/indra/llrender/llrendertarget.cpp +++ b/indra/llrender/llrendertarget.cpp @@ -641,6 +641,83 @@ void LLRenderTarget::flush() } } +void LLRenderTarget::copyContents(LLRenderTarget& source, S32 srcX0, S32 srcY0, S32 srcX1, S32 srcY1, S32 dstX0, S32 dstY0, S32 dstX1, + S32 dstY1, U32 mask, U32 filter) +{ + LL_PROFILE_GPU_ZONE("LLRenderTarget::copyContents"); + + GLboolean write_depth = mask & GL_DEPTH_BUFFER_BIT ? GL_TRUE : GL_FALSE; + + LLGLDepthTest depth(write_depth, write_depth); + + gGL.flush(); + if (!source.mFBO || !mFBO) + { + LL_WARNS() << "Cannot copy framebuffer contents for non FBO render targets." 
<< LL_ENDL; + return; + } + + if (mask == GL_DEPTH_BUFFER_BIT && source.mStencil != mStencil) + { + stop_glerror(); + + glBindFramebuffer(GL_FRAMEBUFFER, source.mFBO); + check_framebuffer_status(); + gGL.getTexUnit(0)->bind(this, true); + stop_glerror(); + glCopyTexSubImage2D(LLTexUnit::getInternalType(mUsage), 0, srcX0, srcY0, dstX0, dstY0, dstX1, dstY1); + stop_glerror(); + glBindFramebuffer(GL_FRAMEBUFFER, sCurFBO); + stop_glerror(); + } + else + { + glBindFramebuffer(GL_READ_FRAMEBUFFER, source.mFBO); + stop_glerror(); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, mFBO); + stop_glerror(); + check_framebuffer_status(); + stop_glerror(); + glBlitFramebuffer(srcX0, srcY0, srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, mask, filter); + stop_glerror(); + glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + stop_glerror(); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + stop_glerror(); + glBindFramebuffer(GL_FRAMEBUFFER, sCurFBO); + stop_glerror(); + } +} + +// static +void LLRenderTarget::copyContentsToFramebuffer(LLRenderTarget& source, S32 srcX0, S32 srcY0, S32 srcX1, S32 srcY1, S32 dstX0, S32 dstY0, + S32 dstX1, S32 dstY1, U32 mask, U32 filter) +{ + if (!source.mFBO) + { + LL_WARNS() << "Cannot copy framebuffer contents for non FBO render targets." << LL_ENDL; + return; + } + + { + LL_PROFILE_GPU_ZONE("copyContentsToFramebuffer"); + GLboolean write_depth = mask & GL_DEPTH_BUFFER_BIT ? 
GL_TRUE : GL_FALSE; + + LLGLDepthTest depth(write_depth, write_depth); + + glBindFramebuffer(GL_READ_FRAMEBUFFER, source.mFBO); + stop_glerror(); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + stop_glerror(); + check_framebuffer_status(); + stop_glerror(); + glBlitFramebuffer(srcX0, srcY0, srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, mask, filter); + stop_glerror(); + glBindFramebuffer(GL_FRAMEBUFFER, sCurFBO); + stop_glerror(); + } +} + bool LLRenderTarget::isComplete() const { return !mTex.empty() || mDepth; diff --git a/indra/llrender/llrendertarget.h b/indra/llrender/llrendertarget.h index 52ba645e34e..11d0a8602f8 100644 --- a/indra/llrender/llrendertarget.h +++ b/indra/llrender/llrendertarget.h @@ -170,6 +170,12 @@ class LLRenderTarget // asserts that this target is currently bound void flush(); + void copyContents(LLRenderTarget& source, S32 srcX0, S32 srcY0, S32 srcX1, S32 srcY1, S32 dstX0, S32 dstY0, S32 dstX1, S32 dstY1, + U32 mask, U32 filter); + + static void copyContentsToFramebuffer(LLRenderTarget& source, S32 srcX0, S32 srcY0, S32 srcX1, S32 srcY1, S32 dstX0, S32 dstY0, + S32 dstX1, S32 dstY1, U32 mask, U32 filter); + //Returns TRUE if target is ready to be rendered into. //That is, if the target has been allocated with at least //one renderable attachment (i.e. color buffer, depth buffer). 
diff --git a/indra/newview/lldrawpoolwater.cpp b/indra/newview/lldrawpoolwater.cpp index d4dd4d1214f..01fe2840a43 100644 --- a/indra/newview/lldrawpoolwater.cpp +++ b/indra/newview/lldrawpoolwater.cpp @@ -116,22 +116,10 @@ void LLDrawPoolWater::beginPostDeferredPass(S32 pass) LLGLDepthTest depth(GL_TRUE, GL_TRUE, GL_ALWAYS); LLRenderTarget& src = gPipeline.mRT->screen; - LLRenderTarget& depth_src = gPipeline.mRT->deferredScreen; LLRenderTarget& dst = gPipeline.mWaterDis; - dst.bindTarget(); - gCopyDepthProgram.bind(); - - S32 diff_map = gCopyDepthProgram.getTextureChannel(LLShaderMgr::DIFFUSE_MAP); - S32 depth_map = gCopyDepthProgram.getTextureChannel(LLShaderMgr::DEFERRED_DEPTH); - - gGL.getTexUnit(diff_map)->bind(&src); - gGL.getTexUnit(depth_map)->bind(&depth_src, true); - - gPipeline.mScreenTriangleVB->setBuffer(); - gPipeline.mScreenTriangleVB->drawArrays(LLRender::TRIANGLES, 0, 3); - - dst.flush(); + dst.copyContents(src, 0, 0, src.getWidth(), src.getHeight(), 0, 0, dst.getWidth(), dst.getHeight(), + GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT, GL_NEAREST); } } diff --git a/indra/newview/llviewershadermgr.cpp b/indra/newview/llviewershadermgr.cpp index 69d981c7c1e..e1b51e05f20 100644 --- a/indra/newview/llviewershadermgr.cpp +++ b/indra/newview/llviewershadermgr.cpp @@ -3566,18 +3566,6 @@ bool LLViewerShaderMgr::loadShadersInterface() success = gCopyProgram.createShader(); } - if (success) - { - gCopyDepthProgram.mName = "Copy Depth Shader"; - gCopyDepthProgram.mShaderFiles.clear(); - gCopyDepthProgram.mShaderFiles.push_back(make_pair("interface/copyV.glsl", GL_VERTEX_SHADER)); - gCopyDepthProgram.mShaderFiles.push_back(make_pair("interface/copyF.glsl", GL_FRAGMENT_SHADER)); - gCopyDepthProgram.clearPermutations(); - gCopyDepthProgram.addPermutation("COPY_DEPTH", "1"); - gCopyDepthProgram.mShaderLevel = mShaderLevel[SHADER_INTERFACE]; - success = gCopyDepthProgram.createShader(); - } - if (success) { gDrawColorProgram.mName = "Draw Color Shader"; diff --git 
a/indra/newview/llviewershadermgr.h b/indra/newview/llviewershadermgr.h index 903bdb3d35d..75bedaf985e 100644 --- a/indra/newview/llviewershadermgr.h +++ b/indra/newview/llviewershadermgr.h @@ -176,7 +176,6 @@ extern LLGLSLShader gClipProgram; extern LLGLSLShader gBenchmarkProgram; extern LLGLSLShader gReflectionProbeDisplayProgram; extern LLGLSLShader gCopyProgram; -extern LLGLSLShader gCopyDepthProgram; extern LLGLSLShader gPBRTerrainBakeProgram; extern LLGLSLShader gDrawColorProgram; diff --git a/indra/newview/llviewertexture.cpp b/indra/newview/llviewertexture.cpp index 41c7f9cd014..f7baafbdd1d 100644 --- a/indra/newview/llviewertexture.cpp +++ b/indra/newview/llviewertexture.cpp @@ -3210,8 +3210,6 @@ void LLViewerLODTexture::processTextureStats() } } -extern LLGLSLShader gCopyProgram; - bool LLViewerLODTexture::scaleDown() { if (mGLTexturep.isNull() || !mGLTexturep->getHasGLTexture()) diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp index e16c2c405ec..ac0aba85e08 100644 --- a/indra/newview/pipeline.cpp +++ b/indra/newview/pipeline.cpp @@ -7312,14 +7312,8 @@ void LLPipeline::generateExposure(LLRenderTarget* src, LLRenderTarget* dst, bool if (use_history) { // copy last frame's exposure into mLastExposure - mLastExposure.bindTarget(); - gCopyProgram.bind(); - gGL.getTexUnit(0)->bind(dst); - - mScreenTriangleVB->setBuffer(); - mScreenTriangleVB->drawArrays(LLRender::TRIANGLES, 0, 3); - - mLastExposure.flush(); + mLastExposure.copyContents(*dst, 0, 0, dst->getWidth(), dst->getHeight(), 0, 0, mLastExposure.getWidth(), mLastExposure.getHeight(), + GL_COLOR_BUFFER_BIT, GL_NEAREST); } dst->bindTarget(); @@ -7977,23 +7971,8 @@ void LLPipeline::copyScreenSpaceReflections(LLRenderTarget* src, LLRenderTarget* { LL_PROFILE_GPU_ZONE("ssr copy"); LLGLDepthTest depth(GL_TRUE, GL_TRUE, GL_ALWAYS); - - LLRenderTarget& depth_src = mRT->deferredScreen; - - dst->bindTarget(); - dst->clear(); - gCopyDepthProgram.bind(); - - S32 diff_map = 
gCopyDepthProgram.getTextureChannel(LLShaderMgr::DIFFUSE_MAP); - S32 depth_map = gCopyDepthProgram.getTextureChannel(LLShaderMgr::DEFERRED_DEPTH); - - gGL.getTexUnit(diff_map)->bind(src); - gGL.getTexUnit(depth_map)->bind(&depth_src, true); - - mScreenTriangleVB->setBuffer(); - mScreenTriangleVB->drawArrays(LLRender::TRIANGLES, 0, 3); - - dst->flush(); + dst->copyContents(*src, 0, 0, src->getWidth(), src->getHeight(), 0, 0, dst->getWidth(), dst->getHeight(), + GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT, GL_NEAREST); } } @@ -9894,24 +9873,11 @@ void LLPipeline::doAtmospherics() LLGLDepthTest depth(GL_TRUE, GL_TRUE, GL_ALWAYS); LLRenderTarget& src = gPipeline.mRT->screen; - LLRenderTarget& depth_src = gPipeline.mRT->deferredScreen; LLRenderTarget& dst = gPipeline.mWaterDis; - mRT->screen.flush(); - dst.bindTarget(); - gCopyDepthProgram.bind(); - - S32 diff_map = gCopyDepthProgram.getTextureChannel(LLShaderMgr::DIFFUSE_MAP); - S32 depth_map = gCopyDepthProgram.getTextureChannel(LLShaderMgr::DEFERRED_DEPTH); - - gGL.getTexUnit(diff_map)->bind(&src); - gGL.getTexUnit(depth_map)->bind(&depth_src, true); - - gGL.setColorMask(false, false); - gPipeline.mScreenTriangleVB->setBuffer(); - gPipeline.mScreenTriangleVB->drawArrays(LLRender::TRIANGLES, 0, 3); - - dst.flush(); + src.flush(); + dst.copyContents(src, 0, 0, src.getWidth(), src.getHeight(), 0, 0, dst.getWidth(), dst.getHeight(), + GL_DEPTH_BUFFER_BIT, GL_NEAREST); mRT->screen.bindTarget(); } @@ -9958,24 +9924,11 @@ void LLPipeline::doWaterHaze() LLGLDepthTest depth(GL_TRUE, GL_TRUE, GL_ALWAYS); LLRenderTarget& src = gPipeline.mRT->screen; - LLRenderTarget& depth_src = gPipeline.mRT->deferredScreen; LLRenderTarget& dst = gPipeline.mWaterDis; - mRT->screen.flush(); - dst.bindTarget(); - gCopyDepthProgram.bind(); - - S32 diff_map = gCopyDepthProgram.getTextureChannel(LLShaderMgr::DIFFUSE_MAP); - S32 depth_map = gCopyDepthProgram.getTextureChannel(LLShaderMgr::DEFERRED_DEPTH); - - gGL.getTexUnit(diff_map)->bind(&src); - 
gGL.getTexUnit(depth_map)->bind(&depth_src, true); - - gGL.setColorMask(false, false); - gPipeline.mScreenTriangleVB->setBuffer(); - gPipeline.mScreenTriangleVB->drawArrays(LLRender::TRIANGLES, 0, 3); - - dst.flush(); + src.flush(); + dst.copyContents(src, 0, 0, src.getWidth(), src.getHeight(), 0, 0, dst.getWidth(), dst.getHeight(), + GL_DEPTH_BUFFER_BIT, GL_NEAREST); mRT->screen.bindTarget(); } From ecd04253043d022c6ac054166da6cfabd6e7f856 Mon Sep 17 00:00:00 2001 From: Rye Date: Sun, 14 Jun 2026 23:02:23 -0400 Subject: [PATCH 7/8] Use actual floating point trunc and round. This vectorizes to better asm. --- indra/llmath/llmath.h | 60 +++++-------------------------------------- 1 file changed, 6 insertions(+), 54 deletions(-) diff --git a/indra/llmath/llmath.h b/indra/llmath/llmath.h index deb05848855..80a1ba7028f 100644 --- a/indra/llmath/llmath.h +++ b/indra/llmath/llmath.h @@ -155,14 +155,14 @@ constexpr F64 llabs(const F64 a) noexcept return std::bit_cast(std::bit_cast(a) & 0x7fffffffffffffffull); } -constexpr S32 lltrunc(F32 f) +inline S32 lltrunc(F32 f) { - return narrow(f); + return (S32)std::trunc(f); } -constexpr S32 lltrunc(F64 f) +inline S32 lltrunc(F64 f) { - return narrow(f); + return (S32)std::trunc(f); } inline S32 llfloor(F32 f) @@ -184,67 +184,19 @@ inline S32 llfloor(F32 f) #endif } - inline S32 llceil( F32 f ) { // This could probably be optimized, but this works. return (S32)ceil(f); } - -#ifndef BOGUS_ROUND -// Use this round. Does an arithmetic round (0.5 always rounds up) inline S32 ll_round(const F32 val) { - return llfloor(val + 0.5f); + return (S32)lround(val); } - -#else // BOGUS_ROUND -// Old ll_round implementation - does banker's round (toward nearest even in the case of a 0.5. -// Not using this because we don't have a consistent implementation on both platforms, use -// llfloor(val + 0.5f), which is consistent on all platforms. 
-inline S32 ll_round(const F32 val) -{ - #if LL_WINDOWS - // Note: assumes that the floating point control word is set to rounding mode (the default) - S32 ret_val; - _asm fld val - _asm fistp ret_val; - return ret_val; - #elif LL_LINUX - // Note: assumes that the floating point control word is set - // to rounding mode (the default) - S32 ret_val; - __asm__ __volatile__( "flds %1 \n\t" - "fistpl %0 \n\t" - : "=m" (ret_val) - : "m" (val) ); - return ret_val; - #else - return llfloor(val + 0.5f); - #endif -} - -// A fast arithmentic round on intel, from Laurent de Soras http://ldesoras.free.fr -inline int round_int(double x) -{ - const float round_to_nearest = 0.5f; - int i; - __asm - { - fld x - fadd st, st (0) - fadd round_to_nearest - fistp i - sar i, 1 - } - return (i); -} -#endif // BOGUS_ROUND - inline F64 ll_round(const F64 val) { - return F64(floor(val + 0.5f)); + return round(val); } inline F32 ll_round( F32 val, F32 nearest ) From bff2bef05d00e13a4233e439561ec3ab3aadd200 Mon Sep 17 00:00:00 2001 From: Rye Date: Sun, 14 Jun 2026 23:33:56 -0400 Subject: [PATCH 8/8] llrender: fix glCopyTexSubImage2D arg order in copyContents depth fallback The stencil-mismatch fallback in LLRenderTarget::copyContents passed source coordinates as the destination texel offset, destination offsets as the source framebuffer origin, and endpoint coordinates where width/height were expected. Reorder to the correct signature (target, level, xoffset, yoffset, x, y, width, height): destination offset = dstX0/dstY0, source origin = srcX0/srcY0, dimensions = srcX1-srcX0 / srcY1-srcY0. No behavior change for the existing call sites (all copy full, same-size targets at origin), but the fallback is now correct for sub-rect / mismatched-size depth copies. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- indra/llrender/llrendertarget.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/indra/llrender/llrendertarget.cpp b/indra/llrender/llrendertarget.cpp index edfaa2a8d28..498668b0bf1 100644 --- a/indra/llrender/llrendertarget.cpp +++ b/indra/llrender/llrendertarget.cpp @@ -665,7 +665,10 @@ void LLRenderTarget::copyContents(LLRenderTarget& source, S32 srcX0, S32 srcY0, check_framebuffer_status(); gGL.getTexUnit(0)->bind(this, true); stop_glerror(); - glCopyTexSubImage2D(LLTexUnit::getInternalType(mUsage), 0, srcX0, srcY0, dstX0, dstY0, dstX1, dstY1); + // glCopyTexSubImage2D(target, level, xoffset, yoffset, x, y, width, height): + // xoffset/yoffset are the destination texel offset, x/y the source framebuffer + // origin, and the last two are dimensions (not endpoints). + glCopyTexSubImage2D(LLTexUnit::getInternalType(mUsage), 0, dstX0, dstY0, srcX0, srcY0, srcX1 - srcX0, srcY1 - srcY0); stop_glerror(); glBindFramebuffer(GL_FRAMEBUFFER, sCurFBO); stop_glerror();