AlchemyViewer · RyeMutt · Jun 15, 2026 · Jun 14, 2026 · Jun 14, 2026 · Jun 14, 2026
diff --git a/indra/llmath/llmath.h b/indra/llmath/llmath.h
@@ -155,14 +155,14 @@ constexpr F64 llabs(const F64 a) noexcept
     return std::bit_cast<F64>(std::bit_cast<U64>(a) & 0x7fffffffffffffffull);
 }
 
-constexpr S32 lltrunc(F32 f)
+inline S32 lltrunc(F32 f)
 {
-    return narrow(f);
+    return (S32)std::trunc(f); // std::trunc discards the fraction (rounds toward zero) before the S32 cast
 }
 
-constexpr S32 lltrunc(F64 f)
+inline S32 lltrunc(F64 f)
 {
-    return narrow(f);
+    return (S32)std::trunc(f); // std::trunc discards the fraction (rounds toward zero) before the S32 cast
 }
 
 inline S32 llfloor(F32 f)
@@ -184,67 +184,19 @@ inline S32 llfloor(F32 f)
 #endif
 }
 
-
 inline S32 llceil( F32 f )
 {
     // This could probably be optimized, but this works.
     return (S32)ceil(f);
 }
 
-
-#ifndef BOGUS_ROUND
-// Use this round.  Does an arithmetic round (0.5 always rounds up)
 inline S32 ll_round(const F32 val)
 {
-    return llfloor(val + 0.5f);
+    return (S32)lround(val); // lround: nearest integer, halfway cases rounded away from zero
 }
-
-#else // BOGUS_ROUND
-// Old ll_round implementation - does banker's round (toward nearest even in the case of a 0.5.
-// Not using this because we don't have a consistent implementation on both platforms, use
-// llfloor(val + 0.5f), which is consistent on all platforms.
-inline S32 ll_round(const F32 val)
-{
-    #if LL_WINDOWS
-        // Note: assumes that the floating point control word is set to rounding mode (the default)
-        S32 ret_val;
-        _asm fld    val
-        _asm fistp  ret_val;
-        return ret_val;
-    #elif LL_LINUX
-        // Note: assumes that the floating point control word is set
-        // to rounding mode (the default)
-        S32 ret_val;
-        __asm__ __volatile__( "flds %1    \n\t"
-                              "fistpl %0  \n\t"
-                              : "=m" (ret_val)
-                              : "m" (val) );
-        return ret_val;
-    #else
-        return llfloor(val + 0.5f);
-    #endif
-}
-
-// A fast arithmentic round on intel, from Laurent de Soras http://ldesoras.free.fr
-inline int round_int(double x)
-{
-    const float round_to_nearest = 0.5f;
-    int i;
-    __asm
-    {
-        fld x
-        fadd st, st (0)
-        fadd round_to_nearest
-        fistp i
-        sar i, 1
-    }
-    return (i);
-}
-#endif // BOGUS_ROUND
-
 inline F64 ll_round(const F64 val)
 {
-    return F64(floor(val + 0.5f));
+    return round(val); // round: nearest integral value, halfway cases rounded away from zero
 }
 
 inline F32 ll_round( F32 val, F32 nearest )

diff --git a/indra/llrender/llrendertarget.cpp b/indra/llrender/llrendertarget.cpp
@@ -641,6 +641,86 @@ void LLRenderTarget::flush()
     }
 }
 
+void LLRenderTarget::copyContents(LLRenderTarget& source, S32 srcX0, S32 srcY0, S32 srcX1, S32 srcY1, S32 dstX0, S32 dstY0, S32 dstX1,
+                                  S32 dstY1, U32 mask, U32 filter)
+{   // Copy a region of source's framebuffer into this target; warns and copies nothing unless both have an FBO.
+    LL_PROFILE_GPU_ZONE("LLRenderTarget::copyContents");
+
+    GLboolean write_depth = mask & GL_DEPTH_BUFFER_BIT ? GL_TRUE : GL_FALSE; // GL_TRUE only when mask includes the depth bit
+
+    LLGLDepthTest depth(write_depth, write_depth);
+
+    gGL.flush();
+    if (!source.mFBO || !mFBO) // both the source and this target must own an FBO
+    {
+        LL_WARNS() << "Cannot copy framebuffer contents for non FBO render targets." << LL_ENDL;
+        return;
+    }
+
+    if (mask == GL_DEPTH_BUFFER_BIT && source.mStencil != mStencil) // depth-only request and the targets' mStencil differ: copy into the texture instead of blitting
+    {
+        stop_glerror();
+
+        glBindFramebuffer(GL_FRAMEBUFFER, source.mFBO); // glCopyTexSubImage2D reads from the bound framebuffer
+        check_framebuffer_status();
+        gGL.getTexUnit(0)->bind(this, true); // NOTE(review): presumably binds this target's depth texture on unit 0 -- confirm against LLTexUnit::bind
+        stop_glerror();
+        // glCopyTexSubImage2D(target, level, xoffset, yoffset, x, y, width, height):
+        // xoffset/yoffset are the destination texel offset, x/y the source framebuffer
+        // origin, and the last two are dimensions (not endpoints); dstX1/dstY1 and filter are unused on this path.
+        glCopyTexSubImage2D(LLTexUnit::getInternalType(mUsage), 0, dstX0, dstY0, srcX0, srcY0, srcX1 - srcX0, srcY1 - srcY0);
+        stop_glerror();
+        glBindFramebuffer(GL_FRAMEBUFFER, sCurFBO); // rebind the FBO recorded in sCurFBO
+        stop_glerror();
+    }
+    else
+    {
+        glBindFramebuffer(GL_READ_FRAMEBUFFER, source.mFBO); // blit reads from source
+        stop_glerror();
+        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, mFBO); // blit writes into this target
+        stop_glerror();
+        check_framebuffer_status();
+        stop_glerror();
+        glBlitFramebuffer(srcX0, srcY0, srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, mask, filter);
+        stop_glerror();
+        glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); // clear the read and draw bindings ...
+        stop_glerror();
+        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
+        stop_glerror();
+        glBindFramebuffer(GL_FRAMEBUFFER, sCurFBO); // ... then rebind the FBO recorded in sCurFBO
+        stop_glerror();
+    }
+}
+
+// static -- blits a region of source's FBO into framebuffer 0; warns and does nothing when source has no FBO
+void LLRenderTarget::copyContentsToFramebuffer(LLRenderTarget& source, S32 srcX0, S32 srcY0, S32 srcX1, S32 srcY1, S32 dstX0, S32 dstY0,
+                                               S32 dstX1, S32 dstY1, U32 mask, U32 filter)
+{
+    if (!source.mFBO)
+    {
+        LL_WARNS() << "Cannot copy framebuffer contents for non FBO render targets." << LL_ENDL;
+        return;
+    }
+
+    {   // NOTE(review): unlike copyContents, no gGL.flush() is issued before this blit -- confirm callers flush first
+        LL_PROFILE_GPU_ZONE("copyContentsToFramebuffer");
+        GLboolean write_depth = mask & GL_DEPTH_BUFFER_BIT ? GL_TRUE : GL_FALSE; // GL_TRUE only when mask includes the depth bit
+
+        LLGLDepthTest depth(write_depth, write_depth);
+
+        glBindFramebuffer(GL_READ_FRAMEBUFFER, source.mFBO); // blit reads from source
+        stop_glerror();
+        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); // blit writes into framebuffer 0 (the default framebuffer)
+        stop_glerror();
+        check_framebuffer_status();
+        stop_glerror();
+        glBlitFramebuffer(srcX0, srcY0, srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, mask, filter);
+        stop_glerror();
+        glBindFramebuffer(GL_FRAMEBUFFER, sCurFBO); // rebind the FBO recorded in sCurFBO
+        stop_glerror();
+    }
+}
+
 bool LLRenderTarget::isComplete() const
 {
     return !mTex.empty() || mDepth;

diff --git a/indra/llrender/llrendertarget.h b/indra/llrender/llrendertarget.h
@@ -170,6 +170,12 @@ class LLRenderTarget
     // asserts  that this target is currently bound
     void flush();
 
+    void copyContents(LLRenderTarget& source, S32 srcX0, S32 srcY0, S32 srcX1, S32 srcY1, S32 dstX0, S32 dstY0, S32 dstX1, S32 dstY1,
+                      U32 mask, U32 filter);
+
+    static void copyContentsToFramebuffer(LLRenderTarget& source, S32 srcX0, S32 srcY0, S32 srcX1, S32 srcY1, S32 dstX0, S32 dstY0,
+                                          S32 dstX1, S32 dstY1, U32 mask, U32 filter);
+
     //Returns TRUE if target is ready to be rendered into.
     //That is, if the target has been allocated with at least
     //one renderable attachment (i.e. color buffer, depth buffer).

diff --git a/indra/newview/app_settings/settings_alchemy.xml b/indra/newview/app_settings/settings_alchemy.xml
@@ -1158,6 +1158,28 @@
             <key>Value</key>
             <integer>0</integer>
         </map>
+        <key>RenderAvatarShadowDetail</key>
+        <map>
+            <key>Comment</key>
+            <string>Which avatar passes cast shadows. 0 = opaque only, 1 = opaque + alpha mask, 2 = full (also alpha blend). Lower values speed up crowd scenes by skipping the expensive alpha-blend avatar shadow pass across all cascades.</string>
+            <key>Persist</key>
+            <integer>1</integer>
+            <key>Type</key>
+            <string>S32</string>
+            <key>Value</key>
+            <integer>2</integer>
+        </map>
+        <key>RenderShadowCullMode</key>
+        <map>
+            <key>Comment</key>
+            <string>How sun shadow cascades are culled. 0 = cull and sort each cascade separately (default). 1 = cull and sort once against a frustum spanning all cascades, sharing the result (less CPU per frame, more GPU vertex work per cascade). Experimental.</string>
+            <key>Persist</key>
+            <integer>1</integer>
+            <key>Type</key>
+            <string>S32</string>
+            <key>Value</key>
+            <integer>0</integer>
+        </map>
         <key>RenderBloomHDR</key>
         <map>
             <key>Comment</key>

diff --git a/indra/newview/lldrawpool.cpp b/indra/newview/lldrawpool.cpp
@@ -786,6 +786,8 @@ void LLRenderPass::pushGLTFBatches(U32 type, bool textured)
 void LLRenderPass::pushGLTFBatches(U32 type)
 {
     LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWPOOL;
+    LLFetchedGLTFMaterial* lastMat = nullptr;
+    LLViewerTexture* lastTex = nullptr;
     auto* begin = gPipeline.beginRenderMap(type);
     auto* end = gPipeline.endRenderMap(type);
     for (LLCullResult::drawinfo_iterator i = begin; i != end; )
@@ -794,7 +796,7 @@ void LLRenderPass::pushGLTFBatches(U32 type)
         LLDrawInfo& params = **i;
         LLCullResult::increment_iterator(i, end);
 
-        pushGLTFBatch(params);
+        pushGLTFBatch(params, lastMat, lastTex);
     }
 }
 
@@ -814,16 +816,25 @@ void LLRenderPass::pushUntexturedGLTFBatches(U32 type)
 }
 
 // static
-void LLRenderPass::pushGLTFBatch(LLDrawInfo& params)
+void LLRenderPass::pushGLTFBatch(LLDrawInfo& params, LLFetchedGLTFMaterial*& lastMat, LLViewerTexture*& lastTex)
 {
-    auto& mat = params.mGLTFMaterial;
+    LLFetchedGLTFMaterial* mat = params.mGLTFMaterial.get();
 
-    if (mat.notNull())
+    if (mat)
     {
-        mat->bind(params.mTexture);
+        // params.mTexture is the media override (bind() applies it to base color
+        // and emissive), so it is part of the cache key -- otherwise media faces
+        // sharing a material would render with a stale base texture.
+        LLViewerTexture* tex = params.mTexture.get();
+        if (mat != lastMat || tex != lastTex)
+        {
+            mat->bind(params.mTexture);
+            lastMat = mat;
+            lastTex = tex;
+        }
     }
 
-    LLGLDisable cull_face(mat.notNull() && mat->mDoubleSided ? GL_CULL_FACE : 0);
+    LLGLDisable cull_face(mat && mat->mDoubleSided ? GL_CULL_FACE : 0);
 
     setup_texture_matrix(params);
 
@@ -866,6 +877,8 @@ void LLRenderPass::pushRiggedGLTFBatches(U32 type)
     const LLVOAvatar* lastAvatar = nullptr;
     U64 lastMeshId = 0;
     bool skipLastSkin = false;
+    LLFetchedGLTFMaterial* lastMat = nullptr;
+    LLViewerTexture* lastTex = nullptr;
 
     auto* begin = gPipeline.beginRenderMap(type);
     auto* end = gPipeline.endRenderMap(type);
@@ -875,7 +888,7 @@ void LLRenderPass::pushRiggedGLTFBatches(U32 type)
         LLDrawInfo& params = **i;
         LLCullResult::increment_iterator(i, end);
 
-        pushRiggedGLTFBatch(params, lastAvatar, lastMeshId, skipLastSkin);
+        pushRiggedGLTFBatch(params, lastAvatar, lastMeshId, skipLastSkin, lastMat, lastTex);
     }
 }
 
@@ -900,11 +913,11 @@ void LLRenderPass::pushUntexturedRiggedGLTFBatches(U32 type)
 
 
 // static
-void LLRenderPass::pushRiggedGLTFBatch(LLDrawInfo& params, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin)
+void LLRenderPass::pushRiggedGLTFBatch(LLDrawInfo& params, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin, LLFetchedGLTFMaterial*& lastMat, LLViewerTexture*& lastTex)
 {
     if (uploadMatrixPalette(params.mAvatar, params.mSkinInfo, lastAvatar, lastMeshId, skipLastSkin))
     {
-        pushGLTFBatch(params);
+        pushGLTFBatch(params, lastMat, lastTex); // lastMat/lastTex: the caller's by-reference bind cache, passed straight through
     }
 }
 

diff --git a/indra/newview/lldrawpool.h b/indra/newview/lldrawpool.h
@@ -40,6 +40,7 @@ class LLDrawInfo;
 class LLVOAvatar;
 class LLGLSLShader;
 class LLMeshSkinInfo;
+class LLFetchedGLTFMaterial;
 
 class LLDrawPool
 {
@@ -376,8 +377,10 @@ class LLRenderPass : public LLDrawPool
     void pushUntexturedRiggedGLTFBatches(U32 type);
 
     // push a single GLTF draw call
-    static void pushGLTFBatch(LLDrawInfo& params);
-    static void pushRiggedGLTFBatch(LLDrawInfo& params, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin);
+    // lastMat/lastTex track the most recently bound material+media texture so
+    // consecutive draws sharing a material skip the redundant LLFetchedGLTFMaterial::bind
+    static void pushGLTFBatch(LLDrawInfo& params, LLFetchedGLTFMaterial*& lastMat, LLViewerTexture*& lastTex);
+    static void pushRiggedGLTFBatch(LLDrawInfo& params, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin, LLFetchedGLTFMaterial*& lastMat, LLViewerTexture*& lastTex);
     static void pushUntexturedGLTFBatch(LLDrawInfo& params);
     static void pushUntexturedRiggedGLTFBatch(LLDrawInfo& params, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin);
 

diff --git a/indra/newview/lldrawpoolavatar.cpp b/indra/newview/lldrawpoolavatar.cpp
@@ -397,6 +397,18 @@ void LLDrawPoolAvatar::renderShadow(S32 pass)
         return;
     }
 
+    // Optionally skip the costlier avatar shadow passes (alpha blend is the most
+    // expensive and least visually important; alpha mask next). Default 2 = full.
+    static LLCachedControl<S32> avatar_shadow_detail(gSavedSettings, "RenderAvatarShadowDetail", 2);
+    if (pass == SHADOW_PASS_AVATAR_ALPHA_BLEND && avatar_shadow_detail() < 2)
+    {
+        return;
+    }
+    if (pass == SHADOW_PASS_AVATAR_ALPHA_MASK && avatar_shadow_detail() < 1)
+    {
+        return;
+    }
+
     LLDrawPoolAvatar::sShadowPass = pass;
 
     if (pass == SHADOW_PASS_AVATAR_OPAQUE)

diff --git a/indra/newview/lldrawpoolbump.cpp b/indra/newview/lldrawpoolbump.cpp
@@ -546,28 +546,51 @@ void LLDrawPoolBump::renderDeferred(S32 pass)
     for (int i = 0; i < 2; ++i)
     {
         bool rigged = i == 1;
+
+        U32 type = rigged ? LLRenderPass::PASS_BUMP_RIGGED : LLRenderPass::PASS_BUMP;
+        LLCullResult::drawinfo_iterator begin = gPipeline.beginRenderMap(type);
+        LLCullResult::drawinfo_iterator end = gPipeline.endRenderMap(type);
+        if (begin == end)
+        {   // no bump geometry in this pass -- skip the shader bind and texture setup
+            continue;
+        }
+
         gDeferredBumpProgram.bind(rigged);
         diffuse_channel = LLGLSLShader::sCurBoundShaderPtr->enableTexture(LLViewerShaderMgr::DIFFUSE_MAP);
         bump_channel = LLGLSLShader::sCurBoundShaderPtr->enableTexture(LLViewerShaderMgr::BUMP_MAP);
         gGL.getTexUnit(diffuse_channel)->unbind(LLTexUnit::TT_TEXTURE);
         gGL.getTexUnit(bump_channel)->unbind(LLTexUnit::TT_TEXTURE);
 
-        U32 type = rigged ? LLRenderPass::PASS_BUMP_RIGGED : LLRenderPass::PASS_BUMP;
-        LLCullResult::drawinfo_iterator begin = gPipeline.beginRenderMap(type);
-        LLCullResult::drawinfo_iterator end = gPipeline.endRenderMap(type);
-
         const LLVOAvatar* lastAvatar = nullptr;
         U64 lastMeshId = 0;
         bool skipLastSkin = false;
 
+        // Faces are sorted by bumpmap then texture, so the alpha-mask cutoff and the
+        // bump-image bind (an image lookup + texture bind) repeat across runs of faces.
+        // Skip them when unchanged. (bindBumpMap's only side effect, addTextureStats, is
+        // max-based on the source texture, so skipping a repeat is a no-op there too.)
+        U8 lastBump = 255;
+        LLViewerTexture* lastBumpTex = nullptr;
+        F32 lastAlpha = -1.f;
+
         for (LLCullResult::drawinfo_iterator i = begin; i != end; )
         {
             LLDrawInfo& params = **i;
 
             LLCullResult::increment_iterator(i, end);
 
-            LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(params.mAlphaMaskCutoff);
-            LLDrawPoolBump::bindBumpMap(params, bump_channel);
+            if (params.mAlphaMaskCutoff != lastAlpha)
+            {
+                lastAlpha = params.mAlphaMaskCutoff;
+                LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(lastAlpha);
+            }
+
+            if (params.mBump != lastBump || params.mTexture.get() != lastBumpTex)
+            {
+                lastBump = params.mBump;
+                lastBumpTex = params.mTexture.get();
+                LLDrawPoolBump::bindBumpMap(params, bump_channel);
+            }
 
             if (rigged)
             {