diff --git a/indra/llmath/llmath.h b/indra/llmath/llmath.h index deb0584885..80a1ba7028 100644 --- a/indra/llmath/llmath.h +++ b/indra/llmath/llmath.h @@ -155,14 +155,14 @@ constexpr F64 llabs(const F64 a) noexcept return std::bit_cast(std::bit_cast(a) & 0x7fffffffffffffffull); } -constexpr S32 lltrunc(F32 f) +inline S32 lltrunc(F32 f) { - return narrow(f); + return (S32)std::trunc(f); } -constexpr S32 lltrunc(F64 f) +inline S32 lltrunc(F64 f) { - return narrow(f); + return (S32)std::trunc(f); } inline S32 llfloor(F32 f) @@ -184,67 +184,19 @@ inline S32 llfloor(F32 f) #endif } - inline S32 llceil( F32 f ) { // This could probably be optimized, but this works. return (S32)ceil(f); } - -#ifndef BOGUS_ROUND -// Use this round. Does an arithmetic round (0.5 always rounds up) inline S32 ll_round(const F32 val) { - return llfloor(val + 0.5f); + return (S32)lround(val); } - -#else // BOGUS_ROUND -// Old ll_round implementation - does banker's round (toward nearest even in the case of a 0.5. -// Not using this because we don't have a consistent implementation on both platforms, use -// llfloor(val + 0.5f), which is consistent on all platforms. -inline S32 ll_round(const F32 val) -{ - #if LL_WINDOWS - // Note: assumes that the floating point control word is set to rounding mode (the default) - S32 ret_val; - _asm fld val - _asm fistp ret_val; - return ret_val; - #elif LL_LINUX - // Note: assumes that the floating point control word is set - // to rounding mode (the default) - S32 ret_val; - __asm__ __volatile__( "flds %1 \n\t" - "fistpl %0 \n\t" - : "=m" (ret_val) - : "m" (val) ); - return ret_val; - #else - return llfloor(val + 0.5f); - #endif -} - -// A fast arithmentic round on intel, from Laurent de Soras http://ldesoras.free.fr -inline int round_int(double x) -{ - const float round_to_nearest = 0.5f; - int i; - __asm - { - fld x - fadd st, st (0) - fadd round_to_nearest - fistp i - sar i, 1 - } - return (i); -} -#endif // BOGUS_ROUND - inline F64 ll_round(const F64 val) { - return F64(floor(val + 0.5f)); + return round(val); } inline F32 ll_round( F32 val, F32 nearest ) diff --git a/indra/llrender/llrendertarget.cpp b/indra/llrender/llrendertarget.cpp index c3945220a6..498668b0bf 100644 --- a/indra/llrender/llrendertarget.cpp +++ b/indra/llrender/llrendertarget.cpp @@ -641,6 +641,86 @@ void LLRenderTarget::flush() } } +void LLRenderTarget::copyContents(LLRenderTarget& source, S32 srcX0, S32 srcY0, S32 srcX1, S32 srcY1, S32 dstX0, S32 dstY0, S32 dstX1, + S32 dstY1, U32 mask, U32 filter) +{ + LL_PROFILE_GPU_ZONE("LLRenderTarget::copyContents"); + + GLboolean write_depth = mask & GL_DEPTH_BUFFER_BIT ? GL_TRUE : GL_FALSE; + + LLGLDepthTest depth(write_depth, write_depth); + + gGL.flush(); + if (!source.mFBO || !mFBO) + { + LL_WARNS() << "Cannot copy framebuffer contents for non FBO render targets." << LL_ENDL; + return; + } + + if (mask == GL_DEPTH_BUFFER_BIT && source.mStencil != mStencil) + { + stop_glerror(); + + glBindFramebuffer(GL_FRAMEBUFFER, source.mFBO); + check_framebuffer_status(); + gGL.getTexUnit(0)->bind(this, true); + stop_glerror(); + // glCopyTexSubImage2D(target, level, xoffset, yoffset, x, y, width, height): + // xoffset/yoffset are the destination texel offset, x/y the source framebuffer + // origin, and the last two are dimensions (not endpoints). + glCopyTexSubImage2D(LLTexUnit::getInternalType(mUsage), 0, dstX0, dstY0, srcX0, srcY0, srcX1 - srcX0, srcY1 - srcY0); + stop_glerror(); + glBindFramebuffer(GL_FRAMEBUFFER, sCurFBO); + stop_glerror(); + } + else + { + glBindFramebuffer(GL_READ_FRAMEBUFFER, source.mFBO); + stop_glerror(); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, mFBO); + stop_glerror(); + check_framebuffer_status(); + stop_glerror(); + glBlitFramebuffer(srcX0, srcY0, srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, mask, filter); + stop_glerror(); + glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + stop_glerror(); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + stop_glerror(); + glBindFramebuffer(GL_FRAMEBUFFER, sCurFBO); + stop_glerror(); + } +} + +// static +void LLRenderTarget::copyContentsToFramebuffer(LLRenderTarget& source, S32 srcX0, S32 srcY0, S32 srcX1, S32 srcY1, S32 dstX0, S32 dstY0, + S32 dstX1, S32 dstY1, U32 mask, U32 filter) +{ + if (!source.mFBO) + { + LL_WARNS() << "Cannot copy framebuffer contents for non FBO render targets." << LL_ENDL; + return; + } + + { + LL_PROFILE_GPU_ZONE("copyContentsToFramebuffer"); + GLboolean write_depth = mask & GL_DEPTH_BUFFER_BIT ? GL_TRUE : GL_FALSE; + + LLGLDepthTest depth(write_depth, write_depth); + + glBindFramebuffer(GL_READ_FRAMEBUFFER, source.mFBO); + stop_glerror(); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + stop_glerror(); + check_framebuffer_status(); + stop_glerror(); + glBlitFramebuffer(srcX0, srcY0, srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, mask, filter); + stop_glerror(); + glBindFramebuffer(GL_FRAMEBUFFER, sCurFBO); + stop_glerror(); + } +} + bool LLRenderTarget::isComplete() const { return !mTex.empty() || mDepth; diff --git a/indra/llrender/llrendertarget.h b/indra/llrender/llrendertarget.h index 52ba645e34..11d0a8602f 100644 --- a/indra/llrender/llrendertarget.h +++ b/indra/llrender/llrendertarget.h @@ -170,6 +170,12 @@ class LLRenderTarget // asserts that this target is currently bound void flush(); + void copyContents(LLRenderTarget& source, S32 srcX0, S32 srcY0, S32 srcX1, S32 srcY1, S32 dstX0, S32 dstY0, S32 dstX1, S32 dstY1, + U32 mask, U32 filter); + + static void copyContentsToFramebuffer(LLRenderTarget& source, S32 srcX0, S32 srcY0, S32 srcX1, S32 srcY1, S32 dstX0, S32 dstY0, + S32 dstX1, S32 dstY1, U32 mask, U32 filter); + //Returns TRUE if target is ready to be rendered into. //That is, if the target has been allocated with at least //one renderable attachment (i.e. color buffer, depth buffer). diff --git a/indra/newview/app_settings/settings_alchemy.xml b/indra/newview/app_settings/settings_alchemy.xml index b323c0f901..306457b7fb 100644 --- a/indra/newview/app_settings/settings_alchemy.xml +++ b/indra/newview/app_settings/settings_alchemy.xml @@ -1158,6 +1158,28 @@ Value 0 + RenderAvatarShadowDetail + + Comment + Which avatar passes cast shadows. 0 = opaque only, 1 = opaque + alpha mask, 2 = full (also alpha blend). Lower values speed up crowd scenes by skipping the expensive alpha-blend avatar shadow pass across all cascades. + Persist + 1 + Type + S32 + Value + 2 + + RenderShadowCullMode + + Comment + How sun shadow cascades are culled. 0 = cull and sort each cascade separately (default). 1 = cull and sort once against a frustum spanning all cascades, sharing the result (less CPU per frame, more GPU vertex work per cascade). Experimental. + Persist + 1 + Type + S32 + Value + 0 + RenderBloomHDR Comment diff --git a/indra/newview/lldrawpool.cpp b/indra/newview/lldrawpool.cpp index 3eca6059ed..56a3baed34 100644 --- a/indra/newview/lldrawpool.cpp +++ b/indra/newview/lldrawpool.cpp @@ -786,6 +786,8 @@ void LLRenderPass::pushGLTFBatches(U32 type, bool textured) void LLRenderPass::pushGLTFBatches(U32 type) { LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWPOOL; + LLFetchedGLTFMaterial* lastMat = nullptr; + LLViewerTexture* lastTex = nullptr; auto* begin = gPipeline.beginRenderMap(type); auto* end = gPipeline.endRenderMap(type); for (LLCullResult::drawinfo_iterator i = begin; i != end; ) @@ -794,7 +796,7 @@ void LLRenderPass::pushGLTFBatches(U32 type) LLDrawInfo& params = **i; LLCullResult::increment_iterator(i, end); - pushGLTFBatch(params); + pushGLTFBatch(params, lastMat, lastTex); } } @@ -814,16 +816,25 @@ void LLRenderPass::pushUntexturedGLTFBatches(U32 type) } // static -void LLRenderPass::pushGLTFBatch(LLDrawInfo& params) +void LLRenderPass::pushGLTFBatch(LLDrawInfo& params, LLFetchedGLTFMaterial*& lastMat, LLViewerTexture*& lastTex) { - auto& mat = params.mGLTFMaterial; + LLFetchedGLTFMaterial* mat = params.mGLTFMaterial.get(); - if (mat.notNull()) + if (mat) { - mat->bind(params.mTexture); + // params.mTexture is the media override (bind() applies it to base color + // and emissive), so it is part of the cache key -- otherwise media faces + // sharing a material would render with a stale base texture. + LLViewerTexture* tex = params.mTexture.get(); + if (mat != lastMat || tex != lastTex) + { + mat->bind(params.mTexture); + lastMat = mat; + lastTex = tex; + } } - LLGLDisable cull_face(mat.notNull() && mat->mDoubleSided ? GL_CULL_FACE : 0); + LLGLDisable cull_face(mat && mat->mDoubleSided ? GL_CULL_FACE : 0); setup_texture_matrix(params); @@ -866,6 +877,8 @@ void LLRenderPass::pushRiggedGLTFBatches(U32 type) const LLVOAvatar* lastAvatar = nullptr; U64 lastMeshId = 0; bool skipLastSkin = false; + LLFetchedGLTFMaterial* lastMat = nullptr; + LLViewerTexture* lastTex = nullptr; auto* begin = gPipeline.beginRenderMap(type); auto* end = gPipeline.endRenderMap(type); @@ -875,7 +888,7 @@ void LLRenderPass::pushRiggedGLTFBatches(U32 type) LLDrawInfo& params = **i; LLCullResult::increment_iterator(i, end); - pushRiggedGLTFBatch(params, lastAvatar, lastMeshId, skipLastSkin); + pushRiggedGLTFBatch(params, lastAvatar, lastMeshId, skipLastSkin, lastMat, lastTex); } } @@ -900,11 +913,11 @@ void LLRenderPass::pushUntexturedRiggedGLTFBatches(U32 type) // static -void LLRenderPass::pushRiggedGLTFBatch(LLDrawInfo& params, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin) +void LLRenderPass::pushRiggedGLTFBatch(LLDrawInfo& params, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin, LLFetchedGLTFMaterial*& lastMat, LLViewerTexture*& lastTex) { if (uploadMatrixPalette(params.mAvatar, params.mSkinInfo, lastAvatar, lastMeshId, skipLastSkin)) { - pushGLTFBatch(params); + pushGLTFBatch(params, lastMat, lastTex); } } diff --git a/indra/newview/lldrawpool.h b/indra/newview/lldrawpool.h index 46696fc4a4..c645565f06 100644 --- a/indra/newview/lldrawpool.h +++ b/indra/newview/lldrawpool.h @@ -40,6 +40,7 @@ class LLDrawInfo; class LLVOAvatar; class LLGLSLShader; class LLMeshSkinInfo; +class LLFetchedGLTFMaterial; class LLDrawPool { @@ -376,8 +377,10 @@ class LLRenderPass : public LLDrawPool void pushUntexturedRiggedGLTFBatches(U32 type); // push a single GLTF draw call - static void pushGLTFBatch(LLDrawInfo& params); - static void pushRiggedGLTFBatch(LLDrawInfo& params, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin); + // lastMat/lastTex track the most recently bound material+media texture so + // consecutive draws sharing a material skip the redundant LLFetchedGLTFMaterial::bind + static void pushGLTFBatch(LLDrawInfo& params, LLFetchedGLTFMaterial*& lastMat, LLViewerTexture*& lastTex); + static void pushRiggedGLTFBatch(LLDrawInfo& params, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin, LLFetchedGLTFMaterial*& lastMat, LLViewerTexture*& lastTex); static void pushUntexturedGLTFBatch(LLDrawInfo& params); static void pushUntexturedRiggedGLTFBatch(LLDrawInfo& params, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin); diff --git a/indra/newview/lldrawpoolavatar.cpp b/indra/newview/lldrawpoolavatar.cpp index 89d85dfa2a..1cca08c20b 100644 --- a/indra/newview/lldrawpoolavatar.cpp +++ b/indra/newview/lldrawpoolavatar.cpp @@ -397,6 +397,18 @@ void LLDrawPoolAvatar::renderShadow(S32 pass) return; } + // Optionally skip the costlier avatar shadow passes (alpha blend is the most + // expensive and least visually important; alpha mask next). Default 2 = full. + static LLCachedControl avatar_shadow_detail(gSavedSettings, "RenderAvatarShadowDetail", 2); + if (pass == SHADOW_PASS_AVATAR_ALPHA_BLEND && avatar_shadow_detail() < 2) + { + return; + } + if (pass == SHADOW_PASS_AVATAR_ALPHA_MASK && avatar_shadow_detail() < 1) + { + return; + } + LLDrawPoolAvatar::sShadowPass = pass; if (pass == SHADOW_PASS_AVATAR_OPAQUE) diff --git a/indra/newview/lldrawpoolbump.cpp b/indra/newview/lldrawpoolbump.cpp index a07eeb1bed..b1d009afac 100644 --- a/indra/newview/lldrawpoolbump.cpp +++ b/indra/newview/lldrawpoolbump.cpp @@ -546,28 +546,51 @@ void LLDrawPoolBump::renderDeferred(S32 pass) for (int i = 0; i < 2; ++i) { bool rigged = i == 1; + + U32 type = rigged ? LLRenderPass::PASS_BUMP_RIGGED : LLRenderPass::PASS_BUMP; + LLCullResult::drawinfo_iterator begin = gPipeline.beginRenderMap(type); + LLCullResult::drawinfo_iterator end = gPipeline.endRenderMap(type); + if (begin == end) + { // no bump geometry in this pass -- skip the shader bind and texture setup + continue; + } + gDeferredBumpProgram.bind(rigged); diffuse_channel = LLGLSLShader::sCurBoundShaderPtr->enableTexture(LLViewerShaderMgr::DIFFUSE_MAP); bump_channel = LLGLSLShader::sCurBoundShaderPtr->enableTexture(LLViewerShaderMgr::BUMP_MAP); gGL.getTexUnit(diffuse_channel)->unbind(LLTexUnit::TT_TEXTURE); gGL.getTexUnit(bump_channel)->unbind(LLTexUnit::TT_TEXTURE); - U32 type = rigged ? LLRenderPass::PASS_BUMP_RIGGED : LLRenderPass::PASS_BUMP; - LLCullResult::drawinfo_iterator begin = gPipeline.beginRenderMap(type); - LLCullResult::drawinfo_iterator end = gPipeline.endRenderMap(type); - const LLVOAvatar* lastAvatar = nullptr; U64 lastMeshId = 0; bool skipLastSkin = false; + // Faces are sorted by bumpmap then texture, so the alpha-mask cutoff and the + // bump-image bind (an image lookup + texture bind) repeat across runs of faces. + // Skip them when unchanged. (bindBumpMap's only side effect, addTextureStats, is + // max-based on the source texture, so skipping a repeat is a no-op there too.) + U8 lastBump = 255; + LLViewerTexture* lastBumpTex = nullptr; + F32 lastAlpha = -1.f; + for (LLCullResult::drawinfo_iterator i = begin; i != end; ) { LLDrawInfo& params = **i; LLCullResult::increment_iterator(i, end); - LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(params.mAlphaMaskCutoff); - LLDrawPoolBump::bindBumpMap(params, bump_channel); + if (params.mAlphaMaskCutoff != lastAlpha) + { + lastAlpha = params.mAlphaMaskCutoff; + LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(lastAlpha); + } + + if (params.mBump != lastBump || params.mTexture.get() != lastBumpTex) + { + lastBump = params.mBump; + lastBumpTex = params.mTexture.get(); + LLDrawPoolBump::bindBumpMap(params, bump_channel); + } if (rigged) { diff --git a/indra/newview/lldrawpoolmaterials.cpp b/indra/newview/lldrawpoolmaterials.cpp index e7ec2022d2..b330785811 100644 --- a/indra/newview/lldrawpoolmaterials.cpp +++ b/indra/newview/lldrawpoolmaterials.cpp @@ -50,10 +50,46 @@ S32 LLDrawPoolMaterials::getNumDeferredPasses() return 12*2; } +// Render-map pass type for each non-rigged material pass; rigged passes use type + 1. +// Kept in sync with the shader index table in beginDeferredPass and the type list in +// renderDeferred (which now indexes this same array). +static const U32 sMaterialPassType[] = +{ + LLRenderPass::PASS_MATERIAL, + LLRenderPass::PASS_MATERIAL_ALPHA_MASK, + LLRenderPass::PASS_MATERIAL_ALPHA_EMISSIVE, + LLRenderPass::PASS_SPECMAP, + LLRenderPass::PASS_SPECMAP_MASK, + LLRenderPass::PASS_SPECMAP_EMISSIVE, + LLRenderPass::PASS_NORMMAP, + LLRenderPass::PASS_NORMMAP_MASK, + LLRenderPass::PASS_NORMMAP_EMISSIVE, + LLRenderPass::PASS_NORMSPEC, + LLRenderPass::PASS_NORMSPEC_MASK, + LLRenderPass::PASS_NORMSPEC_EMISSIVE, +}; + +bool LLDrawPoolMaterials::isPassEmpty(S32 pass) +{ + bool rigged = false; + if (pass >= 12) + { + rigged = true; + pass -= 12; + } + U32 type = sMaterialPassType[pass] + (rigged ? 1 : 0); + return gPipeline.beginRenderMap(type) == gPipeline.endRenderMap(type); +} + void LLDrawPoolMaterials::beginDeferredPass(S32 pass) { LL_PROFILE_ZONE_SCOPED_CATEGORY_MATERIAL; + if (isPassEmpty(pass)) + { // nothing to draw this pass -- skip the (costly) deferred shader bind + return; + } + bool rigged = false; if (pass >= 12) { @@ -97,7 +133,10 @@ void LLDrawPoolMaterials::endDeferredPass(S32 pass) { LL_PROFILE_ZONE_SCOPED_CATEGORY_MATERIAL; - mShader->unbind(); + if (!isPassEmpty(pass)) + { // only unbind if beginDeferredPass actually bound a shader for this pass + mShader->unbind(); + } LLRenderPass::endRenderPass(pass); } @@ -105,25 +144,11 @@ void LLDrawPoolMaterials::endDeferredPass(S32 pass) void LLDrawPoolMaterials::renderDeferred(S32 pass) { LL_PROFILE_ZONE_SCOPED_CATEGORY_MATERIAL; - static const U32 type_list[] = - { - LLRenderPass::PASS_MATERIAL, - //LLRenderPass::PASS_MATERIAL_ALPHA, - LLRenderPass::PASS_MATERIAL_ALPHA_MASK, - LLRenderPass::PASS_MATERIAL_ALPHA_EMISSIVE, - LLRenderPass::PASS_SPECMAP, - //LLRenderPass::PASS_SPECMAP_BLEND, - LLRenderPass::PASS_SPECMAP_MASK, - LLRenderPass::PASS_SPECMAP_EMISSIVE, - LLRenderPass::PASS_NORMMAP, - //LLRenderPass::PASS_NORMMAP_BLEND, - LLRenderPass::PASS_NORMMAP_MASK, - LLRenderPass::PASS_NORMMAP_EMISSIVE, - LLRenderPass::PASS_NORMSPEC, - //LLRenderPass::PASS_NORMSPEC_BLEND, - LLRenderPass::PASS_NORMSPEC_MASK, - LLRenderPass::PASS_NORMSPEC_EMISSIVE, - }; + + if (isPassEmpty(pass)) + { // beginDeferredPass skipped the bind for this empty pass; nothing to draw + return; + } bool rigged = false; if (pass >= 12) @@ -132,9 +157,9 @@ void LLDrawPoolMaterials::renderDeferred(S32 pass) pass -= 12; } - llassert(pass < sizeof(type_list)/sizeof(U32)); + llassert(pass < sizeof(sMaterialPassType)/sizeof(U32)); - U32 type = type_list[pass]; + U32 type = sMaterialPassType[pass]; if (rigged) { type += 1; diff --git a/indra/newview/lldrawpoolmaterials.h b/indra/newview/lldrawpoolmaterials.h index 345697ffd1..5b10a6cd4f 100644 --- a/indra/newview/lldrawpoolmaterials.h +++ b/indra/newview/lldrawpoolmaterials.h @@ -41,6 +41,11 @@ class LLGLSLShader; class LLDrawPoolMaterials : public LLRenderPass { LLGLSLShader *mShader; + + // True when this pass's render map is empty (no geometry). begin/render/end all + // consult it so an empty pass skips the deferred shader bind/unbind entirely -- + // modern PBR/simple scenes leave most of the 12 legacy material passes empty. + bool isPassEmpty(S32 pass); public: LLDrawPoolMaterials(); diff --git a/indra/newview/lldrawpoolwater.cpp b/indra/newview/lldrawpoolwater.cpp index d4dd4d1214..01fe2840a4 100644 --- a/indra/newview/lldrawpoolwater.cpp +++ b/indra/newview/lldrawpoolwater.cpp @@ -116,22 +116,10 @@ void LLDrawPoolWater::beginPostDeferredPass(S32 pass) LLGLDepthTest depth(GL_TRUE, GL_TRUE, GL_ALWAYS); LLRenderTarget& src = gPipeline.mRT->screen; - LLRenderTarget& depth_src = gPipeline.mRT->deferredScreen; LLRenderTarget& dst = gPipeline.mWaterDis; - dst.bindTarget(); - gCopyDepthProgram.bind(); - - S32 diff_map = gCopyDepthProgram.getTextureChannel(LLShaderMgr::DIFFUSE_MAP); - S32 depth_map = gCopyDepthProgram.getTextureChannel(LLShaderMgr::DEFERRED_DEPTH); - - gGL.getTexUnit(diff_map)->bind(&src); - gGL.getTexUnit(depth_map)->bind(&depth_src, true); - - gPipeline.mScreenTriangleVB->setBuffer(); - gPipeline.mScreenTriangleVB->drawArrays(LLRender::TRIANGLES, 0, 3); - - dst.flush(); + dst.copyContents(src, 0, 0, src.getWidth(), src.getHeight(), 0, 0, dst.getWidth(), dst.getHeight(), + GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT, GL_NEAREST); } } diff --git a/indra/newview/llgltfmaterialpreviewmgr.cpp b/indra/newview/llgltfmaterialpreviewmgr.cpp index c49b751982..11499b15fb 100644 --- a/indra/newview/llgltfmaterialpreviewmgr.cpp +++ b/indra/newview/llgltfmaterialpreviewmgr.cpp @@ -510,9 +510,11 @@ bool LLGLTFPreviewTexture::render() gPipeline.bindDeferredShader(shader); fixup_shader_constants(shader); + LLFetchedGLTFMaterial* lastMat = nullptr; + LLViewerTexture* lastTex = nullptr; for (PreviewSpherePart& part : preview_sphere) { - LLRenderPass::pushGLTFBatch(*part->mDrawInfo); + LLRenderPass::pushGLTFBatch(*part->mDrawInfo, lastMat, lastTex); } gPipeline.unbindDeferredShader(shader); diff --git a/indra/newview/llviewershadermgr.cpp b/indra/newview/llviewershadermgr.cpp index 69d981c7c1..e1b51e05f2 100644 --- a/indra/newview/llviewershadermgr.cpp +++ b/indra/newview/llviewershadermgr.cpp @@ -3566,18 +3566,6 @@ bool LLViewerShaderMgr::loadShadersInterface() success = gCopyProgram.createShader(); } - if (success) - { - gCopyDepthProgram.mName = "Copy Depth Shader"; - gCopyDepthProgram.mShaderFiles.clear(); - gCopyDepthProgram.mShaderFiles.push_back(make_pair("interface/copyV.glsl", GL_VERTEX_SHADER)); - gCopyDepthProgram.mShaderFiles.push_back(make_pair("interface/copyF.glsl", GL_FRAGMENT_SHADER)); - gCopyDepthProgram.clearPermutations(); - gCopyDepthProgram.addPermutation("COPY_DEPTH", "1"); - gCopyDepthProgram.mShaderLevel = mShaderLevel[SHADER_INTERFACE]; - success = gCopyDepthProgram.createShader(); - } - if (success) { gDrawColorProgram.mName = "Draw Color Shader"; diff --git a/indra/newview/llviewershadermgr.h b/indra/newview/llviewershadermgr.h index 903bdb3d35..75bedaf985 100644 --- a/indra/newview/llviewershadermgr.h +++ b/indra/newview/llviewershadermgr.h @@ -176,7 +176,6 @@ extern LLGLSLShader gClipProgram; extern LLGLSLShader gBenchmarkProgram; extern LLGLSLShader gReflectionProbeDisplayProgram; extern LLGLSLShader gCopyProgram; -extern LLGLSLShader gCopyDepthProgram; extern LLGLSLShader gPBRTerrainBakeProgram; extern LLGLSLShader gDrawColorProgram; diff --git a/indra/newview/llviewertexture.cpp b/indra/newview/llviewertexture.cpp index 41c7f9cd01..f7baafbdd1 100644 --- a/indra/newview/llviewertexture.cpp +++ b/indra/newview/llviewertexture.cpp @@ -3210,8 +3210,6 @@ void LLViewerLODTexture::processTextureStats() } } -extern LLGLSLShader gCopyProgram; - bool LLViewerLODTexture::scaleDown() { if (mGLTexturep.isNull() || !mGLTexturep->getHasGLTexture()) diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp index 8a38615614..a1580dc711 100644 --- a/indra/newview/llvovolume.cpp +++ b/indra/newview/llvovolume.cpp @@ -6248,6 +6248,16 @@ struct CompareBatchBreaker const LLTextureEntry* lte = lhs->getTextureEntry(); const LLTextureEntry* rte = rhs->getTextureEntry(); + // Group faces sharing a GLTF material so the PBR push loop can skip + // redundant LLFetchedGLTFMaterial::bind calls (see LLRenderPass::pushGLTFBatch). + // Non-PBR faces have a null render material, so this is a no-op for them. + const LLGLTFMaterial* lgltf = lte->getGLTFRenderMaterial(); + const LLGLTFMaterial* rgltf = rte->getGLTFRenderMaterial(); + if (lgltf != rgltf) + { + return lgltf < rgltf; + } + if (lte->getBumpmap() != rte->getBumpmap()) { return lte->getBumpmap() < rte->getBumpmap(); diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp index 48af355dc1..ac0aba85e0 100644 --- a/indra/newview/pipeline.cpp +++ b/indra/newview/pipeline.cpp @@ -2953,26 +2953,6 @@ void LLPipeline::doOcclusion(LLCamera& camera) gGL.setColorMask(true, true); } - if (sReflectionProbesEnabled && sUseOcclusion > 1 && !LLPipeline::sShadowRender && !gCubeSnapshot) - { - gGL.setColorMask(false, false); - LLGLDepthTest depth(GL_TRUE, GL_FALSE); - LLGLDisable cull(GL_CULL_FACE); - - gOcclusionCubeProgram.bind(); - - if (mCubeVB.isNull()) - { //cube VB will be used for issuing occlusion queries - mCubeVB = ll_create_cube_vb(LLVertexBuffer::MAP_VERTEX); - } - mCubeVB->setBuffer(); - - mHeroProbeManager.doOcclusion(); - gOcclusionCubeProgram.unbind(); - - gGL.setColorMask(true, true); - } - if (LLPipeline::sUseOcclusion > 1 && (sCull->hasOcclusionGroups() || LLVOCachePartition::sNeedsOcclusionCheck)) { @@ -7091,6 +7071,10 @@ void LLPipeline::renderAlphaObjects(bool rigged) const LLVOAvatar* lastAvatarGLTF = nullptr; U64 lastMeshIdGLTF = 0; bool skipLastSkinGLTF; + // GLTF material bind cache; invalidated in the non-GLTF branches below since + // mSimplePool->pushBatch rebinds texture units and would clobber the material + LLFetchedGLTFMaterial* lastMatGLTF = nullptr; + LLViewerTexture* lastTexGLTF = nullptr; auto* begin = gPipeline.beginRenderMap(type); auto* end = gPipeline.endRenderMap(type); @@ -7114,7 +7098,7 @@ void LLPipeline::renderAlphaObjects(bool rigged) LLGLSLShader::sCurBoundShaderPtr->uniform1i(LLShaderMgr::SUN_UP_FACTOR, sun_up); LLGLSLShader::sCurBoundShaderPtr->uniform1f(LLShaderMgr::DEFERRED_SHADOW_TARGET_WIDTH, (float)target_width); LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(ALPHA_BLEND_CUTOFF); - LLRenderPass::pushRiggedGLTFBatch(*pparams, lastAvatarGLTF, lastMeshIdGLTF, skipLastSkinGLTF); + LLRenderPass::pushRiggedGLTFBatch(*pparams, lastAvatarGLTF, lastMeshIdGLTF, skipLastSkinGLTF, lastMatGLTF, lastTexGLTF); } else { @@ -7122,6 +7106,8 @@ void LLPipeline::renderAlphaObjects(bool rigged) LLGLSLShader::sCurBoundShaderPtr->uniform1i(LLShaderMgr::SUN_UP_FACTOR, sun_up); LLGLSLShader::sCurBoundShaderPtr->uniform1f(LLShaderMgr::DEFERRED_SHADOW_TARGET_WIDTH, (float)target_width); LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(ALPHA_BLEND_CUTOFF); + lastMatGLTF = nullptr; // pushBatch clobbers texture units + lastTexGLTF = nullptr; if (mSimplePool->uploadMatrixPalette(pparams->mAvatar, pparams->mSkinInfo, lastAvatar, lastMeshId, skipLastSkin)) { mSimplePool->pushBatch(*pparams, true, true); @@ -7136,7 +7122,7 @@ void LLPipeline::renderAlphaObjects(bool rigged) LLGLSLShader::sCurBoundShaderPtr->uniform1i(LLShaderMgr::SUN_UP_FACTOR, sun_up); LLGLSLShader::sCurBoundShaderPtr->uniform1f(LLShaderMgr::DEFERRED_SHADOW_TARGET_WIDTH, (float)target_width); LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(ALPHA_BLEND_CUTOFF); - LLRenderPass::pushGLTFBatch(*pparams); + LLRenderPass::pushGLTFBatch(*pparams, lastMatGLTF, lastTexGLTF); } else { @@ -7144,6 +7130,8 @@ void LLPipeline::renderAlphaObjects(bool rigged) LLGLSLShader::sCurBoundShaderPtr->uniform1i(LLShaderMgr::SUN_UP_FACTOR, sun_up); LLGLSLShader::sCurBoundShaderPtr->uniform1f(LLShaderMgr::DEFERRED_SHADOW_TARGET_WIDTH, (float)target_width); LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(ALPHA_BLEND_CUTOFF); + lastMatGLTF = nullptr; // pushBatch clobbers texture units + lastTexGLTF = nullptr; mSimplePool->pushBatch(*pparams, true, true); } } @@ -7324,14 +7312,8 @@ void LLPipeline::generateExposure(LLRenderTarget* src, LLRenderTarget* dst, bool if (use_history) { // copy last frame's exposure into mLastExposure - mLastExposure.bindTarget(); - gCopyProgram.bind(); - gGL.getTexUnit(0)->bind(dst); - - mScreenTriangleVB->setBuffer(); - mScreenTriangleVB->drawArrays(LLRender::TRIANGLES, 0, 3); - - mLastExposure.flush(); + mLastExposure.copyContents(*dst, 0, 0, dst->getWidth(), dst->getHeight(), 0, 0, mLastExposure.getWidth(), mLastExposure.getHeight(), + GL_COLOR_BUFFER_BIT, GL_NEAREST); } dst->bindTarget(); @@ -7989,23 +7971,8 @@ void LLPipeline::copyScreenSpaceReflections(LLRenderTarget* src, LLRenderTarget* { LL_PROFILE_GPU_ZONE("ssr copy"); LLGLDepthTest depth(GL_TRUE, GL_TRUE, GL_ALWAYS); - - LLRenderTarget& depth_src = mRT->deferredScreen; - - dst->bindTarget(); - dst->clear(); - gCopyDepthProgram.bind(); - - S32 diff_map = gCopyDepthProgram.getTextureChannel(LLShaderMgr::DIFFUSE_MAP); - S32 depth_map = gCopyDepthProgram.getTextureChannel(LLShaderMgr::DEFERRED_DEPTH); - - gGL.getTexUnit(diff_map)->bind(src); - gGL.getTexUnit(depth_map)->bind(&depth_src, true); - - mScreenTriangleVB->setBuffer(); - mScreenTriangleVB->drawArrays(LLRender::TRIANGLES, 0, 3); - - dst->flush(); + dst->copyContents(*src, 0, 0, src->getWidth(), src->getHeight(), 0, 0, dst->getWidth(), dst->getHeight(), + GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT, GL_NEAREST); } } @@ -9906,24 +9873,11 @@ void LLPipeline::doAtmospherics() LLGLDepthTest depth(GL_TRUE, GL_TRUE, GL_ALWAYS); LLRenderTarget& src = gPipeline.mRT->screen; - LLRenderTarget& depth_src = gPipeline.mRT->deferredScreen; LLRenderTarget& dst = gPipeline.mWaterDis; - mRT->screen.flush(); - dst.bindTarget(); - gCopyDepthProgram.bind(); - - S32 diff_map = gCopyDepthProgram.getTextureChannel(LLShaderMgr::DIFFUSE_MAP); - S32 depth_map = gCopyDepthProgram.getTextureChannel(LLShaderMgr::DEFERRED_DEPTH); - - gGL.getTexUnit(diff_map)->bind(&src); - gGL.getTexUnit(depth_map)->bind(&depth_src, true); - - gGL.setColorMask(false, false); - gPipeline.mScreenTriangleVB->setBuffer(); - gPipeline.mScreenTriangleVB->drawArrays(LLRender::TRIANGLES, 0, 3); - - dst.flush(); + src.flush(); + dst.copyContents(src, 0, 0, src.getWidth(), src.getHeight(), 0, 0, dst.getWidth(), dst.getHeight(), + GL_DEPTH_BUFFER_BIT, GL_NEAREST); mRT->screen.bindTarget(); } @@ -9970,24 +9924,11 @@ void LLPipeline::doWaterHaze() LLGLDepthTest depth(GL_TRUE, GL_TRUE, GL_ALWAYS); LLRenderTarget& src = gPipeline.mRT->screen; - LLRenderTarget& depth_src = gPipeline.mRT->deferredScreen; LLRenderTarget& dst = gPipeline.mWaterDis; - mRT->screen.flush(); - dst.bindTarget(); - gCopyDepthProgram.bind(); - - S32 diff_map = gCopyDepthProgram.getTextureChannel(LLShaderMgr::DIFFUSE_MAP); - S32 depth_map = gCopyDepthProgram.getTextureChannel(LLShaderMgr::DEFERRED_DEPTH); - - gGL.getTexUnit(diff_map)->bind(&src); - gGL.getTexUnit(depth_map)->bind(&depth_src, true); - - gGL.setColorMask(false, false); - gPipeline.mScreenTriangleVB->setBuffer(); - gPipeline.mScreenTriangleVB->drawArrays(LLRender::TRIANGLES, 0, 3); - - dst.flush(); + src.flush(); + dst.copyContents(src, 0, 0, src.getWidth(), src.getHeight(), 0, 0, dst.getWidth(), dst.getHeight(), + GL_DEPTH_BUFFER_BIT, GL_NEAREST); mRT->screen.bindTarget(); } @@ -10403,7 +10344,7 @@ static LLTrace::BlockTimerStatHandle FTM_SHADOW_ALPHA_TREE("Alpha Tree"); static LLTrace::BlockTimerStatHandle FTM_SHADOW_ALPHA_GRASS("Alpha Grass"); static LLTrace::BlockTimerStatHandle FTM_SHADOW_FULLBRIGHT_ALPHA_MASKED("Fullbright Alpha Masked"); -void LLPipeline::renderShadow(const glm::mat4& view, const glm::mat4& proj, LLCamera& shadow_cam, LLCullResult& result, bool depth_clamp) +void LLPipeline::renderShadow(const glm::mat4& view, const glm::mat4& proj, LLCamera& shadow_cam, LLCullResult& result, bool depth_clamp, bool do_cull) { LL_PROFILE_ZONE_SCOPED_CATEGORY_PIPELINE; //LL_RECORD_BLOCK_TIME(FTM_SHADOW_RENDER); LL_PROFILE_GPU_ZONE("renderShadow"); @@ -10439,7 +10380,13 @@ void LLPipeline::renderShadow(const glm::mat4& view, const glm::mat4& proj, LLCa LLGLDepthTest depth_test(GL_TRUE, GL_TRUE, GL_LESS); - updateCull(shadow_cam, result); + // In RenderShadowCullMode 1, do_cull is false: generateSunShadow did the single union + // octree cull and pre-filtered `result` to this cascade's frustum (bucketShadowCull), + // so skip the per-cascade octree walk and only sort/build this cascade's render map. + if (do_cull) + { + updateCull(shadow_cam, result); + } stateSort(shadow_cam, result); @@ -10837,6 +10784,49 @@ class LLDisableOcclusionCulling } }; +// Re-bucket a shared sun-shadow cull (produced by a single union octree walk) down to one +// cascade: copy the union's visible/drawable groups whose object bounds intersect this +// cascade's frustum into `dst` (the same AABBInFrustumObjectBounds test the per-cascade +// cull uses, so the geometry matches mode 0 exactly), then pass the small individual- +// drawable and bridge lists through unfiltered. stateSort then builds the cascade's render +// map from `dst`. Lets RenderShadowCullMode 1 share one octree walk across all cascades. +static void bucketShadowCull(LLCullResult& src, LLCamera& cam, LLCullResult& dst) +{ + dst.clear(); + + for (LLCullResult::sg_iterator i = src.beginVisibleGroups(), end = src.endVisibleGroups(); i != end; ++i) + { + LLSpatialGroup* group = *i; + if (!group->isDead() && + cam.AABBInFrustum(group->getObjectBounds()[0], group->getObjectBounds()[1]) > 0) + { + dst.pushVisibleGroup(group); + } + } + + for (LLCullResult::sg_iterator i = src.beginDrawableGroups(), end = src.endDrawableGroups(); i != end; ++i) + { + LLSpatialGroup* group = *i; + if (!group->isDead() && + cam.AABBInFrustum(group->getObjectBounds()[0], group->getObjectBounds()[1]) > 0) + { + dst.pushDrawableGroup(group); + } + } + + // Individual drawables and spatial bridges (attachments/animesh) are few; pass them + // through unfiltered -- conservative (they render into every cascade) but correct. + for (LLCullResult::drawable_iterator i = src.beginVisibleList(), end = src.endVisibleList(); i != end; ++i) + { + dst.pushDrawable(*i); + } + + for (LLCullResult::bridge_iterator i = src.beginVisibleBridge(), end = src.endVisibleBridge(); i != end; ++i) + { + dst.pushBridge(*i); + } +} + void LLPipeline::generateSunShadow(LLCamera& camera) { if (!sRenderDeferred || RenderShadowDetail <= 0) @@ -11070,6 +11060,105 @@ void LLPipeline::generateSunShadow(LLCamera& camera) } else { + // RenderShadowCullMode 1: do the expensive octree cull ONCE against a frustum + // spanning every sun cascade, then have each cascade cheaply re-bucket the union's + // visible groups by its own frustum (bucketShadowCull) and build its own render + // map. Saves 3 of 4 octree walks per frame while each cascade still renders only + // its own slice -- GPU-neutral vs. per-cascade culling, so it helps CPU-bound + // targets without regressing GPU-bound ones. Disabled in cube snapshots. Default 0. + static LLCachedControl sShadowCullMode(gSavedSettings, "RenderShadowCullMode", 0); + bool have_union_cull = false; + static LLCullResult sUnionShadowResult; + if (sShadowCullMode() == 1 && !gCubeSnapshot) + { + // updateFrustumPlanes below seeds the frustum corners from the *current* GL + // matrices, and earlier setup in this function leaves them in a non-main-view + // state. Restore the saved (main-view) matrices first, as the cascade loop + // does each iteration, so the corner directions used below are correct. + set_current_modelview(saved_view); + set_current_projection(saved_proj); + + LLCamera ucam = camera; + ucam.setFar(16.f); + LLViewerCamera::updateFrustumPlanes(ucam, false, false, true); + + LLVector3 ueye = camera.getOrigin(); + LLVector3* ufrust = ucam.mAgentFrustum; + LLVector3 upn = ucam.getAtAxis(); + for (U32 i = 0; i < 4; i++) + { + LLVector3 delta = ufrust[i+4]-ueye; + delta += (ufrust[i+4]-ufrust[(i+2)%4+4])*0.05f; + delta.normVec(); + F32 dp = delta*upn; + ufrust[i] = ueye + (delta*dist[0]*0.75f)/dp; + ufrust[i+4] = ueye + (delta*dist[4]*1.25f)/dp; + } + + { + glm::mat4 uview = look(camera.getOrigin(), lightDir, -up); + + // AABB the 8 full-range frustum corners directly in light space. ufrust + // spans [dist[0], dist[4]] (built above), so this box is a guaranteed + // superset of every cascade. getVisiblePointCloud is NOT usable here: the + // far corners sit past the view far plane, so it clips the cloud down to + // the 4 near corners and the union collapses to a dot at the camera. + LLVector3 mn(mul_mat4_vec3(uview, glm::vec3(ufrust[0]))); + LLVector3 mx = mn; + for (U32 i = 1; i < 8; i++) + { + LLVector3 p(mul_mat4_vec3(uview, glm::vec3(ufrust[i]))); + update_min_max(mn, mx, p); + } + + LLVector3 ucenter = (mn+mx)*0.5f; + + // Conservative ortho light-space projection bounding the whole point + // cloud. updateFrustumPlanes derives the cull frustum from the *current* + // GL modelview/projection, so set them here. Ortho is looser than the + // per-cascade perspective fit, so the result is a superset of every + // cascade frustum -- no dropped casters. + // + // Pad the depth range: with the sun near-overhead the light-space + // footprint is nearly planar (znear ~= zfar), which makes glm::ortho + // singular and updateFrustumPlanes unproject to NaN frustum corners -- + // shadows then drop and flip with camera angle. The near plane is + // replaced by shadow_near_clip below and the far only needs to clear the + // receivers, so widening the depth range is always safe. + F32 zpad = llmax(mx.mV[0] - mn.mV[0], mx.mV[1] - mn.mV[1]) * 0.5f + 1.f; + glm::mat4 uproj = glm::ortho(mn.mV[0], mx.mV[0], mn.mV[1], mx.mV[1], -mx.mV[2] - zpad, -mn.mV[2] + zpad); + + ucam.setOriginAndLookAt(ueye, up, ucenter); + ucam.setOrigin(0, 0, 0); + + LLViewerCamera::sCurCameraID = LLViewerCamera::CAMERA_SUN_SHADOW0; + set_current_modelview(uview); + set_current_projection(uproj); + LLViewerCamera::updateFrustumPlanes(ucam, false, false, true); + ucam.getAgentPlane(LLCamera::AGENT_PLANE_NEAR).set(shadow_near_clip); + + bool saved_shadow_render = LLPipeline::sShadowRender; + U32 saved_occlusion = sUseOcclusion; + LLPipeline::sShadowRender = true; + // Disable occlusion culling for the shadow cull exactly as renderShadow + // does: occlusion queries are main-camera and previous-frame based, so + // leaving them on wrongly culls casters hidden from the main view (their + // shadows still show) and flickers as the queries resolve frame to frame. + sUseOcclusion = 0; + // One octree walk for the whole sun shadow. No stateSort here -- each + // cascade re-buckets these visible groups and sorts its own render map. + updateCull(ucam, sUnionShadowResult); + sUseOcclusion = saved_occlusion; + LLPipeline::sShadowRender = saved_shadow_render; + + // restore main matrices (the cascade loop sets its own each iteration) + set_current_modelview(saved_view); + set_current_projection(saved_proj); + + have_union_cull = true; + } + } + for (S32 j = 0; j < (gCubeSnapshot ? 2 : 4); j++) { if (!hasRenderDebugMask(RENDER_DEBUG_SHADOW_FRUSTA) && !gCubeSnapshot) @@ -11430,7 +11519,11 @@ void LLPipeline::generateSunShadow(LLCamera& camera) { static LLCullResult result[4]; - renderShadow(view[j], proj[j], shadow_cam, result[j], true); + if (have_union_cull) + { // re-bucket the shared union cull down to this cascade's frustum + bucketShadowCull(sUnionShadowResult, shadow_cam, result[j]); + } + renderShadow(view[j], proj[j], shadow_cam, result[j], true, !have_union_cull); } mRT->shadow[j].flush(); diff --git a/indra/newview/pipeline.h b/indra/newview/pipeline.h index 665b016ef0..f7cc8dd10c 100644 --- a/indra/newview/pipeline.h +++ b/indra/newview/pipeline.h @@ -353,7 +353,7 @@ class LLPipeline void renderHighlight(const LLViewerObject* obj, F32 fade); - void renderShadow(const glm::mat4& view, const glm::mat4& proj, LLCamera& camera, LLCullResult& result, bool depth_clamp); + void renderShadow(const glm::mat4& view, const glm::mat4& proj, LLCamera& camera, LLCullResult& result, bool depth_clamp, bool do_cull = true); void renderSelectedFaces(const LLColor4& color); void renderHighlights(); void renderDebug();