Skip to content
Merged
60 changes: 6 additions & 54 deletions indra/llmath/llmath.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,14 +155,14 @@ constexpr F64 llabs(const F64 a) noexcept
return std::bit_cast<F64>(std::bit_cast<U64>(a) & 0x7fffffffffffffffull);
}

constexpr S32 lltrunc(F32 f)
inline S32 lltrunc(F32 f)
{
return narrow(f);
return (S32)std::trunc(f);
}

constexpr S32 lltrunc(F64 f)
inline S32 lltrunc(F64 f)
{
return narrow(f);
return (S32)std::trunc(f);
}

inline S32 llfloor(F32 f)
Expand All @@ -184,67 +184,19 @@ inline S32 llfloor(F32 f)
#endif
}


// Returns the ceiling of f (smallest integral value not less than f), converted to S32.
inline S32 llceil( F32 f )
{
    // Not tuned for speed; simply defers to ceil() and converts the result.
    const auto rounded_up = ceil(f);
    return (S32)rounded_up;
}


#ifndef BOGUS_ROUND
// Use this round. Does an arithmetic round (0.5 always rounds up)
inline S32 ll_round(const F32 val)
{
return llfloor(val + 0.5f);
return (S32)lround(val);
}

#else // BOGUS_ROUND
// Old ll_round implementation - does banker's round (toward nearest even in the case of a 0.5).
// Not used because the implementation is not consistent across platforms; prefer
// llfloor(val + 0.5f), which is consistent on all platforms.
//
// Converts val to S32. On LL_WINDOWS and LL_LINUX the conversion is done with the x87
// fld/fistp instruction pair; on any other platform it falls back to llfloor(val + 0.5f),
// so the result for exact .5 inputs can differ between platforms.
inline S32 ll_round(const F32 val)
{
#if LL_WINDOWS
// Note: assumes that the floating point control word is set to rounding mode (the default)
S32 ret_val;
// MSVC inline assembly: push val onto the FPU stack, then pop it into ret_val as an integer.
_asm fld val
_asm fistp ret_val;
return ret_val;
#elif LL_LINUX
// Note: assumes that the floating point control word is set
// to rounding mode (the default)
S32 ret_val;
// GCC extended assembly equivalent of the Windows branch: load val from memory (%1)
// and store the converted integer to ret_val (%0).
__asm__ __volatile__( "flds %1 \n\t"
"fistpl %0 \n\t"
: "=m" (ret_val)
: "m" (val) );
return ret_val;
#else
// No inline-assembly path for this platform: plain arithmetic round instead.
return llfloor(val + 0.5f);
#endif
}

// A fast arithmetic round on Intel, from Laurent de Soras http://ldesoras.free.fr
//
// Computes 2*x + 0.5 on the x87 stack, stores that sum as an integer, and then halves the
// integer with an arithmetic right shift. NOTE(review): the integer store presumably uses
// the FPU's current rounding mode (round-to-nearest by default) -- confirm before relying
// on the exact behaviour for halfway cases.
// Uses MSVC-style __asm block syntax.
inline int round_int(double x)
{
const float round_to_nearest = 0.5f;
int i;
__asm
{
// st(0) = x
fld x
// st(0) = 2 * x
fadd st, st (0)
// st(0) = 2 * x + 0.5
fadd round_to_nearest
// i = (int) st(0), popping the FPU stack
fistp i
// i = i >> 1 (arithmetic shift keeps the sign)
sar i, 1
}
return (i);
}
#endif // BOGUS_ROUND

inline F64 ll_round(const F64 val)
{
return F64(floor(val + 0.5f));
return round(val);
}

inline F32 ll_round( F32 val, F32 nearest )
Expand Down
80 changes: 80 additions & 0 deletions indra/llrender/llrendertarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,86 @@ void LLRenderTarget::flush()
}
}

// Copies a rectangle from `source` into this render target.
//
// srcX0/srcY0/srcX1/srcY1 and dstX0/dstY0/dstX1/dstY1 are the source and destination
// rectangles, `mask` selects the buffers to copy (GL_*_BUFFER_BIT) and `filter` is the
// filter handed to glBlitFramebuffer.
//
// Both targets must have an FBO; otherwise a warning is logged and nothing is copied.
// A depth-only copy between targets whose mStencil settings differ goes through
// glCopyTexSubImage2D; every other case is a framebuffer blit. Either way the
// framebuffer binding is returned to sCurFBO before the function exits.
void LLRenderTarget::copyContents(LLRenderTarget& source, S32 srcX0, S32 srcY0, S32 srcX1, S32 srcY1, S32 dstX0, S32 dstY0, S32 dstX1,
                                  S32 dstY1, U32 mask, U32 filter)
{
    LL_PROFILE_GPU_ZONE("LLRenderTarget::copyContents");

    // The same flag is passed as both arguments of the scoped depth-test state.
    GLboolean depth_flag = GL_FALSE;
    if (mask & GL_DEPTH_BUFFER_BIT)
    {
        depth_flag = GL_TRUE;
    }

    LLGLDepthTest depth(depth_flag, depth_flag);

    gGL.flush();

    const bool both_have_fbo = source.mFBO && mFBO;
    if (!both_have_fbo)
    {
        LL_WARNS() << "Cannot copy framebuffer contents for non FBO render targets." << LL_ENDL;
        return;
    }

    const bool depth_only = (mask == GL_DEPTH_BUFFER_BIT);
    const bool stencil_differs = (source.mStencil != mStencil);

    if (!(depth_only && stencil_differs))
    {
        // General case: blit from the source FBO into this target's FBO.
        glBindFramebuffer(GL_READ_FRAMEBUFFER, source.mFBO);
        stop_glerror();
        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, mFBO);
        stop_glerror();
        check_framebuffer_status();
        stop_glerror();
        glBlitFramebuffer(srcX0, srcY0, srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, mask, filter);
        stop_glerror();

        // Unbind the read/draw targets, then restore the current FBO.
        glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
        stop_glerror();
        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
        stop_glerror();
        glBindFramebuffer(GL_FRAMEBUFFER, sCurFBO);
        stop_glerror();
        return;
    }

    // Depth-only copy with differing stencil settings: read from the source FBO
    // and copy straight into this target's bound texture.
    stop_glerror();

    glBindFramebuffer(GL_FRAMEBUFFER, source.mFBO);
    check_framebuffer_status();
    gGL.getTexUnit(0)->bind(this, true);
    stop_glerror();

    // glCopyTexSubImage2D(target, level, xoffset, yoffset, x, y, width, height):
    // xoffset/yoffset are the destination texel offset, x/y the source framebuffer
    // origin, and the last two are dimensions (not endpoints).
    const S32 copy_width = srcX1 - srcX0;
    const S32 copy_height = srcY1 - srcY0;
    glCopyTexSubImage2D(LLTexUnit::getInternalType(mUsage), 0, dstX0, dstY0, srcX0, srcY0, copy_width, copy_height);
    stop_glerror();

    glBindFramebuffer(GL_FRAMEBUFFER, sCurFBO);
    stop_glerror();
}

// static
// Blits a rectangle of `source` into framebuffer 0.
//
// srcX0/srcY0/srcX1/srcY1 and dstX0/dstY0/dstX1/dstY1 are the source and destination
// rectangles; `mask` and `filter` are forwarded unchanged to glBlitFramebuffer.
// If `source` has no FBO a warning is logged and nothing is copied. The framebuffer
// binding is returned to sCurFBO after the blit.
void LLRenderTarget::copyContentsToFramebuffer(LLRenderTarget& source, S32 srcX0, S32 srcY0, S32 srcX1, S32 srcY1, S32 dstX0, S32 dstY0,
                                               S32 dstX1, S32 dstY1, U32 mask, U32 filter)
{
    const bool source_has_fbo = source.mFBO != 0;
    if (!source_has_fbo)
    {
        LL_WARNS() << "Cannot copy framebuffer contents for non FBO render targets." << LL_ENDL;
        return;
    }

    LL_PROFILE_GPU_ZONE("copyContentsToFramebuffer");

    // The same flag is passed as both arguments of the scoped depth-test state.
    GLboolean depth_flag = GL_FALSE;
    if (mask & GL_DEPTH_BUFFER_BIT)
    {
        depth_flag = GL_TRUE;
    }

    LLGLDepthTest depth(depth_flag, depth_flag);

    // Read from the source FBO, draw into framebuffer 0.
    glBindFramebuffer(GL_READ_FRAMEBUFFER, source.mFBO);
    stop_glerror();
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
    stop_glerror();
    check_framebuffer_status();
    stop_glerror();

    glBlitFramebuffer(srcX0, srcY0, srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, mask, filter);
    stop_glerror();

    // Restore whatever FBO was current before the copy.
    glBindFramebuffer(GL_FRAMEBUFFER, sCurFBO);
    stop_glerror();
}

bool LLRenderTarget::isComplete() const
{
return !mTex.empty() || mDepth;
Expand Down
6 changes: 6 additions & 0 deletions indra/llrender/llrendertarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,12 @@ class LLRenderTarget
// asserts that this target is currently bound
void flush();

void copyContents(LLRenderTarget& source, S32 srcX0, S32 srcY0, S32 srcX1, S32 srcY1, S32 dstX0, S32 dstY0, S32 dstX1, S32 dstY1,
U32 mask, U32 filter);

static void copyContentsToFramebuffer(LLRenderTarget& source, S32 srcX0, S32 srcY0, S32 srcX1, S32 srcY1, S32 dstX0, S32 dstY0,
S32 dstX1, S32 dstY1, U32 mask, U32 filter);

//Returns TRUE if target is ready to be rendered into.
//That is, if the target has been allocated with at least
//one renderable attachment (i.e. color buffer, depth buffer).
Expand Down
22 changes: 22 additions & 0 deletions indra/newview/app_settings/settings_alchemy.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1158,6 +1158,28 @@
<key>Value</key>
<integer>0</integer>
</map>
<key>RenderAvatarShadowDetail</key>
<map>
<key>Comment</key>
<string>Which avatar passes cast shadows. 0 = opaque only, 1 = opaque + alpha mask, 2 = full (also alpha blend). Lower values speed up crowd scenes by skipping the expensive alpha-blend avatar shadow pass across all cascades.</string>
<key>Persist</key>
<integer>1</integer>
<key>Type</key>
<string>S32</string>
<key>Value</key>
<integer>2</integer>
</map>
<key>RenderShadowCullMode</key>
<map>
<key>Comment</key>
<string>How sun shadow cascades are culled. 0 = cull and sort each cascade separately (default). 1 = cull and sort once against a frustum spanning all cascades, sharing the result (less CPU per frame, more GPU vertex work per cascade). Experimental.</string>
<key>Persist</key>
<integer>1</integer>
<key>Type</key>
<string>S32</string>
<key>Value</key>
<integer>0</integer>
</map>
<key>RenderBloomHDR</key>
<map>
<key>Comment</key>
Expand Down
31 changes: 22 additions & 9 deletions indra/newview/lldrawpool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,8 @@ void LLRenderPass::pushGLTFBatches(U32 type, bool textured)
void LLRenderPass::pushGLTFBatches(U32 type)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWPOOL;
LLFetchedGLTFMaterial* lastMat = nullptr;
LLViewerTexture* lastTex = nullptr;
auto* begin = gPipeline.beginRenderMap(type);
auto* end = gPipeline.endRenderMap(type);
for (LLCullResult::drawinfo_iterator i = begin; i != end; )
Expand All @@ -794,7 +796,7 @@ void LLRenderPass::pushGLTFBatches(U32 type)
LLDrawInfo& params = **i;
LLCullResult::increment_iterator(i, end);

pushGLTFBatch(params);
pushGLTFBatch(params, lastMat, lastTex);
}
}

Expand All @@ -814,16 +816,25 @@ void LLRenderPass::pushUntexturedGLTFBatches(U32 type)
}

// static
void LLRenderPass::pushGLTFBatch(LLDrawInfo& params)
void LLRenderPass::pushGLTFBatch(LLDrawInfo& params, LLFetchedGLTFMaterial*& lastMat, LLViewerTexture*& lastTex)
{
auto& mat = params.mGLTFMaterial;
LLFetchedGLTFMaterial* mat = params.mGLTFMaterial.get();

if (mat.notNull())
if (mat)
{
mat->bind(params.mTexture);
// params.mTexture is the media override (bind() applies it to base color
// and emissive), so it is part of the cache key -- otherwise media faces
// sharing a material would render with a stale base texture.
LLViewerTexture* tex = params.mTexture.get();
if (mat != lastMat || tex != lastTex)
{
mat->bind(params.mTexture);
lastMat = mat;
lastTex = tex;
}
}

LLGLDisable cull_face(mat.notNull() && mat->mDoubleSided ? GL_CULL_FACE : 0);
LLGLDisable cull_face(mat && mat->mDoubleSided ? GL_CULL_FACE : 0);

setup_texture_matrix(params);

Expand Down Expand Up @@ -866,6 +877,8 @@ void LLRenderPass::pushRiggedGLTFBatches(U32 type)
const LLVOAvatar* lastAvatar = nullptr;
U64 lastMeshId = 0;
bool skipLastSkin = false;
LLFetchedGLTFMaterial* lastMat = nullptr;
LLViewerTexture* lastTex = nullptr;

auto* begin = gPipeline.beginRenderMap(type);
auto* end = gPipeline.endRenderMap(type);
Expand All @@ -875,7 +888,7 @@ void LLRenderPass::pushRiggedGLTFBatches(U32 type)
LLDrawInfo& params = **i;
LLCullResult::increment_iterator(i, end);

pushRiggedGLTFBatch(params, lastAvatar, lastMeshId, skipLastSkin);
pushRiggedGLTFBatch(params, lastAvatar, lastMeshId, skipLastSkin, lastMat, lastTex);
}
}

Expand All @@ -900,11 +913,11 @@ void LLRenderPass::pushUntexturedRiggedGLTFBatches(U32 type)


// static
void LLRenderPass::pushRiggedGLTFBatch(LLDrawInfo& params, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin)
void LLRenderPass::pushRiggedGLTFBatch(LLDrawInfo& params, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin, LLFetchedGLTFMaterial*& lastMat, LLViewerTexture*& lastTex)
{
if (uploadMatrixPalette(params.mAvatar, params.mSkinInfo, lastAvatar, lastMeshId, skipLastSkin))
{
pushGLTFBatch(params);
pushGLTFBatch(params, lastMat, lastTex);
}
}

Expand Down
7 changes: 5 additions & 2 deletions indra/newview/lldrawpool.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class LLDrawInfo;
class LLVOAvatar;
class LLGLSLShader;
class LLMeshSkinInfo;
class LLFetchedGLTFMaterial;

class LLDrawPool
{
Expand Down Expand Up @@ -376,8 +377,10 @@ class LLRenderPass : public LLDrawPool
void pushUntexturedRiggedGLTFBatches(U32 type);

// push a single GLTF draw call
static void pushGLTFBatch(LLDrawInfo& params);
static void pushRiggedGLTFBatch(LLDrawInfo& params, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin);
// lastMat/lastTex track the most recently bound material+media texture so
// consecutive draws sharing a material skip the redundant LLFetchedGLTFMaterial::bind
static void pushGLTFBatch(LLDrawInfo& params, LLFetchedGLTFMaterial*& lastMat, LLViewerTexture*& lastTex);
static void pushRiggedGLTFBatch(LLDrawInfo& params, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin, LLFetchedGLTFMaterial*& lastMat, LLViewerTexture*& lastTex);
static void pushUntexturedGLTFBatch(LLDrawInfo& params);
static void pushUntexturedRiggedGLTFBatch(LLDrawInfo& params, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin);

Expand Down
12 changes: 12 additions & 0 deletions indra/newview/lldrawpoolavatar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,18 @@ void LLDrawPoolAvatar::renderShadow(S32 pass)
return;
}

// Optionally skip the costlier avatar shadow passes (alpha blend is the most
// expensive and least visually important; alpha mask next). Default 2 = full.
static LLCachedControl<S32> avatar_shadow_detail(gSavedSettings, "RenderAvatarShadowDetail", 2);
if (pass == SHADOW_PASS_AVATAR_ALPHA_BLEND && avatar_shadow_detail() < 2)
{
return;
}
if (pass == SHADOW_PASS_AVATAR_ALPHA_MASK && avatar_shadow_detail() < 1)
{
return;
}

LLDrawPoolAvatar::sShadowPass = pass;

if (pass == SHADOW_PASS_AVATAR_OPAQUE)
Expand Down
35 changes: 29 additions & 6 deletions indra/newview/lldrawpoolbump.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -546,28 +546,51 @@ void LLDrawPoolBump::renderDeferred(S32 pass)
for (int i = 0; i < 2; ++i)
{
bool rigged = i == 1;

U32 type = rigged ? LLRenderPass::PASS_BUMP_RIGGED : LLRenderPass::PASS_BUMP;
LLCullResult::drawinfo_iterator begin = gPipeline.beginRenderMap(type);
LLCullResult::drawinfo_iterator end = gPipeline.endRenderMap(type);
if (begin == end)
{ // no bump geometry in this pass -- skip the shader bind and texture setup
continue;
}

gDeferredBumpProgram.bind(rigged);
diffuse_channel = LLGLSLShader::sCurBoundShaderPtr->enableTexture(LLViewerShaderMgr::DIFFUSE_MAP);
bump_channel = LLGLSLShader::sCurBoundShaderPtr->enableTexture(LLViewerShaderMgr::BUMP_MAP);
gGL.getTexUnit(diffuse_channel)->unbind(LLTexUnit::TT_TEXTURE);
gGL.getTexUnit(bump_channel)->unbind(LLTexUnit::TT_TEXTURE);

U32 type = rigged ? LLRenderPass::PASS_BUMP_RIGGED : LLRenderPass::PASS_BUMP;
LLCullResult::drawinfo_iterator begin = gPipeline.beginRenderMap(type);
LLCullResult::drawinfo_iterator end = gPipeline.endRenderMap(type);

const LLVOAvatar* lastAvatar = nullptr;
U64 lastMeshId = 0;
bool skipLastSkin = false;

// Faces are sorted by bumpmap then texture, so the alpha-mask cutoff and the
// bump-image bind (an image lookup + texture bind) repeat across runs of faces.
// Skip them when unchanged. (bindBumpMap's only side effect, addTextureStats, is
// max-based on the source texture, so skipping a repeat is a no-op there too.)
U8 lastBump = 255;
LLViewerTexture* lastBumpTex = nullptr;
F32 lastAlpha = -1.f;

for (LLCullResult::drawinfo_iterator i = begin; i != end; )
{
LLDrawInfo& params = **i;

LLCullResult::increment_iterator(i, end);

LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(params.mAlphaMaskCutoff);
LLDrawPoolBump::bindBumpMap(params, bump_channel);
if (params.mAlphaMaskCutoff != lastAlpha)
{
lastAlpha = params.mAlphaMaskCutoff;
LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(lastAlpha);
}

if (params.mBump != lastBump || params.mTexture.get() != lastBumpTex)
{
lastBump = params.mBump;
lastBumpTex = params.mTexture.get();
LLDrawPoolBump::bindBumpMap(params, bump_channel);
}

if (rigged)
{
Expand Down
Loading
Loading