Skip to content

Commit a65c2cf

Browse files
authored
Revert "Make emitted egal code more loopy (JuliaLang#54121)" (JuliaLang#57453)
This reverts a portion of commit 50833c8. This algorithm is not able to handle simple cases where there is any internal padding, such as the example of: ``` struct LotsBytes a::Int8 b::NTuple{256,Int} c::Int end ``` Unfortunately fixing it is a bit of a large project right now, so reverting now to fix correctness while working on that. Fixes JuliaLang#55513 (indirectly, by removing broken code) Maybe reopens JuliaLang#54109, although the latency issue it proposes to fix doesn't occur on master even with this revert (just the mediocre looking IR result output returns)
1 parent 992c520 commit a65c2cf

File tree

2 files changed

+0
-192
lines changed

2 files changed

+0
-192
lines changed

Compiler/test/codegen.jl

-51
Original file line numberDiff line numberDiff line change
@@ -889,57 +889,6 @@ ex54166 = Union{Missing, Int64}[missing -2; missing -2];
889889
dims54166 = (1,2)
890890
@test (minimum(ex54166; dims=dims54166)[1] === missing)
891891

892-
# #54109 - Excessive LLVM time for egal
893-
struct DefaultOr54109{T}
894-
x::T
895-
default::Bool
896-
end
897-
898-
@eval struct Torture1_54109
899-
$((Expr(:(::), Symbol("x$i"), DefaultOr54109{Float64}) for i = 1:897)...)
900-
end
901-
Torture1_54109() = Torture1_54109((DefaultOr54109(1.0, false) for i = 1:897)...)
902-
903-
@eval struct Torture2_54109
904-
$((Expr(:(::), Symbol("x$i"), DefaultOr54109{Float64}) for i = 1:400)...)
905-
$((Expr(:(::), Symbol("x$(i+400)"), DefaultOr54109{Int16}) for i = 1:400)...)
906-
end
907-
Torture2_54109() = Torture2_54109((DefaultOr54109(1.0, false) for i = 1:400)..., (DefaultOr54109(Int16(1), false) for i = 1:400)...)
908-
909-
@noinline egal_any54109(x, @nospecialize(y::Any)) = x === Base.compilerbarrier(:type, y)
910-
911-
let ir1 = get_llvm(egal_any54109, Tuple{Torture1_54109, Any}),
912-
ir2 = get_llvm(egal_any54109, Tuple{Torture2_54109, Any})
913-
914-
# We can't really do timing on CI, so instead, let's look at the length of
915-
# the optimized IR. The original version had tens of thousands of lines and
916-
# was slower, so just check here that we only have < 500 lines. If somebody,
917-
# implements a better comparison that's larger than that, just re-benchmark
918-
# this and adjust the threshold.
919-
920-
@test count(==('\n'), ir1) < 500
921-
@test count(==('\n'), ir2) < 500
922-
end
923-
924-
## Regression test for egal of a struct of this size without padding, but with
925-
## non-bitsegal, to make sure that it doesn't accidentally go down the accelerated
926-
## path.
927-
@eval struct BigStructAnyInt
928-
$((Expr(:(::), Symbol("x$i"), Pair{Any, Int}) for i = 1:33)...)
929-
end
930-
BigStructAnyInt() = BigStructAnyInt((Union{Base.inferencebarrier(Float64), Int}=>i for i = 1:33)...)
931-
@test egal_any54109(BigStructAnyInt(), BigStructAnyInt())
932-
933-
## For completeness, also test correctness, since we don't have a lot of
934-
## large-struct tests.
935-
936-
# The two allocations of the same struct will likely have different padding,
937-
# we want to make sure we find them egal anyway - a naive memcmp would
938-
# accidentally look at it.
939-
@test egal_any54109(Torture1_54109(), Torture1_54109())
940-
@test egal_any54109(Torture2_54109(), Torture2_54109())
941-
@test !egal_any54109(Torture1_54109(), Torture1_54109((DefaultOr54109(2.0, false) for i = 1:897)...))
942-
943892
bar54599() = Base.inferencebarrier(true) ? (Base.PkgId(Main),1) : nothing
944893

945894
function foo54599()

src/codegen.cpp

-141
Original file line numberDiff line numberDiff line change
@@ -3616,61 +3616,6 @@ static Value *emit_bitsunion_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1,
36163616
return phi;
36173617
}
36183618

3619-
struct egal_desc {
3620-
size_t offset;
3621-
size_t nrepeats;
3622-
size_t data_bytes;
3623-
size_t padding_bytes;
3624-
};
3625-
3626-
template <typename callback>
3627-
static size_t emit_masked_bits_compare(callback &emit_desc, jl_datatype_t *aty, egal_desc &current_desc)
3628-
{
3629-
// Memcmp, but with masked padding
3630-
size_t data_bytes = 0;
3631-
size_t padding_bytes = 0;
3632-
size_t nfields = jl_datatype_nfields(aty);
3633-
size_t total_size = jl_datatype_size(aty);
3634-
assert(aty->layout->flags.isbitsegal);
3635-
for (size_t i = 0; i < nfields; ++i) {
3636-
size_t offset = jl_field_offset(aty, i);
3637-
size_t fend = i == nfields - 1 ? total_size : jl_field_offset(aty, i + 1);
3638-
size_t fsz = jl_field_size(aty, i);
3639-
jl_datatype_t *fty = (jl_datatype_t*)jl_field_type(aty, i);
3640-
assert(jl_is_datatype(fty)); // union fields should never reach here
3641-
assert(fty->layout->flags.isbitsegal);
3642-
if (jl_field_isptr(aty, i) || !fty->layout->flags.haspadding) {
3643-
// The field has no internal padding
3644-
data_bytes += fsz;
3645-
if (offset + fsz == fend) {
3646-
// The field has no padding after. Merge this into the current
3647-
// comparison range and go to next field.
3648-
} else {
3649-
padding_bytes = fend - offset - fsz;
3650-
// Found padding. Either merge this into the current comparison
3651-
// range, or emit the old one and start a new one.
3652-
if (current_desc.data_bytes == data_bytes &&
3653-
current_desc.padding_bytes == padding_bytes) {
3654-
// Same as the previous range, just note that down, so we
3655-
// emit this as a loop.
3656-
current_desc.nrepeats += 1;
3657-
} else {
3658-
if (current_desc.nrepeats != 0)
3659-
emit_desc(current_desc);
3660-
current_desc.nrepeats = 1;
3661-
current_desc.data_bytes = data_bytes;
3662-
current_desc.padding_bytes = padding_bytes;
3663-
}
3664-
data_bytes = 0;
3665-
}
3666-
} else {
3667-
// The field may have internal padding. Recurse this.
3668-
data_bytes += emit_masked_bits_compare(emit_desc, fty, current_desc);
3669-
}
3670-
}
3671-
return data_bytes;
3672-
}
3673-
36743619
static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t arg2)
36753620
{
36763621
++EmittedBitsCompares;
@@ -3747,92 +3692,6 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
37473692
}
37483693
return ctx.builder.CreateICmpEQ(answer, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0));
37493694
}
3750-
else if (sz > 512 && jl_struct_try_layout(sty) && sty->layout->flags.isbitsegal) {
3751-
Value *varg1 = arg1.inline_roots.empty() && arg1.ispointer() ? data_pointer(ctx, arg1) :
3752-
value_to_pointer(ctx, arg1).V;
3753-
Value *varg2 = arg2.inline_roots.empty() && arg2.ispointer() ? data_pointer(ctx, arg2) :
3754-
value_to_pointer(ctx, arg2).V;
3755-
varg1 = emit_pointer_from_objref(ctx, varg1);
3756-
varg2 = emit_pointer_from_objref(ctx, varg2);
3757-
3758-
// See above for why we want to do this
3759-
SmallVector<Value*, 0> gc_uses;
3760-
gc_uses.append(get_gc_roots_for(ctx, arg1));
3761-
gc_uses.append(get_gc_roots_for(ctx, arg2));
3762-
OperandBundleDef OpBundle("jl_roots", gc_uses);
3763-
3764-
Value *answer = nullptr;
3765-
auto emit_desc = [&](egal_desc desc) {
3766-
Value *ptr1 = varg1;
3767-
Value *ptr2 = varg2;
3768-
if (desc.offset != 0) {
3769-
ptr1 = emit_ptrgep(ctx, ptr1, desc.offset);
3770-
ptr2 = emit_ptrgep(ctx, ptr2, desc.offset);
3771-
}
3772-
3773-
Value *new_ptr1 = ptr1;
3774-
Value *endptr1 = nullptr;
3775-
BasicBlock *postBB = nullptr;
3776-
BasicBlock *loopBB = nullptr;
3777-
PHINode *answerphi = nullptr;
3778-
if (desc.nrepeats != 1) {
3779-
// Set up loop
3780-
endptr1 = emit_ptrgep(ctx, ptr1, desc.nrepeats * (desc.data_bytes + desc.padding_bytes));;
3781-
3782-
BasicBlock *currBB = ctx.builder.GetInsertBlock();
3783-
loopBB = BasicBlock::Create(ctx.builder.getContext(), "egal_loop", ctx.f);
3784-
postBB = BasicBlock::Create(ctx.builder.getContext(), "post", ctx.f);
3785-
ctx.builder.CreateBr(loopBB);
3786-
3787-
ctx.builder.SetInsertPoint(loopBB);
3788-
Type *TInt1 = getInt1Ty(ctx.builder.getContext());
3789-
answerphi = ctx.builder.CreatePHI(TInt1, 2);
3790-
answerphi->addIncoming(answer ? answer : ConstantInt::get(TInt1, 1), currBB);
3791-
answer = answerphi;
3792-
3793-
PHINode *itr1 = ctx.builder.CreatePHI(ptr1->getType(), 2);
3794-
PHINode *itr2 = ctx.builder.CreatePHI(ptr2->getType(), 2);
3795-
3796-
new_ptr1 = emit_ptrgep(ctx, itr1, desc.data_bytes + desc.padding_bytes);
3797-
itr1->addIncoming(ptr1, currBB);
3798-
itr1->addIncoming(new_ptr1, loopBB);
3799-
3800-
Value *new_ptr2 = emit_ptrgep(ctx, itr2, desc.data_bytes + desc.padding_bytes);
3801-
itr2->addIncoming(ptr2, currBB);
3802-
itr2->addIncoming(new_ptr2, loopBB);
3803-
3804-
ptr1 = itr1;
3805-
ptr2 = itr2;
3806-
}
3807-
3808-
// Emit memcmp. TODO: LLVM has a pass to expand this for additional
3809-
// performance.
3810-
Value *this_answer = ctx.builder.CreateCall(prepare_call(memcmp_func),
3811-
{ ptr1,
3812-
ptr2,
3813-
ConstantInt::get(ctx.types().T_size, desc.data_bytes) },
3814-
ArrayRef<OperandBundleDef>(&OpBundle, gc_uses.empty() ? 0 : 1));
3815-
this_answer = ctx.builder.CreateICmpEQ(this_answer, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0));
3816-
answer = answer ? ctx.builder.CreateAnd(answer, this_answer) : this_answer;
3817-
if (endptr1) {
3818-
answerphi->addIncoming(answer, loopBB);
3819-
Value *loopend = ctx.builder.CreateICmpEQ(new_ptr1, endptr1);
3820-
ctx.builder.CreateCondBr(loopend, postBB, loopBB);
3821-
ctx.builder.SetInsertPoint(postBB);
3822-
}
3823-
};
3824-
egal_desc current_desc = {0};
3825-
size_t trailing_data_bytes = emit_masked_bits_compare(emit_desc, sty, current_desc);
3826-
assert(current_desc.nrepeats != 0);
3827-
emit_desc(current_desc);
3828-
if (trailing_data_bytes != 0) {
3829-
current_desc.nrepeats = 1;
3830-
current_desc.data_bytes = trailing_data_bytes;
3831-
current_desc.padding_bytes = 0;
3832-
emit_desc(current_desc);
3833-
}
3834-
return answer;
3835-
}
38363695
else {
38373696
jl_svec_t *types = sty->types;
38383697
Value *answer = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1);

0 commit comments

Comments
 (0)