diff --git a/THIRDPARTY.md b/THIRDPARTY.md
index 89d1ce3de3d97..412b84b688758 100644
--- a/THIRDPARTY.md
+++ b/THIRDPARTY.md
@@ -6,7 +6,6 @@ for exceptions.
 - [crc32c.c](https://stackoverflow.com/questions/17645167/implementing-sse-4-2s-crc32c-in-software) (CRC-32c checksum code by Mark Adler) [[ZLib](https://opensource.org/licenses/Zlib)].
 - [LDC](https://github.com/ldc-developers/ldc/blob/master/LICENSE) (for ccall/cfunction ABI definitions) [BSD-3]. The portion of code that Julia uses from LDC is [BSD-3] licensed.
 - [LLVM](https://releases.llvm.org/3.9.0/LICENSE.TXT) (for parts of src/disasm.cpp) [UIUC]
-- [MINGW](https://sourceforge.net/p/mingw/mingw-org-wsl/ci/legacy/tree/mingwrt/mingwex/dirname.c) (for dirname implementation on Windows) [MIT]
 - [NetBSD](https://www.netbsd.org/about/redistribution.html) (for setjmp, longjmp, and strptime implementations on Windows) [BSD-3]
 - [Python](https://docs.python.org/3/license.html) (for strtod implementation on Windows) [PSF]
 - [FEMTOLISP](https://github.com/JeffBezanson/femtolisp) [BSD-3]
diff --git a/base/abstractarray.jl b/base/abstractarray.jl
index 45fff8dae0d24..be78c308afa53 100644
--- a/base/abstractarray.jl
+++ b/base/abstractarray.jl
@@ -3654,7 +3654,31 @@ function _keepat!(a::AbstractVector, m::AbstractVector{Bool})
     deleteat!(a, j:lastindex(a))
 end
 
-## 1-d circshift ##
+"""
+    circshift!(a::AbstractVector, shift::Integer)
+
+Circularly shift, or rotate, the data in vector `a` by `shift` positions.
+
+# Examples
+
+```jldoctest
+julia> circshift!([1, 2, 3, 4, 5], 2)
+5-element Vector{Int64}:
+ 4
+ 5
+ 1
+ 2
+ 3
+
+julia> circshift!([1, 2, 3, 4, 5], -2)
+5-element Vector{Int64}:
+ 3
+ 4
+ 5
+ 1
+ 2
+```
+"""
 function circshift!(a::AbstractVector, shift::Integer)
     n = length(a)
     n == 0 && return a
diff --git a/base/compiler/effects.jl b/base/compiler/effects.jl
index ece549eda7a6d..166df78f3130c 100644
--- a/base/compiler/effects.jl
+++ b/base/compiler/effects.jl
@@ -329,7 +329,6 @@ is_inaccessiblemem_or_argmemonly(effects::Effects) = effects.inaccessiblememonly
 
 is_consistent_overlay(effects::Effects) = effects.nonoverlayed === CONSISTENT_OVERLAY
 
-# (sync this with codegen.cpp and staticdata.c effects_foldable functions)
 function encode_effects(e::Effects)
     return ((e.consistent % UInt32) << 0) |
            ((e.effect_free % UInt32) << 3) |
diff --git a/base/math.jl b/base/math.jl
index 5266cff8d47fc..9c02b43140aaf 100644
--- a/base/math.jl
+++ b/base/math.jl
@@ -1140,6 +1140,10 @@ function modf(x::T) where T<:IEEEFloat
     return (rx, ix)
 end
 
+@inline function use_power_by_squaring(n::Integer)
+    -2^12 <= n <= 3 * 2^13
+end
+
 # @constprop aggressive to help the compiler see the switch between the integer and float
 # variants for callers with constant `y`
 @constprop :aggressive function ^(x::Float64, y::Float64)
@@ -1152,24 +1156,33 @@ end
         y = sign(y)*0x1.8p62
     end
     yint = unsafe_trunc(Int64, y) # This is actually safe since julia freezes the result
-    y == yint && return @noinline x^yint
-    2*xu==0 && return abs(y)*Inf*(!(y>0)) # if x==0
-    x<0 && throw_exp_domainerror(x) # |y| is small enough that y isn't an integer
-    !isfinite(x) && return x*(y>0 || isnan(x)) # x is inf or NaN
+    yisint = y == yint
+    if yisint
+        yint == 0 && return 1.0
+        use_power_by_squaring(yint) && return @noinline pow_body(x, yint)
+    end
+    2*xu==0 && return abs(y)*Inf*(!(y>0)) # if x === +0.0 or -0.0 (Inf * false === 0.0)
+    s = 1
+    if x < 0
+        !yisint && throw_exp_domainerror(x) # y isn't an integer
+        s = ifelse(isodd(yint), -1, 1)
+    end
+    !isfinite(x) && return copysign(x,s)*(y>0 || isnan(x)) # x is inf or NaN
+    return copysign(pow_body(abs(x), y), s)
+end
+
+@assume_effects :foldable @noinline function pow_body(x::Float64, y::Float64)
+    xu = reinterpret(UInt64, x)
     if xu < (UInt64(1)<<52) # x is subnormal
         xu = reinterpret(UInt64, x * 0x1p52) # normalize x
         xu &= ~sign_mask(Float64)
         xu -= UInt64(52) << 52 # mess with the exponent
     end
-    return pow_body(xu, y)
-end
-
-@inline function pow_body(xu::UInt64, y::Float64)
     logxhi,logxlo = _log_ext(xu)
     xyhi, xylo = two_mul(logxhi,y)
     xylo = muladd(logxlo, y, xylo)
     hi = xyhi+xylo
-    return Base.Math.exp_impl(hi, xylo-(hi-xyhi), Val(:ℯ))
+    return @inline Base.Math.exp_impl(hi, xylo-(hi-xyhi), Val(:ℯ))
 end
 
 @constprop :aggressive function ^(x::T, y::T) where T <: Union{Float16, Float32}
@@ -1193,12 +1206,29 @@ end
     return T(exp2(log2(abs(widen(x))) * y))
 end
 
-# compensated power by squaring
 @constprop :aggressive @inline function ^(x::Float64, n::Integer)
+    x^clamp(n, Int64)
+end
+@constprop :aggressive @inline function ^(x::Float64, n::Int64)
     n == 0 && return one(x)
-    return pow_body(x, n)
+    if use_power_by_squaring(n)
+        return pow_body(x, n)
+    else
+        s = ifelse(x < 0 && isodd(n), -1.0, 1.0)
+        x = abs(x)
+        y = float(n)
+        if y == n
+            return copysign(pow_body(x, y), s)
+        else
+            n2 = n % 1024
+            y = float(n - n2)
+            return pow_body(x, y) * copysign(pow_body(x, n2), s)
+        end
+    end
 end
 
+# compensated power by squaring
+# this method is only reliable for -2^20 < n < 2^20 (cf. #53881 #53886)
 @assume_effects :terminates_locally @noinline function pow_body(x::Float64, n::Integer)
     y = 1.0
     xnlo = ynlo = 0.0
diff --git a/base/precompilation.jl b/base/precompilation.jl
index 254bab05a07dc..b33b5a14728e4 100644
--- a/base/precompilation.jl
+++ b/base/precompilation.jl
@@ -141,15 +141,16 @@ function ExplicitEnv(envpath::String=Base.active_project())
 
         # Extensions
         deps_pkg = get(Dict{String, Any}, pkg_info, "extensions")::Dict{String, Any}
+        deps_pkg_concrete = Dict{String, Vector{String}}()
         for (ext, triggers) in deps_pkg
             if triggers isa String
                 triggers = [triggers]
            else
                 triggers = triggers::Vector{String}
             end
-            deps_pkg[ext] = triggers
+            deps_pkg_concrete[ext] = triggers
         end
-        extensions[m_uuid] = deps_pkg
+        extensions[m_uuid] = deps_pkg_concrete
 
         # Determine strategy to find package
         lookup_strat = begin
diff --git a/base/regex.jl b/base/regex.jl
index 9d5c146a6e840..2b2717a74efc0 100644
--- a/base/regex.jl
+++ b/base/regex.jl
@@ -39,7 +39,13 @@ mutable struct Regex <: AbstractPattern
         end
         re = compile(new(pattern, compile_options, match_options, C_NULL))
         finalizer(re) do re
-            re.regex == C_NULL || PCRE.free_re(re.regex)
+            # don't free during exit because tasks may still be running and
+            # using it. Issue #57817. During sysimage creation _atexit_hooks_finished
+            # is not defined but threads aren't running so just always run
+            during_exit = @isdefined(_atexit_hooks_finished) && _atexit_hooks_finished
+            if re.regex != C_NULL && !during_exit
+                PCRE.free_re(re.regex)
+            end
         end
         re
     end
diff --git a/base/special/exp.jl b/base/special/exp.jl
index 32de6b9be296d..38d7509807aed 100644
--- a/base/special/exp.jl
+++ b/base/special/exp.jl
@@ -252,7 +252,7 @@ end
             twopk = (k + UInt64(53)) << 52
             return reinterpret(T, twopk + reinterpret(UInt64, small_part))*0x1p-53
         end
-        #k == 1024 && return (small_part * 2.0) * 2.0^1023
+        k == 1024 && return (small_part * 2.0) * 2.0^1023
     end
     twopk = Int64(k) << 52
     return reinterpret(T, twopk + reinterpret(Int64, small_part))
diff --git a/base/strings/basic.jl b/base/strings/basic.jl
index 2d5f0cea26e36..438789758cfe0 100644
--- a/base/strings/basic.jl
+++ b/base/strings/basic.jl
@@ -512,11 +512,11 @@ prevind(s::AbstractString, i::Int) = prevind(s, i, 1)
 
 function prevind(s::AbstractString, i::Int, n::Int)
     n < 0 && throw(ArgumentError("n cannot be negative: $n"))
-    z = ncodeunits(s) + 1
+    z = ncodeunits(s)::Int + 1
     @boundscheck 0 < i ≤ z || throw(BoundsError(s, i))
-    n == 0 && return thisind(s, i) == i ? i : string_index_err(s, i)
+    n == 0 && return thisind(s, i)::Int == i ? i : string_index_err(s, i)
     while n > 0 && 1 < i
-        @inbounds n -= isvalid(s, i -= 1)
+        @inbounds n -= isvalid(s, i -= 1)::Bool
     end
     return i - n
 end
@@ -571,11 +571,11 @@ nextind(s::AbstractString, i::Int) = nextind(s, i, 1)
 
 function nextind(s::AbstractString, i::Int, n::Int)
     n < 0 && throw(ArgumentError("n cannot be negative: $n"))
-    z = ncodeunits(s)
+    z = ncodeunits(s)::Int
     @boundscheck 0 ≤ i ≤ z || throw(BoundsError(s, i))
-    n == 0 && return thisind(s, i) == i ? i : string_index_err(s, i)
+    n == 0 && return thisind(s, i)::Int == i ? i : string_index_err(s, i)
    while n > 0 && i < z
-        @inbounds n -= isvalid(s, i += 1)
+        @inbounds n -= isvalid(s, i += 1)::Bool
     end
     return i + n
 end
diff --git a/base/summarysize.jl b/base/summarysize.jl
index 4f2646c7641b7..62b0ad0849778 100644
--- a/base/summarysize.jl
+++ b/base/summarysize.jl
@@ -149,13 +149,8 @@ function (ss::SummarySize)(obj::GenericMemory)
     datakey = unsafe_convert(Ptr{Cvoid}, obj)
     if !haskey(ss.seen, datakey)
         ss.seen[datakey] = true
-        dsize = sizeof(obj)
+        size += sizeof(obj)
         T = eltype(obj)
-        if isbitsunion(T)
-            # add 1 union selector byte for each element
-            dsize += length(obj)
-        end
-        size += dsize
         if !isempty(obj) && T !== Symbol && (!Base.allocatedinline(T) || (T isa DataType && !Base.datatype_pointerfree(T)))
             push!(ss.frontier_x, obj)
             push!(ss.frontier_i, 1)
diff --git a/doc/src/manual/calling-c-and-fortran-code.md b/doc/src/manual/calling-c-and-fortran-code.md
index 6f4d69b16bc81..2c20c22b33930 100644
--- a/doc/src/manual/calling-c-and-fortran-code.md
+++ b/doc/src/manual/calling-c-and-fortran-code.md
@@ -547,15 +547,14 @@ is not valid, since the type layout of `T` is not known statically.
 
 ### SIMD Values
 
-Note: This feature is currently implemented on 64-bit x86 and AArch64 platforms only.
-
 If a C/C++ routine has an argument or return value that is a native SIMD type, the corresponding
 Julia type is a homogeneous tuple of `VecElement` that naturally maps to the SIMD type. Specifically:
 
-> * The tuple must be the same size as the SIMD type. For example, a tuple representing an `__m128`
->   on x86 must have a size of 16 bytes.
-> * The element type of the tuple must be an instance of `VecElement{T}` where `T` is a primitive type that
->   is 1, 2, 4 or 8 bytes.
+> * The tuple must be the same size and element type as the SIMD type. For example, a tuple
+>   representing an `__m128` on x86 must have a size of 16 bytes and `Float32` elements.
+> * The element type of the tuple must be an instance of `VecElement{T}` where `T` is a
+>   primitive type with a power-of-two number of bytes (e.g. 1, 2, 4, 8, or 16) such as
+>   `Int8` or `Float64`.
 
 For instance, consider this C routine that uses AVX intrinsics:
 
@@ -628,6 +627,10 @@ For translating a C argument list to Julia:
   * `T`, where `T` is a Julia leaf type
   * argument value will be copied (passed by value)
+  * `vector T` (or `__attribute__ vector_size`, or a typedef such as `__m128`)
+
+    * `NTuple{N, VecElement{T}}`, where `T` is a primitive Julia type of the correct size
+      and `N` is the number of elements in the vector (equal to `vector_size / sizeof T`).
 * `void*`
 
   * depends on how this parameter is used, first translate this to the intended pointer type, then
@@ -674,13 +677,16 @@ For translating a C return type to Julia:
   * `T`, where `T` is one of the primitive types: `char`, `int`, `long`, `short`, `float`, `double`,
     `complex`, `enum` or any of their `typedef` equivalents
 
-  * `T`, where `T` is an equivalent Julia Bits Type (per the table above)
-  * if `T` is an `enum`, the argument type should be equivalent to `Cint` or `Cuint`
+  * same as C argument list
   * argument value will be copied (returned by-value)
 * `struct T` (including typedef to a struct)
 
-  * `T`, where `T` is a Julia Leaf Type
+  * same as C argument list
   * argument value will be copied (returned by-value)
+
+* `vector T`
+
+  * same as C argument list
 * `void*`
 
   * depends on how this parameter is used, first translate this to the intended pointer type, then
diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index d049327c2bf36..a3eb2df3c7574 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -1971,7 +1971,7 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
     else if (!alignment)
         alignment = julia_alignment(jltype);
     if (intcast && Order == AtomicOrdering::NotAtomic) {
-        emit_memcpy(ctx, intcast, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), data, jl_aliasinfo_t::fromTBAA(ctx, tbaa), nb, Align(alignment), intcast->getAlign());
+        emit_memcpy(ctx, intcast, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), data, jl_aliasinfo_t::fromTBAA(ctx, tbaa), nb, intcast->getAlign(), Align(alignment));
     }
     else {
         if (!isboxed && jl_is_genericmemoryref_type(jltype)) {
@@ -3214,7 +3214,7 @@ static void union_alloca_type(jl_uniontype_t *ut,
             [&](unsigned idx, jl_datatype_t *jt) {
                 if (!jl_is_datatype_singleton(jt)) {
                     size_t nb1 = jl_datatype_size(jt);
-                    size_t align1 = jl_datatype_align(jt);
+                    size_t align1 = julia_alignment((jl_value_t*)jt);
                     if (nb1 > nbytes)
                         nbytes = nb1;
                     if (align1 > align)
@@ -3796,9 +3796,10 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
 
     // whether we should perform the initialization with the struct as a IR value
     // or instead initialize the stack buffer with stores
+    // although we do the former if it is a vector or could be a vector element
     auto tracked = CountTrackedPointers(lt);
     bool init_as_value = false;
-    if (lt->isVectorTy() || jl_is_vecelement_type(ty)) { // maybe also check the size ?
+    if (lt->isVectorTy() || jl_special_vector_alignment(1, ty) != 0) {
         init_as_value = true;
     }
     else if (tracked.count) {
diff --git a/src/codegen.cpp b/src/codegen.cpp
index c75c2ad27e384..b46a406edd2be 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -8552,6 +8552,8 @@ static jl_llvm_functions_t
             Type *RT = Arg->getParamStructRetType();
             TypeSize sz = DL.getTypeAllocSize(RT);
             Align al = DL.getPrefTypeAlign(RT);
+            if (al > MAX_ALIGN)
+                al = Align(MAX_ALIGN);
             param.addAttribute(Attribute::NonNull);
             // The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers.
             param.addDereferenceableAttr(sz);
@@ -9677,10 +9679,10 @@ jl_llvm_functions_t jl_emit_codeinst(
             // Julia-level optimization will never need to see it
             else if (jl_is_method(def) && // don't delete toplevel code
                      inferred != jl_nothing && // and there is something to delete (test this before calling jl_ir_inlining_cost)
-                     ((!effects_foldable(codeinst->ipo_purity_bits) && // don't delete code we may want for irinterp
-                       (jl_ir_inlining_cost(inferred) == UINT16_MAX) && // don't delete inlineable code
-                       !jl_generating_output()) || // don't delete code when generating a precompile file, trading memory in the short term for avoiding likely duplicating inference work for aotcompile
-                      jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr)) { // unless it is constant (although this shouldn't have had code in the first place)
+                     !effects_foldable(codeinst->ipo_purity_bits) && // don't delete code we may want for irinterp
+                     ((jl_ir_inlining_cost(inferred) == UINT16_MAX) || // don't delete inlineable code
+                      jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) && // unless it is constant
+                     !(params.imaging_mode || jl_options.incremental)) { // don't delete code when generating a precompile file
                 jl_atomic_store_release(&codeinst->inferred, jl_nothing);
             }
         }
diff --git a/src/datatype.c b/src/datatype.c
index 8de401f4dd0f7..bb33aa9e397bc 100644
--- a/src/datatype.c
+++ b/src/datatype.c
@@ -298,9 +298,10 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t sz,
 }
 
 // Determine if homogeneous tuple with fields of type t will have
-// a special alignment beyond normal Julia rules.
+// a special alignment and vector-ABI beyond normal rules for aggregates.
 // Return special alignment if one exists, 0 if normal alignment rules hold.
 // A non-zero result *must* match the LLVM rules for a vector type .
+// This matches the compiler's `__attribute__ vector_size` behavior.
 // For sake of Ahead-Of-Time (AOT) compilation, this routine has to work
 // without LLVM being available.
 unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *t)
@@ -315,8 +316,12 @@ unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *t)
         // motivating use case comes up for Julia, we reject pointers.
         return 0;
     size_t elsz = jl_datatype_size(ty);
-    if (elsz != 1 && elsz != 2 && elsz != 4 && elsz != 8)
-        // Only handle power-of-two-sized elements (for now)
+    if (next_power_of_two(elsz) != elsz)
+        // Only handle power-of-two-sized elements (for now), since other
+        // lengths may be packed into very complicated arrangements (llvm pads
+        // extra bits on most platforms when computing alignment but not when
+        // computing type size, but adds no extra bytes for each element, so
+        // their effect on offsets is never what you may naturally expect).
         return 0;
     size_t size = nfields * elsz;
     // Use natural alignment for this vector: this matches LLVM and clang.
@@ -707,9 +712,9 @@ void jl_compute_field_offsets(jl_datatype_t *st)
             }
             else {
                 fsz = sizeof(void*);
-                if (fsz > MAX_ALIGN)
-                    fsz = MAX_ALIGN;
                 al = fsz;
+                if (al > MAX_ALIGN)
+                    al = MAX_ALIGN;
                 desc[i].isptr = 1;
                 zeroinit = 1;
                 npointers++;
@@ -929,6 +934,18 @@ JL_DLLEXPORT jl_datatype_t *jl_new_primitivetype(jl_value_t *name, jl_module_t *
                                jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 0, 0);
     uint32_t nbytes = (nbits + 7) / 8;
     uint32_t alignm = next_power_of_two(nbytes);
+# if defined(_CPU_X86_) && !defined(_OS_WINDOWS_)
+    // datalayout strings are often weird: on 64-bit they usually follow fairly simple rules,
+    // but on x86 32 bit platforms, sometimes 5 to 8 byte types are
+    // 32-bit aligned even though the MAX_ALIGN (for types 9+ bytes) is 16
+    // (except for f80 which is align 4 on Mingw, Linux, and BSDs--but align 16 on MSVC and Darwin)
+    // https://llvm.org/doxygen/ARMTargetMachine_8cpp.html#adb29b487708f0dc2a940345b68649270
+    // https://llvm.org/doxygen/AArch64TargetMachine_8cpp.html#a003a58caf135efbf7273c5ed84e700d7
+    // https://llvm.org/doxygen/X86TargetMachine_8cpp.html#aefdbcd6131ef195da070cef7fdaf0532
+    // 32-bit alignment is weird
+    if (alignm == 8)
+        alignm = 4;
+# endif
     if (alignm > MAX_ALIGN)
         alignm = MAX_ALIGN;
     // memoize isprimitivetype, since it is much easier than checking
diff --git a/src/gc-debug.c b/src/gc-debug.c
index 124b7da74dee1..3aa1612572bf6 100644
--- a/src/gc-debug.c
+++ b/src/gc-debug.c
@@ -1100,13 +1100,14 @@ void gc_stats_big_obj(void)
             v = v->next;
         }
 
-        mallocarray_t *ma = ptls2->heap.mallocarrays;
-        while (ma != NULL) {
-            if (gc_marked(jl_astaggedvalue(ma->a)->bits.gc)) {
+        void **lst = ptls2->heap.mallocarrays.items;
+        for (size_t i = 0, l = ptls2->heap.mallocarrays.len; i < l; i++) {
+            jl_genericmemory_t *m = (jl_genericmemory_t*)((uintptr_t)lst[i] & ~(uintptr_t)1);
+            uint8_t bits = jl_astaggedvalue(m)->bits.gc;
+            if (gc_marked(bits)) {
                 nused++;
-                nbytes += jl_genericmemory_nbytes((jl_genericmemory_t*)ma->a);
+                nbytes += jl_genericmemory_nbytes(m);
             }
-            ma = ma->next;
         }
     }
 
diff --git a/src/gc.c b/src/gc.c
index e89e16ff187c0..c4c83861f5a52 100644
--- a/src/gc.c
+++ b/src/gc.c
@@ -6,7 +6,11 @@
 #include "julia_atomics.h"
 #include "julia_gcext.h"
 #include "julia_assert.h"
-#ifdef __GLIBC__
+#include <stdlib.h>
+
+#if defined(_OS_DARWIN_)
+#include <malloc/malloc.h>
+#else
 #include <malloc.h> // for malloc_trim
 #endif
 
@@ -1121,17 +1125,8 @@ static void sweep_big(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT
 
 void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){
     // This is **NOT** a GC safe point.
-    mallocarray_t *ma;
-    if (ptls->heap.mafreelist == NULL) {
-        ma = (mallocarray_t*)malloc_s(sizeof(mallocarray_t));
-    }
-    else {
-        ma = ptls->heap.mafreelist;
-        ptls->heap.mafreelist = ma->next;
-    }
-    ma->a = (jl_value_t*)((uintptr_t)m | !!isaligned);
-    ma->next = ptls->heap.mallocarrays;
-    ptls->heap.mallocarrays = ma;
+    void *a = (void*)((uintptr_t)m | !!isaligned);
+    small_arraylist_push(&ptls->heap.mallocarrays, a);
 }
 
 
@@ -1143,10 +1138,6 @@ void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT
     jl_batch_accum_heap_size(ptls, sz);
 }
 
-void jl_gc_count_freed(size_t sz) JL_NOTSAFEPOINT
-{
-    jl_batch_accum_free_size(jl_current_task->ptls, sz);
-}
 
 // Only safe to update the heap inside the GC
 static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTSAFEPOINT
@@ -1222,19 +1213,21 @@ size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT
 }
 
 
-static void jl_gc_free_memory(jl_value_t *v, int isaligned) JL_NOTSAFEPOINT
+static void jl_gc_free_memory(jl_genericmemory_t *v, int isaligned) JL_NOTSAFEPOINT
 {
     assert(jl_is_genericmemory(v));
     jl_genericmemory_t *m = (jl_genericmemory_t*)v;
     assert(jl_genericmemory_how(m) == 1 || jl_genericmemory_how(m) == 2);
     char *d = (char*)m->ptr;
+    size_t freed_bytes = memory_block_usable_size(d, isaligned);
+    assert(freed_bytes != 0);
     if (isaligned)
         jl_free_aligned(d);
     else
         free(d);
     jl_atomic_store_relaxed(&gc_heap_stats.heap_size,
-        jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - jl_genericmemory_nbytes(m));
-    gc_num.freed += jl_genericmemory_nbytes(m);
+        jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - freed_bytes);
+    gc_num.freed += freed_bytes;
     gc_num.freecall++;
 }
 
@@ -1245,24 +1238,23 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT
     for (int t_i = 0; t_i < gc_n_threads; t_i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
         if (ptls2 != NULL) {
-            mallocarray_t *ma = ptls2->heap.mallocarrays;
-            mallocarray_t **pma = &ptls2->heap.mallocarrays;
-            while (ma != NULL) {
-                mallocarray_t *nxt = ma->next;
-                jl_value_t *a = (jl_value_t*)((uintptr_t)ma->a & ~1);
-                int bits = jl_astaggedvalue(a)->bits.gc;
-                if (gc_marked(bits)) {
-                    pma = &ma->next;
+            size_t n = 0;
+            size_t l = ptls2->heap.mallocarrays.len;
+            void **lst = ptls2->heap.mallocarrays.items;
+            // filter without preserving order
+            while (n < l) {
+                jl_genericmemory_t *m = (jl_genericmemory_t*)((uintptr_t)lst[n] & ~1);
+                if (gc_marked(jl_astaggedvalue(m)->bits.gc)) {
+                    n++;
                 }
                 else {
-                    *pma = nxt;
-                    int isaligned = (uintptr_t)ma->a & 1;
-                    jl_gc_free_memory(a, isaligned);
-                    free(ma);
+                    int isaligned = (uintptr_t)lst[n] & 1;
+                    jl_gc_free_memory(m, isaligned);
+                    l--;
+                    lst[n] = lst[l];
                 }
-                gc_time_count_mallocd_memory(bits);
-                ma = nxt;
             }
+            ptls2->heap.mallocarrays.len = l;
         }
     }
     gc_time_mallocd_memory_end();
@@ -3968,8 +3960,7 @@ void jl_init_thread_heap(jl_ptls_t ptls)
     small_arraylist_new(&heap->live_tasks, 0);
     for (int i = 0; i < JL_N_STACK_POOLS; i++)
         small_arraylist_new(&heap->free_stacks[i], 0);
-    heap->mallocarrays = NULL;
-    heap->mafreelist = NULL;
+    small_arraylist_new(&heap->mallocarrays, 0);
    heap->big_objects = NULL;
    heap->remset = &heap->_remset[0];
    heap->last_remset = &heap->_remset[1];
@@ -4069,58 +4060,44 @@ JL_DLLEXPORT void jl_throw_out_of_memory_error(void)
     jl_throw(jl_memory_exception);
 }
 
-// allocation wrappers that track allocation and let collection run
+// allocation wrappers that add to gc pressure
 
-JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
+JL_DLLEXPORT void *jl_malloc(size_t sz)
 {
-    jl_gcframe_t **pgcstack = jl_get_pgcstack();
-    jl_task_t *ct = jl_current_task;
-    void *data = malloc(sz);
-    if (data != NULL && pgcstack != NULL && ct->world_age) {
-        jl_ptls_t ptls = ct->ptls;
-        maybe_collect(ptls);
-        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
-        jl_atomic_store_relaxed(&ptls->gc_num.malloc,
-            jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
-        jl_batch_accum_heap_size(ptls, sz);
-    }
-    return data;
+    return jl_gc_counted_malloc(sz);
 }
 
-JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
+//_unchecked_calloc does not check for potential overflow of nm*sz
+STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) {
+    size_t nmsz = nm*sz;
+    return jl_gc_counted_calloc(nmsz, 1);
+}
+
+JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz)
 {
-    jl_gcframe_t **pgcstack = jl_get_pgcstack();
-    jl_task_t *ct = jl_current_task;
-    void *data = calloc(nm, sz);
-    if (data != NULL && pgcstack != NULL && ct->world_age) {
-        jl_ptls_t ptls = ct->ptls;
-        maybe_collect(ptls);
-        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz);
-        jl_atomic_store_relaxed(&ptls->gc_num.malloc,
-            jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
-        jl_batch_accum_heap_size(ptls, sz * nm);
-    }
-    return data;
+    if (nm > SSIZE_MAX/sz)
+        return NULL;
+    return _unchecked_calloc(nm, sz);
 }
 
-JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
+JL_DLLEXPORT void jl_free(void *p)
 {
-    jl_gcframe_t **pgcstack = jl_get_pgcstack();
-    jl_task_t *ct = jl_current_task;
-    free(p);
-    if (pgcstack != NULL && ct->world_age) {
-        jl_batch_accum_free_size(ct->ptls, sz);
+    if (p != NULL) {
+        size_t sz = memory_block_usable_size(p, 0);
+        free(p);
+        jl_task_t *ct = jl_get_current_task();
+        if (ct != NULL)
+            jl_batch_accum_free_size(ct->ptls, sz);
     }
 }
 
-JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz)
+JL_DLLEXPORT void *jl_realloc(void *p, size_t sz)
 {
-    jl_gcframe_t **pgcstack = jl_get_pgcstack();
-    jl_task_t *ct = jl_current_task;
+    size_t old = p ? memory_block_usable_size(p, 0) : 0;
     void *data = realloc(p, sz);
-    if (data != NULL && pgcstack != NULL && ct->world_age) {
+    jl_task_t *ct = jl_get_current_task();
+    if (data != NULL && ct != NULL) {
+        sz = memory_block_usable_size(data, 0);
         jl_ptls_t ptls = ct->ptls;
         maybe_collect(ptls);
         if (!(sz < old))
@@ -4140,63 +4117,80 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size
     return data;
 }
 
-// allocation wrappers that save the size of allocations, to allow using
-// jl_gc_counted_* functions with a libc-compatible API.
-
-JL_DLLEXPORT void *jl_malloc(size_t sz)
+JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
 {
-    int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT);
-    if (p == NULL)
-        return NULL;
-    p[0] = sz;
-    return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
+    jl_task_t *ct = jl_current_task;
+    void *data = malloc(sz);
+    if (data != NULL && ct != NULL && ct->world_age) {
+        sz = memory_block_usable_size(data, 0);
+        jl_ptls_t ptls = ct->ptls;
+        maybe_collect(ptls);
+        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
+        jl_atomic_store_relaxed(&ptls->gc_num.malloc,
+            jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
+        jl_batch_accum_heap_size(ptls, sz);
+    }
+    return data;
 }
 
-//_unchecked_calloc does not check for potential overflow of nm*sz
-STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) {
-    size_t nmsz = nm*sz;
-    int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1);
-    if (p == NULL)
-        return NULL;
-    p[0] = nmsz;
-    return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
+JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
+{
+    jl_task_t *ct = jl_current_task;
+    void *data = calloc(nm, sz);
+    if (data != NULL && ct != NULL && ct->world_age) {
+        sz = memory_block_usable_size(data, 0);
+        jl_ptls_t ptls = ct->ptls;
+        maybe_collect(ptls);
+        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
+        jl_atomic_store_relaxed(&ptls->gc_num.malloc,
+            jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
+        jl_batch_accum_heap_size(ptls, sz);
+    }
+    return data;
 }
 
-JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz)
+JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
 {
-    if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT)
-        return NULL;
-    return _unchecked_calloc(nm, sz);
+    jl_free(p);
 }
 
-JL_DLLEXPORT void jl_free(void *p)
+JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz)
 {
-    if (p != NULL) {
-        int64_t *pp = (int64_t *)p - 2;
-        size_t sz = pp[0];
-        jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT);
-    }
+    return jl_realloc(p, sz);
 }
 
-JL_DLLEXPORT void *jl_realloc(void *p, size_t sz)
+// =========================================================================== //
+// malloc wrappers, aligned allocation
+// =========================================================================== //
+
+#if defined(_OS_WINDOWS_)
+// helper function based partly on wine msvcrt80+ heap.c
+// but with several fixes to improve the correctness of the computation and remove unnecessary parameters
+#define SAVED_PTR(x) ((void *)((DWORD_PTR)((char *)x - sizeof(void *)) & \
+                               ~(sizeof(void *) - 1)))
+static size_t _aligned_msize(void *p)
 {
-    int64_t *pp;
-    size_t szold;
-    if (p == NULL) {
-        pp = NULL;
-        szold = 0;
-    }
-    else {
-        pp = (int64_t *)p - 2;
-        szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT;
-    }
-    int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT);
-    if (pnew == NULL)
-        return NULL;
-    pnew[0] = sz;
-    return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
+    void *alloc_ptr = *(void**)SAVED_PTR(p);
+    return _msize(alloc_ptr) - ((char*)p - (char*)alloc_ptr);
 }
+#undef SAVED_PTR
+#endif
 
+size_t memory_block_usable_size(void *p, int isaligned) JL_NOTSAFEPOINT
+{
+#if defined(_OS_WINDOWS_)
+    if (isaligned)
+        return _aligned_msize(p);
+    else
+        return _msize(p);
+#elif defined(_OS_DARWIN_)
+    return malloc_size(p);
+#else
+    return malloc_usable_size(p);
+#endif
+}
 
 // allocating blocks for Arrays and Strings
 
 JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
@@ -4214,12 +4208,13 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
     void *b = malloc_cache_align(allocsz);
     if (b == NULL)
         jl_throw(jl_memory_exception);
-
+    size_t allocated_bytes = memory_block_usable_size(b, 1);
+    assert(allocated_bytes >= allocsz);
     jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-        jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz);
+        jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocated_bytes);
     jl_atomic_store_relaxed(&ptls->gc_num.malloc,
        jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
-    jl_batch_accum_heap_size(ptls, allocsz);
+    jl_batch_accum_heap_size(ptls, allocated_bytes);
 #ifdef _OS_WINDOWS_
     SetLastError(last_error);
 #endif
diff --git a/src/gc.h b/src/gc.h
index 01d8745b2899e..7e4da2bd1900f 100644
--- a/src/gc.h
+++ b/src/gc.h
@@ -143,11 +143,6 @@ JL_EXTENSION typedef struct _bigval_t {
 
 // data structure for tracking malloc'd arrays and genericmemory.
 
-typedef struct _mallocarray_t {
-    jl_value_t *a;
-    struct _mallocarray_t *next;
-} mallocarray_t;
-
 // pool page metadata
 typedef struct _jl_gc_pagemeta_t {
     // next metadata structure in per-thread list
diff --git a/src/genericmemory.c b/src/genericmemory.c
index b36852d53f9c8..02293867da4df 100644
--- a/src/genericmemory.c
+++ b/src/genericmemory.c
@@ -165,7 +165,8 @@ JL_DLLEXPORT jl_genericmemory_t *jl_ptr_to_genericmemory(jl_value_t *mtype, void
     if (own_buffer) {
         int isaligned = 0;  // TODO: allow passing memalign'd buffers
         jl_gc_track_malloced_genericmemory(ct->ptls, m, isaligned);
-        jl_gc_count_allocd(nel*elsz);
+        size_t allocated_bytes = memory_block_usable_size(data, isaligned);
+        jl_gc_count_allocd(allocated_bytes);
     }
     return m;
 }
@@ -208,8 +209,6 @@ JL_DLLEXPORT jl_value_t *jl_genericmemory_to_string(jl_genericmemory_t *m, size_
     JL_GC_PUSH1(&o);
     jl_value_t *str = jl_pchar_to_string((const char*)m->ptr, len);
     JL_GC_POP();
-    if (how == 1) // TODO: we might like to early-call jl_gc_free_memory here instead actually, but hopefully `m` will die soon
-        jl_gc_count_freed(mlength);
     return str;
 }
 // n.b. how == 0 is always pool-allocated, so the freed bytes are computed from the pool not the object
diff --git a/src/julia_internal.h b/src/julia_internal.h
index 1c2d071d1a6cd..05a2f1e677d60 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -608,6 +608,7 @@ jl_svec_t *jl_perm_symsvec(size_t n, ...);
 #endif
 
 jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz);
+JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz);
 JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz);
 
 JL_DLLEXPORT void JL_NORETURN jl_throw_out_of_memory_error(void);
@@ -618,6 +619,7 @@ JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT;
 void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT;
 void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned) JL_NOTSAFEPOINT;
 size_t jl_genericmemory_nbytes(jl_genericmemory_t *a) JL_NOTSAFEPOINT;
+size_t memory_block_usable_size(void *mem, int isaligned) JL_NOTSAFEPOINT;
 void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT;
 void jl_gc_count_freed(size_t sz) JL_NOTSAFEPOINT;
 void jl_gc_run_all_finalizers(jl_task_t *ct);
diff --git a/src/julia_threads.h b/src/julia_threads.h
index 3a0f7f12bffe5..0ca47cc553c88 100644
--- a/src/julia_threads.h
+++ b/src/julia_threads.h
@@ -130,8 +130,7 @@ typedef struct {
     small_arraylist_t live_tasks;
 
     // variables for tracking malloc'd arrays
-    struct _mallocarray_t *mallocarrays;
-    struct _mallocarray_t *mafreelist;
+    small_arraylist_t mallocarrays;
 
     // variables for tracking big objects
     struct _bigval_t *big_objects;
diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp
index 0fb5b9bb18805..8ba321c75b239 100644
--- a/src/llvm-late-gc-lowering.cpp
+++ b/src/llvm-late-gc-lowering.cpp
@@ -2331,8 +2331,10 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
         // strip all constant alias information, as it might depend on the gc having
         // preserved a gc root, which stops being true after this pass (#32215)
         // similar to RewriteStatepointsForGC::stripNonValidData, but less aggressive
-        if (I->getMetadata(LLVMContext::MD_invariant_load))
-            I->setMetadata(LLVMContext::MD_invariant_load, NULL);
+        if (auto *LI = dyn_cast<LoadInst>(I)) {
+            if (isSpecialPtr(LI->getPointerOperand()->getType()) && LI->getMetadata(LLVMContext::MD_invariant_load))
+                LI->setMetadata(LLVMContext::MD_invariant_load, NULL);
+        }
         if (MDNode *TBAA = I->getMetadata(LLVMContext::MD_tbaa)) {
             if (TBAA->getNumOperands() == 4 && isTBAA(TBAA, {"jtbaa_const", "jtbaa_memoryptr", "jtbaa_memorylen", "tbaa_memoryown"})) {
                 MDNode *MutableTBAA = createMutableTBAAAccessTag(TBAA);
diff --git a/src/mtarraylist.c b/src/mtarraylist.c
index 8bad44797dab4..1bd6810cda8a6 100644
--- a/src/mtarraylist.c
+++ b/src/mtarraylist.c
@@ -14,8 +14,8 @@ extern "C" {
 // but there can be any number of observers
 
 typedef struct {
-    _Atomic(uint32_t) len;
-    uint32_t max;
+    _Atomic(size_t) len;
+    size_t max;
     _Atomic(_Atomic(void*)*) items;
     _Atomic(void*) _space[SMALL_AL_N_INLINE];
 } small_mtarraylist_t;
diff --git a/src/pipeline.cpp b/src/pipeline.cpp
index 5c12e3dad0dd7..2df9d0dfd5a31 100644
--- a/src/pipeline.cpp
+++ b/src/pipeline.cpp
@@ -490,6 +490,13 @@ static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *
         FPM.addPass(IRCEPass());
         FPM.addPass(InstCombinePass());
         FPM.addPass(JumpThreadingPass());
+    } else if (O.getSpeedupLevel() >= 1) {
+        JULIA_PASS(FPM.addPass(AllocOptPass()));
+        FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
+        FPM.addPass(MemCpyOptPass());
+        FPM.addPass(SCCPPass());
+        FPM.addPass(InstCombinePass());
+        FPM.addPass(ADCEPass());
     }
     if (O.getSpeedupLevel() >= 3) {
         FPM.addPass(GVNPass());
diff --git a/src/staticdata.c b/src/staticdata.c
index 9d4c60a137058..76bb488731a92 100644
--- a/src/staticdata.c
+++ b/src/staticdata.c
@@ -725,16 +725,6 @@ static uintptr_t jl_fptr_id(void *fptr)
         return *(uintptr_t*)pbp;
 }
 
-static int effects_foldable(uint32_t effects)
-{
-    // N.B.: This needs to be kept in sync with Core.Compiler.is_foldable(effects, true)
-    return ((effects & 0x7) == 0) && // is_consistent(effects)
-           (((effects >> 10) & 0x03) == 0) && // is_noub(effects)
-           (((effects >> 3) & 0x03) == 0) && // is_effect_free(effects)
-           ((effects >> 6) & 0x01); // is_terminates(effects)
-}
-
-
 // `jl_queue_for_serialization` adds items to `serialization_order`
 #define jl_queue_for_serialization(s, v) jl_queue_for_serialization_((s), (jl_value_t*)(v), 1, 0)
 static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) JL_GC_DISABLED;
@@ -848,25 +838,8 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
             // TODO: if (ci in ci->defs->cache)
                 record_field_change((jl_value_t**)&ci->next, NULL);
         }
-        jl_value_t *inferred = jl_atomic_load_relaxed(&ci->inferred);
-        if (inferred && inferred != jl_nothing) { // disregard if there is nothing here to delete (e.g. builtins, unspecialized)
-            if (!is_relocatable_ci(&relocatable_ext_cis, ci))
-                record_field_change((jl_value_t**)&ci->inferred, jl_nothing);
-            else if (jl_is_method(ci->def->def.method) && // don't delete toplevel code
-                     ci->def->def.method->source) { // don't delete code from optimized opaque closures that can't be reconstructed (and builtins)
-                if (jl_atomic_load_relaxed(&ci->max_world) != ~(size_t)0 || // delete all code that cannot run
-                    jl_atomic_load_relaxed(&ci->invoke) == jl_fptr_const_return) { // delete all code that just returns a constant
-                    record_field_change((jl_value_t**)&ci->inferred, jl_nothing);
-                }
-                else if (native_functions && // don't delete any code if making a ji file
-                         (ci->owner == jl_nothing) && // don't delete code for external interpreters
-                         !effects_foldable(ci->ipo_purity_bits) && // don't delete code we may want for irinterp
-                         jl_ir_inlining_cost(inferred) == UINT16_MAX) { // don't delete inlineable code
-                    // delete the code now: if we thought it was worth keeping, it would have been converted to object code
-                    record_field_change((jl_value_t**)&ci->inferred, jl_nothing);
-                }
-            }
-        }
+        if (jl_atomic_load_relaxed(&ci->inferred) && !is_relocatable_ci(&relocatable_ext_cis, ci))
+            record_field_change((jl_value_t**)&ci->inferred, jl_nothing);
     }
 
     if (immediate) // must be things that can be recursively handled, and valid as type parameters
diff --git a/src/subtype.c b/src/subtype.c
index 2d1221904d149..8bb7a2070c23c 100644
--- a/src/subtype.c
+++ b/src/subtype.c
@@ -2667,31 +2667,22 @@ static void set_bound(jl_value_t **bound, jl_value_t *val, jl_tvar_t *v, jl_sten
 // subtype, treating all vars as existential
 static int subtype_in_env_existential(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
 {
-    jl_varbinding_t *v = e->vars;
-    int len = 0;
     if (x == jl_bottom_type || y == (jl_value_t*)jl_any_type)
         return 1;
-    while (v != NULL) {
-        len++;
-        v = v->prev;
-    }
-    int8_t *rs = (int8_t*)malloc_s(len);
+    int8_t *rs = (int8_t*)alloca(current_env_length(e));
+    jl_varbinding_t *v = e->vars;
     int n = 0;
-    v = e->vars;
-    while (n < len) {
-        assert(v != NULL);
+    while (v != NULL) {
         rs[n++] = v->right;
         v->right = 1;
         v = v->prev;
     }
     int issub = subtype_in_env(x, y, e);
     n = 0;
     v = e->vars;
-    while (n < len) {
-        assert(v != NULL);
+    while (v != NULL) {
         v->right = rs[n++];
         v = v->prev;
     }
-    free(rs);
     return issub;
 }
 
@@ -2739,6 +2730,8 @@ static int check_unsat_bound(jl_value_t *t, jl_tvar_t *v, jl_stenv_t *e) JL_NOTS
 }
 
 
+static int intersect_var_ccheck_in_env(jl_value_t *xlb, jl_value_t *xub, jl_value_t *ylb, jl_value_t *yub, jl_stenv_t *e, int flip);
+
 static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int8_t R, int param)
 {
     jl_varbinding_t *bb = lookup(e, b);
@@ -2750,20 +2743,14 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int
         return R ? intersect(a, bb->lb, e, param) : intersect(bb->lb, a, e, param);
     if (!jl_is_type(a) && !jl_is_typevar(a))
         return set_var_to_const(bb, a, e, R);
-    jl_savedenv_t se;
     if (param == 2) {
         jl_value_t *ub = NULL;
         JL_GC_PUSH1(&ub);
         if (!jl_has_free_typevars(a)) {
-            save_env(e, &se, 1);
-            int issub = subtype_in_env_existential(bb->lb, a, e);
-            restore_env(e, &se, 1);
-            if (issub) {
-                issub = subtype_in_env_existential(a, bb->ub, e);
-                restore_env(e, &se, 1);
-            }
-            free_env(&se);
-            if (!issub) {
+            if (R) flip_offset(e);
+            int ccheck = intersect_var_ccheck_in_env(bb->lb, bb->ub, a, a, e, !R);
+            if (R) flip_offset(e);
+            if (!ccheck) {
                 JL_GC_POP();
                 return jl_bottom_type;
             }
@@ -2773,6 +2760,7 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int
         e->triangular++;
         ub = R ? intersect_aside(a, bb->ub, e, bb->depth0) : intersect_aside(bb->ub, a, e, bb->depth0);
         e->triangular--;
+        jl_savedenv_t se;
         save_env(e, &se, 1);
         int issub = subtype_in_env_existential(bb->lb, ub, e);
         restore_env(e, &se, 1);
@@ -3845,6 +3833,89 @@ static int subtype_by_bounds(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) JL_NOT
     return compareto_var(x, (jl_tvar_t*)y, e, -1) || compareto_var(y, (jl_tvar_t*)x, e, 1);
 }
 
+static int intersect_var_ccheck_in_env(jl_value_t *xlb, jl_value_t *xub, jl_value_t *ylb, jl_value_t *yub, jl_stenv_t *e, int flip)
+{
+    int easy_check1 = xlb == jl_bottom_type ||
+                      yub == (jl_value_t *)jl_any_type ||
+                      (e->Loffset == 0 && obviously_in_union(yub, xlb));
+    int easy_check2 = ylb == jl_bottom_type ||
+                      xub == (jl_value_t *)jl_any_type ||
+                      (e->Loffset == 0 && obviously_in_union(xub, ylb));
+    int nofree1 = 0, nofree2 = 0;
+    if (!easy_check1) {
+        nofree1 = !jl_has_free_typevars(xlb) && !jl_has_free_typevars(yub);
+        if (nofree1 && e->Loffset == 0) {
+            easy_check1 = jl_subtype(xlb, yub);
+            if (!easy_check1)
+                return 0;
+        }
+    }
+    if (!easy_check2) {
+        nofree2 = !jl_has_free_typevars(ylb) && !jl_has_free_typevars(xub);
+        if (nofree2 && e->Loffset == 0) {
+            easy_check2 = jl_subtype(ylb, xub);
+            if (!easy_check2)
+                return 0;
+        }
+    }
+    if (easy_check1 && easy_check2)
+        return 1;
+    int ccheck = 0;
+    if ((easy_check1 || nofree1) && (easy_check2 || nofree2)) {
+        jl_varbinding_t *vars = e->vars;
+        e->vars = NULL;
+        ccheck = easy_check1 || subtype_in_env(xlb, yub, e);
+        if (ccheck && !easy_check2) {
+            flip_offset(e);
+            ccheck = subtype_in_env(ylb, xub, e);
+            flip_offset(e);
+        }
+        e->vars = vars;
+        return ccheck;
+    }
+    jl_savedenv_t se;
+    save_env(e, &se, 1);
+    // first try normal flip.
+    if (flip) flip_vars(e);
+    ccheck = easy_check1 || subtype_in_env(xlb, yub, e);
+    if (ccheck && !easy_check2) {
+        flip_offset(e);
+        ccheck = subtype_in_env(ylb, xub, e);
+        flip_offset(e);
+    }
+    if (flip) flip_vars(e);
+    if (!ccheck) {
+        // then try reverse flip.
+        restore_env(e, &se, 1);
+        if (!flip) flip_vars(e);
+        ccheck = easy_check1 || subtype_in_env(xlb, yub, e);
+        if (ccheck && !easy_check2) {
+            flip_offset(e);
+            ccheck = subtype_in_env(ylb, xub, e);
+            flip_offset(e);
+        }
+        if (!flip) flip_vars(e);
+    }
+    if (!ccheck) {
+        // then try existential.
+        restore_env(e, &se, 1);
+        if (easy_check1)
+            ccheck = 1;
+        else {
+            ccheck = subtype_in_env_existential(xlb, yub, e);
+            restore_env(e, &se, 1);
+        }
+        if (ccheck && !easy_check2) {
+            flip_offset(e);
+            ccheck = subtype_in_env_existential(ylb, xub, e);
+            flip_offset(e);
+            restore_env(e, &se, 1);
+        }
+    }
+    free_env(&se);
+    return ccheck;
+}
+
 static int has_typevar_via_env(jl_value_t *x, jl_tvar_t *t, jl_stenv_t *e)
 {
     if (e->Loffset == 0) {
@@ -3977,14 +4048,8 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa
                     ccheck = 1;
                 }
                 else {
-                    if (R) flip_vars(e);
-                    ccheck = subtype_in_env(xlb, yub, e);
-                    if (ccheck) {
-                        flip_offset(e);
-                        ccheck = subtype_in_env(ylb, xub, e);
-                        flip_offset(e);
-                    }
-                    if (R) flip_vars(e);
+                    // try many subtype check to avoid false `Union{}`
+                    ccheck = intersect_var_ccheck_in_env(xlb, xub, ylb, yub, e, R);
                 }
                 if (R) flip_offset(e);
                 if (!ccheck)
diff --git a/src/support/arraylist.h b/src/support/arraylist.h
index 6ad2f0e2f28c9..edad2880dbed2 100644
--- a/src/support/arraylist.h
+++ b/src/support/arraylist.h
@@ -5,7 +5,7 @@
 
 #define AL_N_INLINE 29
 
-#define SMALL_AL_N_INLINE 6
+#define SMALL_AL_N_INLINE 5
 
 #ifdef __cplusplus
 extern "C" {
@@ -13,7 +13,7 @@ extern "C" {
 
 #include "analyzer_annotations.h"
 
-typedef struct {
+typedef struct { // 32 words
     size_t len;
     size_t max;
     void **items;
@@ -27,9 +27,9 @@ void arraylist_push(arraylist_t *a, void *elt) JL_NOTSAFEPOINT;
 void *arraylist_pop(arraylist_t *a) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void arraylist_grow(arraylist_t *a, size_t n) JL_NOTSAFEPOINT;
 
-typedef struct {
-    uint32_t len;
-    uint32_t max;
+typedef struct { // 8 words
+    size_t len;
+    size_t max;
     void **items;
     void *_space[SMALL_AL_N_INLINE];
 } small_arraylist_t;
diff --git a/stdlib/LinearAlgebra/src/bunchkaufman.jl b/stdlib/LinearAlgebra/src/bunchkaufman.jl
index 8d1ded9bf8111..db0d44a1e25a6 100644
--- a/stdlib/LinearAlgebra/src/bunchkaufman.jl
+++ b/stdlib/LinearAlgebra/src/bunchkaufman.jl
@@ -127,6 +127,9 @@ function bunchkaufman!(A::StridedMatrix{<:BlasFloat}, rook::Bool = false; check:
     end
 end
 
+bkcopy_oftype(A, S) = eigencopy_oftype(A, S)
+bkcopy_oftype(A::Symmetric{<:Complex}, S) = Symmetric(copytrito!(similar(parent(A), S, size(A)), A.data, A.uplo), sym_uplo(A.uplo))
+
 """
     bunchkaufman(A, rook::Bool=false; check = true) -> S::BunchKaufman
 
@@ -206,7 +209,7 @@ julia> S.L*S.D*S.L' - A[S.p, S.p]
 ```
 """
 bunchkaufman(A::AbstractMatrix{T}, rook::Bool=false; check::Bool = true) where {T} =
-    bunchkaufman!(eigencopy_oftype(A, typeof(sqrt(oneunit(T)))), rook; check = check)
+    bunchkaufman!(bkcopy_oftype(A, typeof(sqrt(oneunit(T)))), rook; check = check)
 
 BunchKaufman{T}(B::BunchKaufman) where {T} =
     BunchKaufman(convert(Matrix{T}, B.LD), B.ipiv, B.uplo, B.symmetric, B.rook, B.info)
@@ -1529,7 +1532,7 @@ function bunchkaufman(A::AbstractMatrix{TS},
     rook::Bool = false;
     check::Bool = true
     ) where TS <: ClosedScalar{TR} where TR <: ClosedReal
-    return bunchkaufman!(eigencopy_oftype(A, TS), rook; check)
+    return bunchkaufman!(bkcopy_oftype(A, TS), rook; check)
 end
 
 function bunchkaufman(A::AbstractMatrix{TS},
@@ -1551,15 +1554,15 @@ function bunchkaufman(A::AbstractMatrix{TS},
 
     # We promote input to BigInt to avoid overflow problems
     if TA == Nothing
         if TS <: Integer
-            M = Rational{BigInt}.(eigencopy_oftype(A, TS))
+            M = Rational{BigInt}.(bkcopy_oftype(A, TS))
         else
-            M = Complex{Rational{BigInt}}.(eigencopy_oftype(A, TS))
+            M = Complex{Rational{BigInt}}.(bkcopy_oftype(A, TS))
         end
     else
         if TS <: Integer
-            M = TA(Rational{BigInt}.(eigencopy_oftype(A, TS)), Symbol(A.uplo))
+            M = TA(Rational{BigInt}.(bkcopy_oftype(A, TS)), Symbol(A.uplo))
         else
-            M = TA(Complex{Rational{BigInt}}.(eigencopy_oftype(A, TS)),
+            M = TA(Complex{Rational{BigInt}}.(bkcopy_oftype(A, TS)),
                 Symbol(A.uplo))
         end
     end
diff --git a/stdlib/LinearAlgebra/src/symmetriceigen.jl b/stdlib/LinearAlgebra/src/symmetriceigen.jl
index 0c86383685807..e9f43ef640392 100644
--- a/stdlib/LinearAlgebra/src/symmetriceigen.jl
+++ b/stdlib/LinearAlgebra/src/symmetriceigen.jl
@@ -3,6 +3,7 @@
 # preserve HermOrSym wrapper
 eigencopy_oftype(A::Hermitian, S) = Hermitian(copy_similar(A, S), sym_uplo(A.uplo))
 eigencopy_oftype(A::Symmetric, S) = Symmetric(copy_similar(A, S), sym_uplo(A.uplo))
+eigencopy_oftype(A::Symmetric{<:Complex}, S) = copyto!(similar(parent(A), S), A)
 
 # Eigensolvers for symmetric and Hermitian matrices
 eigen!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}; sortby::Union{Function,Nothing}=nothing) =
diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl
index 39ae7ec83a5c3..136f741fa5c0e 100644
--- a/stdlib/LinearAlgebra/test/hessenberg.jl
+++ b/stdlib/LinearAlgebra/test/hessenberg.jl
@@ -250,4 +250,11 @@ end
     @test axes(S) === (r,r)
 end
 
+@testset "complex Symmetric" begin
+    D = diagm(0=>ComplexF64[1,2])
+    S = Symmetric(D)
+    H = hessenberg(S)
+    @test H.H == D
+end
+
 end # module TestHessenberg
diff --git a/stdlib/LinearAlgebra/test/symmetriceigen.jl b/stdlib/LinearAlgebra/test/symmetriceigen.jl
index b3a5472c511f4..258ccbd2d4591 100644
--- a/stdlib/LinearAlgebra/test/symmetriceigen.jl
+++ b/stdlib/LinearAlgebra/test/symmetriceigen.jl
@@ -151,4 +151,10 @@ end
     @test HT * V ≈ V * Diagonal(λ)
 end
 
+@testset "complex Symmetric" begin
+    S = Symmetric(rand(ComplexF64,2,2))
+    λ, v = eigen(S)
+    @test S * v ≈ v * Diagonal(λ)
+end
+
 end # module TestSymmetricEigen
diff --git a/stdlib/REPL/docs/src/index.md b/stdlib/REPL/docs/src/index.md
index d2a17e3a6b4a3..fdc1be5c5f357 100644
--- a/stdlib/REPL/docs/src/index.md
+++ b/stdlib/REPL/docs/src/index.md
@@ -341,7 +341,15 @@ mapfoldl mapfoldr
 
 When a single complete tab-complete result is available at the end of an input line
 and 2 or more characters have been typed, a hint of the completion will show in a lighter color.
-This can be disabled via `Base.active_repl.options.hint_tab_completes = false`.
+This can be disabled via `Base.active_repl.options.hint_tab_completes = false` or by adding
+```
+atreplinit() do repl
+    if VERSION >= v"1.11.0-0"
+        repl.options.hint_tab_completes = false
+    end
+end
+```
+to your `~/.julia/config/startup.jl`.
 
 !!! compat "Julia 1.11"
     Tab-complete hinting was added in Julia 1.11
diff --git a/stdlib/TOML/src/print.jl b/stdlib/TOML/src/print.jl
index 63f65b017d393..c6c046b9b40c6 100644
--- a/stdlib/TOML/src/print.jl
+++ b/stdlib/TOML/src/print.jl
@@ -77,7 +77,7 @@ end
 # Fallback
 function printvalue(f::MbyFunc, io::IO, value, sorted::Bool)
     toml_value = to_toml_value(f, value)
-    @invokelatest printvalue(f, io, toml_value)
+    @invokelatest printvalue(f, io, toml_value, sorted)
 end
 
 function printvalue(f::MbyFunc, io::IO, value::AbstractVector, sorted::Bool)
@@ -156,7 +156,7 @@ function print_table(f::MbyFunc, io::IO, a::AbstractDict,
     )
     if a in inline_tables
-        @invokelatest print_inline_table(f, io, a)
+        @invokelatest print_inline_table(f, io, a, sorted)
         return
     end
diff --git a/stdlib/TOML/test/print.jl b/stdlib/TOML/test/print.jl
index 8fba1b1c1df10..e8a6431cb34a7 100644
--- a/stdlib/TOML/test/print.jl
+++ b/stdlib/TOML/test/print.jl
@@ -94,6 +94,14 @@ loaders = ["gzip", { driver = "csv", args = {delim = "\t"}}]
     a = 222
     d = 333
     """
+
+    # https://github.com/JuliaLang/julia/pull/57584
+    d = Dict("b" => [MyStruct(1), MyStruct(2)])
+    @test toml_str(d) do x
+        x isa MyStruct && return Dict("a" => x.a)
+    end == """
+    b = [{a = 1}, {a = 2}]
+    """
 end
 
 @testset "unsigned integers" for (x, s) in [
@@ -196,6 +204,14 @@ LocalPkg = {path = "LocalPkg"}
 @test toml_str(d; sorted=true, inline_tables) == s
 @test roundtrip(s)
 
+
+# https://github.com/JuliaLang/julia/pull/57584
+d = Dict("a" => 1, "b" => 2)
+inline_tables = IdSet{Dict}([d])
+s = "{a = 1, b = 2}"
+@test toml_str(d; sorted=true, inline_tables) == s
+
+
 # multiline strings (#55083)
 s = """
 a = \"\"\"lorem ipsum
diff --git a/test/compiler/codegen.jl b/test/compiler/codegen.jl
index e434899be6e31..10b66fda33667 100644
--- a/test/compiler/codegen.jl
+++ b/test/compiler/codegen.jl
@@ -866,7 +866,7 @@ if Sys.ARCH === :x86_64
     foo52079() = Core.Intrinsics.have_fma(Float64)
     if foo52079() == true
         let io = IOBuffer()
-            code_native(io,^,(Float64,Float64), dump_module=false)
+            code_native(io,Base.Math.exp_impl,(Float64,Float64,Val{:ℯ}), dump_module=false)
             str = String(take!(io))
             @test !occursin("fma_emulated", str)
             @test occursin("vfmadd", str)
@@ -933,3 +933,8 @@ let
     end
     nothing
 end
+
+struct Vec56937 x::NTuple{8, VecElement{Int}} end
+
+x56937 = Ref(Vec56937(ntuple(_->VecElement(1),8)))
+@test x56937[].x[1] == VecElement{Int}(1) # shouldn't crash
diff --git a/test/core.jl b/test/core.jl
index 9750cc519a746..f525a3fc39e12 100644
--- a/test/core.jl
+++ b/test/core.jl
@@ -5668,6 +5668,13 @@ let ni128 = sizeof(FP128test) ÷ sizeof(Int),
     @test reinterpret(UInt128, arr[2].fp) == expected
 end
 
+# make sure VecElement Tuple has the C alignment and ABI for supported types
+primitive type Int24 24 end
+@test Base.datatype_alignment(NTuple{10,VecElement{Int16}}) == 32
+@test Base.datatype_alignment(NTuple{10,VecElement{Int24}}) == 4
+@test Base.datatype_alignment(NTuple{10,VecElement{Int64}}) == 128
+@test Base.datatype_alignment(NTuple{10,VecElement{Int128}}) == 256
+
 # issue #21516
 struct T21516
     x::Vector{Float64}
diff --git a/test/llvmpasses/late-lower-gc.ll b/test/llvmpasses/late-lower-gc.ll
index 6dee18da5975f..8ca588f717ee2 100644
--- a/test/llvmpasses/late-lower-gc.ll
+++ b/test/llvmpasses/late-lower-gc.ll
@@ -125,6 +125,20 @@ top:
     ret void
 }
 
+; Confirm that `invariant.load` on other loads survive
+define void @gc_keep_invariant(float addrspace(1)* %0) {
+top:
+; CHECK-LABEL: @gc_keep_invariant
+    %pgcstack = call {}*** @julia.get_pgcstack()
+    %1 = bitcast {}*** %pgcstack to {}**
+    %current_task = getelementptr inbounds {}*, {}** %1, i64 -12
+
+; CHECK: %current_task = getelementptr inbounds ptr, ptr %1, i64 -12
+    %2 = load float, ptr addrspace(1) %0, align 4, !invariant.load !1
+; CHECK-NEXT: %2 = load float, ptr addrspace(1) %0, align 4, !invariant.load
+    ret void
+}
+
 define i32 @callee_root({} addrspace(10)* %v0, {} addrspace(10)* %v1) {
 top:
 ; CHECK-LABEL: @callee_root
diff --git a/test/math.jl b/test/math.jl
index c48a0c7f56323..d73c13530f166 100644
--- a/test/math.jl
+++ b/test/math.jl
@@ -1464,6 +1464,25 @@ end
     # two cases where we have observed > 1 ULP in the past
     @test 0.0013653274095082324^-97.60372292227069 == 4.088393948750035e279
     @test 8.758520413376658e-5^70.55863059215994 == 5.052076767078296e-287
+
+    # issue #53881
+    c53881 = 2.2844135865398217e222 # check correctness within 2 ULPs
+    @test prevfloat(1.0) ^ -Int64(2)^62 ≈ c53881 atol=2eps(c53881)
+    @test 2.0 ^ typemin(Int) == 0.0
+    @test (-1.0) ^ typemin(Int) == 1.0
+    Z = Int64(2)
+    E = prevfloat(1.0)
+    @test E ^ (-Z^54) ≈ 7.38905609893065
+    @test E ^ (-Z^62) ≈ 2.2844135865231613e222
+    @test E ^ (-Z^63) == Inf
+    @test abs(E ^ (Z^62-1) * E ^ (-Z^62+1) - 1) <= eps(1.0)
+    n, x = -1065564664, 0.9999997040311492
+    @test abs(x^n - Float64(big(x)^n)) / eps(x^n) == 0 # ULPs
+    @test E ^ (big(2)^100 + 1) == 0
+    @test E ^ 6705320061009595392 == nextfloat(0.0)
+    n = Int64(1024 / log2(E))
+    @test E^n == Inf
+    @test E^float(n) == Inf
 end
 
 # Test that sqrt behaves correctly and doesn't exhibit fp80 double rounding.
diff --git a/test/misc.jl b/test/misc.jl
index 3907354e9410b..a66c868aa1e17 100644
--- a/test/misc.jl
+++ b/test/misc.jl
@@ -598,6 +598,11 @@ let z = Z53061[Z53061(S53061(rand(), (rand(),rand())), 0) for _ in 1:10^4]
     @test abs(summarysize(z) - 640000)/640000 <= 0.01 broken = Sys.WORD_SIZE == 32 && Sys.islinux()
 end
 
+# issue #57506
+let len = 100, m1 = Memory{UInt8}(1:len), m2 = Memory{Union{Nothing,UInt8}}(1:len)
+    @test summarysize(m2) == summarysize(m1) + len
+end
+
 ## test conversion from UTF-8 to UTF-16 (for Windows APIs)
 
 # empty arrays
diff --git a/test/regex.jl b/test/regex.jl
index e5f1428527512..ca411b26bbacc 100644
--- a/test/regex.jl
+++ b/test/regex.jl
@@ -245,3 +245,11 @@ end
         @test match(re, "ababc").match === SubString("ababc", 3:5)
     end
 end
+
+@testset "#57817: Don't free Regex during exit finalizer calls" begin
+    # this shouldn't segfault
+    cmd = `$(Base.julia_cmd()) -t2 --startup-file=no -e 're = Regex(""); Threads.@spawn match(re, "", 1, UInt32(0))'`
+    for i in 1:10
+        @test success(pipeline(cmd, stderr=stderr))
+    end
+end
diff --git a/test/strings/basic.jl b/test/strings/basic.jl
index 87d812c5bf201..955da2d7c4564 100644
--- a/test/strings/basic.jl
+++ b/test/strings/basic.jl
@@ -878,6 +878,11 @@ end
             end
         end
     end
+
+    @testset "return type infers to `Int`" begin
+        @test Int === Base.infer_return_type(prevind, Tuple{AbstractString, Vararg})
+        @test Int === Base.infer_return_type(nextind, Tuple{AbstractString, Vararg})
+    end
 end
 
 @testset "first and last" begin
diff --git a/test/subtype.jl b/test/subtype.jl
index ba7f86bb86a14..979746bd626dc 100644
--- a/test/subtype.jl
+++ b/test/subtype.jl
@@ -1691,9 +1691,7 @@ CovType{T} = Union{AbstractArray{T,2},
 # issue #31703
 @testintersect(Pair{<:Any, Ref{Tuple{Ref{Ref{Tuple{Int}}},Ref{Float64}}}},
                Pair{T, S} where S<:(Ref{A} where A<:(Tuple{C,Ref{T}} where C<:(Ref{D} where D<:(Ref{E} where E<:Tuple{FF}) where FF<:B)) where B) where T,
-               Pair{T, Ref{Tuple{Ref{Ref{Tuple{Int}}},Ref{Float64}}}} where T)
-# TODO: should be able to get this result
-# Pair{Float64, Ref{Tuple{Ref{Ref{Tuple{Int}}},Ref{Float64}}}}
+               Pair{Float64, Ref{Tuple{Ref{Ref{Tuple{Int}}},Ref{Float64}}}})
 
 module I31703
 using Test, LinearAlgebra
@@ -1745,8 +1743,7 @@ end
                Tuple{Type{SA{2, L}}, Type{SA{2, L}}} where L)
 @testintersect(Tuple{Type{SA{2, L}}, Type{SA{2, 16}}} where L,
                Tuple{Type{<:SA{N, L}}, Type{<:SA{N, L}}} where {N,L},
-               # TODO: this could be narrower
-               Tuple{Type{SA{2, L}}, Type{SA{2, 16}}} where L)
+               Tuple{Type{SA{2, 16}}, Type{SA{2, 16}}})
 
 # issue #31993
 @testintersect(Tuple{Type{<:AbstractVector{T}}, Int} where T,
@@ -1851,9 +1848,9 @@ c32703(::Type{<:Str{C}}, str::Str{C}) where {C<:CSE} = str
               Tuple{Type{<:Str{C}}, Str{C}} where {C<:CSE},
               Union{})
 @test c32703(UTF16Str, ASCIIStr()) == 42
-@test_broken typeintersect(Tuple{Vector{Vector{Float32}},Matrix,Matrix},
-                           Tuple{Vector{V},Matrix{Int},Matrix{S}} where {S, V<:AbstractVector{S}}) ==
-    Tuple{Array{Array{Float32,1},1},Array{Int,2},Array{Float32,2}}
+@testintersect(Tuple{Vector{Vector{Float32}},Matrix,Matrix},
+               Tuple{Vector{V},Matrix{Int},Matrix{S}} where {S, V<:AbstractVector{S}},
+               Tuple{Array{Array{Float32,1},1},Array{Int,2},Array{Float32,2}})
 
 @testintersect(Tuple{Pair{Int, DataType}, Any},
                Tuple{Pair{A, B} where B<:Type, Int} where A,
@@ -2469,6 +2466,11 @@ end
 abstract type P47654{A} end
 @test Wrapper47654{P47654, Vector{Union{P47654,Nothing}}} <: Wrapper47654
 
+#issue 41561
+@testintersect(Tuple{Vector{VT}, Vector{VT}} where {N1, VT<:AbstractVector{N1}},
+               Tuple{Vector{VN} where {N, VN<:AbstractVector{N}}, Vector{Vector{Float64}}},
+               Tuple{Vector{Vector{Float64}}, Vector{Vector{Float64}}})
+
 @testset "known subtype/intersect issue" begin
     #issue 45874
     let S = Pair{Val{P}, AbstractVector{<:Union{P,<:AbstractMatrix{P}}}} where P,
@@ -2476,9 +2478,6 @@ abstract type P47654{A} end
         @test S <: T
     end
 
-    #issue 41561
-    @test_broken typeintersect(Tuple{Vector{VT}, Vector{VT}} where {N1, VT<:AbstractVector{N1}},
-                               Tuple{Vector{VN} where {N, VN<:AbstractVector{N}}, Vector{Vector{Float64}}}) !== Union{}
     #issue 40865
     @test Tuple{Set{Ref{Int}}, Set{Ref{Int}}} <: Tuple{Set{KV}, Set{K}} where {K,KV<:Union{K,Ref{K}}}
     @test Tuple{Set{Val{Int}}, Set{Val{Int}}} <: Tuple{Set{KV}, Set{K}} where {K,KV<:Union{K,Val{K}}}
@@ -2746,3 +2745,15 @@ end
     Val{Tuple{T,R,S}} where {T,R<:Vector{T},S<:Vector{R}},
     Val{Tuple{Int, Vector{Int}, T}} where T<:Vector{Vector{Int}},
 )
+
+#issue 57429
+@testintersect(
+    Pair{<:Any, <:Tuple{Int}},
+    Pair{N, S} where {N, NTuple{N,Int}<:S<:NTuple{M,Int} where {M}},
+    !Union{}
+)
+@testintersect(
+    Pair{N, T} where {N,NTuple{N,Int}<:T<:NTuple{N,Int}},
+    Pair{N, T} where {N,NTuple{N,Int}<:T<:Tuple{Int,Vararg{Int}}},
+    !Union{}
+)
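
For reviewers of the `base/math.jl` hunks: the sketch below is a minimal, self-contained Julia rendering of the new `^(::Float64, ::Int64)` large-exponent fallback, not Base's implementation. `pow_by_squaring_ok`, `fpow`, and `pow_sketch` are illustrative names introduced here; `fpow` crudely stands in for the internal `pow_body(::Float64, ::Float64)` kernel, which in Base uses an extended-precision log/exp rather than a plain `exp(y * log(x))`. Only the control flow, the clamping, the sign handling, and the `n = (n - n2) + n2` split, is taken from the diff.

```julia
# Illustrative sketch (not Base's implementation) of the large-exponent path.
pow_by_squaring_ok(n::Integer) = -2^12 <= n <= 3 * 2^13  # same range as use_power_by_squaring

fpow(x::Float64, y::Float64) = exp(y * log(x))  # crude stand-in for pow_body; assumes x > 0

function pow_sketch(x::Float64, n::Integer)
    n = clamp(n, Int64)        # mirrors ^(x::Float64, n::Integer) = x^clamp(n, Int64)
    n == 0 && return one(x)
    # small |n|: Base uses compensated power by squaring (pow_body(x, n))
    pow_by_squaring_ok(n) && return x^Int64(n)
    s = ifelse(x < 0 && isodd(n), -1.0, 1.0)  # result sign for a negative base
    x = abs(x)
    y = float(n)
    if y == n                  # n is exactly representable as a Float64
        return copysign(fpow(x, y), s)
    else                       # split n = (n - n2) + n2 so each part converts exactly
        n2 = n % 1024
        y = float(n - n2)
        return fpow(x, y) * copysign(fpow(x, float(n2)), s)
    end
end
```

The `% 1024` split works because `n - n2` is then a multiple of 1024, so after its trailing zero bits it carries at most 53 significant bits and converts to `Float64` without rounding even when `n` itself does not. For example, `pow_sketch(prevfloat(1.0), Int64(2)^62 - 1)` exercises the split branch; the `#53881` tests above check the Base version of this path to within 2 ulps, while this sketch is less accurate because of the crude `fpow`.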