Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backports release 1.11 #57714

Open
wants to merge 11 commits into
base: release-1.11
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion THIRDPARTY.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ for exceptions.
- [crc32c.c](https://stackoverflow.com/questions/17645167/implementing-sse-4-2s-crc32c-in-software) (CRC-32c checksum code by Mark Adler) [[ZLib](https://opensource.org/licenses/Zlib)].
- [LDC](https://github.com/ldc-developers/ldc/blob/master/LICENSE) (for ccall/cfunction ABI definitions) [BSD-3]. The portion of code that Julia uses from LDC is [BSD-3] licensed.
- [LLVM](https://releases.llvm.org/3.9.0/LICENSE.TXT) (for parts of src/disasm.cpp) [UIUC]
- [MINGW](https://sourceforge.net/p/mingw/mingw-org-wsl/ci/legacy/tree/mingwrt/mingwex/dirname.c) (for dirname implementation on Windows) [MIT]
- [NetBSD](https://www.netbsd.org/about/redistribution.html) (for setjmp, longjmp, and strptime implementations on Windows) [BSD-3]
- [Python](https://docs.python.org/3/license.html) (for strtod implementation on Windows) [PSF]
- [FEMTOLISP](https://github.com/JeffBezanson/femtolisp) [BSD-3]
Expand Down
26 changes: 25 additions & 1 deletion base/abstractarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3654,7 +3654,31 @@ function _keepat!(a::AbstractVector, m::AbstractVector{Bool})
deleteat!(a, j:lastindex(a))
end

## 1-d circshift ##
"""
circshift!(a::AbstractVector, shift::Integer)

Circularly shift, or rotate, the data in vector `a` by `shift` positions.

# Examples

```jldoctest
julia> circshift!([1, 2, 3, 4, 5], 2)
5-element Vector{Int64}:
4
5
1
2
3

julia> circshift!([1, 2, 3, 4, 5], -2)
5-element Vector{Int64}:
3
4
5
1
2
```
"""
function circshift!(a::AbstractVector, shift::Integer)
n = length(a)
n == 0 && return a
Expand Down
52 changes: 41 additions & 11 deletions base/math.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1140,6 +1140,10 @@ function modf(x::T) where T<:IEEEFloat
return (rx, ix)
end

# Decide whether an integer exponent `n` is small enough to be handled by the
# integer power-by-squaring kernel. The accepted window is the closed interval
# [-2^12, 3 * 2^13], i.e. -4096 through 24576 inclusive.
@inline function use_power_by_squaring(n::Integer)
    return (-2^12 <= n) && (n <= 3 * 2^13)
end

# @constprop aggressive to help the compiler see the switch between the integer and float
# variants for callers with constant `y`
@constprop :aggressive function ^(x::Float64, y::Float64)
Expand All @@ -1152,24 +1156,33 @@ end
y = sign(y)*0x1.8p62
end
yint = unsafe_trunc(Int64, y) # This is actually safe since julia freezes the result
y == yint && return @noinline x^yint
2*xu==0 && return abs(y)*Inf*(!(y>0)) # if x==0
x<0 && throw_exp_domainerror(x) # |y| is small enough that y isn't an integer
!isfinite(x) && return x*(y>0 || isnan(x)) # x is inf or NaN
yisint = y == yint
if yisint
yint == 0 && return 1.0
use_power_by_squaring(yint) && return @noinline pow_body(x, yint)
end
2*xu==0 && return abs(y)*Inf*(!(y>0)) # if x === +0.0 or -0.0 (Inf * false === 0.0)
s = 1
if x < 0
!yisint && throw_exp_domainerror(x) # y isn't an integer
s = ifelse(isodd(yint), -1, 1)
end
!isfinite(x) && return copysign(x,s)*(y>0 || isnan(x)) # x is inf or NaN
return copysign(pow_body(abs(x), y), s)
end

# Float64^Float64 kernel entry point: turn `x` into the raw bit pattern expected by
# the `pow_body(::UInt64, ::Float64)` method and forward to it.
@assume_effects :foldable @noinline function pow_body(x::Float64, y::Float64)
    bits = reinterpret(UInt64, x)
    # A raw value below 2^52 means sign bit and exponent field are both zero:
    # `x` is subnormal (or +0.0).
    if bits < (UInt64(1) << 52)
        # Scale by 2^52 so the significand is normalized, clear the sign bit, then
        # take the 52 back out of the exponent field. The subtraction is done in
        # UInt64 arithmetic, so the result need not be an ordinary Float64 encoding.
        # NOTE(review): presumably the UInt64 method decodes this form — confirm.
        scaled = x * 0x1p52
        bits = (reinterpret(UInt64, scaled) & ~sign_mask(Float64)) - (UInt64(52) << 52)
    end
    return pow_body(bits, y)
end

# Compute the power from the raw bit pattern `xu` of the base and the exponent `y`
# as exp(y * log(x)), carrying the logarithm and the product as high/low pairs.
@inline function pow_body(xu::UInt64, y::Float64)
    # `_log_ext` yields the logarithm split into a leading part and a correction
    # (presumably an extended-precision log — confirm against its definition).
    logxhi, logxlo = _log_ext(xu)
    # Product of the leading part with `y`, again as a high/low pair.
    xyhi, xylo = two_mul(logxhi, y)
    # Fold the contribution of the log's correction term into the low part.
    xylo = muladd(logxlo, y, xylo)
    hi = xyhi + xylo
    # Pass the rounded sum together with what was lost when forming it.
    # There is exactly one return here; the call is requested inline at the call site.
    return @inline Base.Math.exp_impl(hi, xylo - (hi - xyhi), Val(:ℯ))
end

@constprop :aggressive function ^(x::T, y::T) where T <: Union{Float16, Float32}
Expand All @@ -1193,12 +1206,29 @@ end
return T(exp2(log2(abs(widen(x))) * y))
end

# Generic integer exponent: saturate `n` to the `Int64` range with `clamp` and
# forward to the `^(::Float64, ::Int64)` method.
@constprop :aggressive @inline function ^(x::Float64, n::Integer)
    return x^clamp(n, Int64)
end
# Float64 raised to an Int64 power.
#
# * `n == 0` returns `one(x)`.
# * Exponents accepted by `use_power_by_squaring` go to the integer `pow_body` kernel.
# * All other exponents are routed through the floating-point `pow_body` kernel on
#   `abs(x)`, with the sign of the result restored afterwards.
@constprop :aggressive @inline function ^(x::Float64, n::Int64)
    n == 0 && return one(x)
    if use_power_by_squaring(n)
        return pow_body(x, n)
    else
        # The result is negative only for a negative base and an odd exponent.
        s = ifelse(x < 0 && isodd(n), -1.0, 1.0)
        x = abs(x)
        y = float(n)
        if y == n
            # `n` is exactly representable as a Float64: one float-exponent call.
            return copysign(pow_body(x, y), s)
        else
            # `n` was rounded by the conversion. Split it into `n - n2`, a multiple
            # of 1024 (which therefore fits in 53 significant bits and converts
            # exactly), and a remainder `n2` with |n2| < 1024 that is handled by the
            # integer kernel.
            n2 = n % 1024
            y = float(n - n2)
            return pow_body(x, y) * copysign(pow_body(x, n2), s)
        end
    end
end

# compensated power by squaring
# this method is only reliable for -2^20 < n < 2^20 (cf. #53881 #53886)
@assume_effects :terminates_locally @noinline function pow_body(x::Float64, n::Integer)
y = 1.0
xnlo = ynlo = 0.0
Expand Down
5 changes: 3 additions & 2 deletions base/precompilation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -141,15 +141,16 @@ function ExplicitEnv(envpath::String=Base.active_project())

# Extensions
deps_pkg = get(Dict{String, Any}, pkg_info, "extensions")::Dict{String, Any}
deps_pkg_concrete = Dict{String, Vector{String}}()
for (ext, triggers) in deps_pkg
if triggers isa String
triggers = [triggers]
else
triggers = triggers::Vector{String}
end
deps_pkg[ext] = triggers
deps_pkg_concrete[ext] = triggers
end
extensions[m_uuid] = deps_pkg
extensions[m_uuid] = deps_pkg_concrete

# Determine strategy to find package
lookup_strat = begin
Expand Down
2 changes: 1 addition & 1 deletion base/special/exp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ end
twopk = (k + UInt64(53)) << 52
return reinterpret(T, twopk + reinterpret(UInt64, small_part))*0x1p-53
end
#k == 1024 && return (small_part * 2.0) * 2.0^1023
k == 1024 && return (small_part * 2.0) * 2.0^1023
end
twopk = Int64(k) << 52
return reinterpret(T, twopk + reinterpret(Int64, small_part))
Expand Down
12 changes: 6 additions & 6 deletions base/strings/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -512,11 +512,11 @@ prevind(s::AbstractString, i::Int) = prevind(s, i, 1)

# Return the index reached by stepping back over `n` valid indices of `s`, starting
# from `i`.
#
# * Throws `ArgumentError` when `n` is negative.
# * Throws `BoundsError` unless `0 < i ≤ ncodeunits(s) + 1` (when bounds checks are on).
# * With `n == 0`, `i` itself must be a valid index; otherwise `string_index_err` is
#   called.
# * If the start of the string is reached with steps still outstanding, the
#   remaining count is subtracted from the final position.
#
# The `::Int` / `::Bool` assertions pin the result types of the generic
# `AbstractString` interface functions so the arithmetic below stays concretely typed.
function prevind(s::AbstractString, i::Int, n::Int)
    n < 0 && throw(ArgumentError("n cannot be negative: $n"))
    z = ncodeunits(s)::Int + 1
    @boundscheck 0 < i ≤ z || throw(BoundsError(s, i))
    n == 0 && return thisind(s, i)::Int == i ? i : string_index_err(s, i)
    while n > 0 && 1 < i
        # Exactly one step back per iteration; count it only if it lands on a
        # valid index.
        @inbounds n -= isvalid(s, i -= 1)::Bool
    end
    return i - n
end
Expand Down Expand Up @@ -571,11 +571,11 @@ nextind(s::AbstractString, i::Int) = nextind(s, i, 1)

# Return the index reached by stepping forward over `n` valid indices of `s`,
# starting from `i`.
#
# * Throws `ArgumentError` when `n` is negative.
# * Throws `BoundsError` unless `0 ≤ i ≤ ncodeunits(s)` (when bounds checks are on).
# * With `n == 0`, `i` itself must be a valid index; otherwise `string_index_err` is
#   called.
# * If the end of the string is reached with steps still outstanding, the remaining
#   count is added to the final position.
#
# The `::Int` / `::Bool` assertions pin the result types of the generic
# `AbstractString` interface functions so the arithmetic below stays concretely typed.
function nextind(s::AbstractString, i::Int, n::Int)
    n < 0 && throw(ArgumentError("n cannot be negative: $n"))
    z = ncodeunits(s)::Int
    @boundscheck 0 ≤ i ≤ z || throw(BoundsError(s, i))
    n == 0 && return thisind(s, i)::Int == i ? i : string_index_err(s, i)
    while n > 0 && i < z
        # Exactly one step forward per iteration; count it only if it lands on a
        # valid index.
        @inbounds n -= isvalid(s, i += 1)::Bool
    end
    return i + n
end
Expand Down
7 changes: 1 addition & 6 deletions base/summarysize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -149,13 +149,8 @@ function (ss::SummarySize)(obj::GenericMemory)
datakey = unsafe_convert(Ptr{Cvoid}, obj)
if !haskey(ss.seen, datakey)
ss.seen[datakey] = true
dsize = sizeof(obj)
size += sizeof(obj)
T = eltype(obj)
if isbitsunion(T)
# add 1 union selector byte for each element
dsize += length(obj)
end
size += dsize
if !isempty(obj) && T !== Symbol && (!Base.allocatedinline(T) || (T isa DataType && !Base.datatype_pointerfree(T)))
push!(ss.frontier_x, obj)
push!(ss.frontier_i, 1)
Expand Down
24 changes: 15 additions & 9 deletions doc/src/manual/calling-c-and-fortran-code.md
Original file line number Diff line number Diff line change
Expand Up @@ -547,15 +547,14 @@ is not valid, since the type layout of `T` is not known statically.

### SIMD Values

Note: This feature is currently implemented on 64-bit x86 and AArch64 platforms only.

If a C/C++ routine has an argument or return value that is a native SIMD type, the corresponding
Julia type is a homogeneous tuple of `VecElement` that naturally maps to the SIMD type. Specifically:

> * The tuple must be the same size as the SIMD type. For example, a tuple representing an `__m128`
> on x86 must have a size of 16 bytes.
> * The element type of the tuple must be an instance of `VecElement{T}` where `T` is a primitive type that
> is 1, 2, 4 or 8 bytes.
> * The tuple must have the same size and element type as the SIMD type. For example, a tuple
> representing an `__m128` on x86 must have a size of 16 bytes and `Float32` elements.
> * The element type of the tuple must be an instance of `VecElement{T}`, where `T` is a
> primitive type whose size is a power-of-two number of bytes (e.g. 1, 2, 4, 8, or 16),
> such as `Int8` or `Float64`.

For instance, consider this C routine that uses AVX intrinsics:

Expand Down Expand Up @@ -628,6 +627,10 @@ For translating a C argument list to Julia:

* `T`, where `T` is a Julia leaf type
* argument value will be copied (passed by value)
* `vector T` (or `__attribute__ vector_size`, or a typedef such as `__m128`)

* `NTuple{N, VecElement{T}}`, where `T` is a primitive Julia type of the correct size
and `N` is the number of elements in the vector (equal to `vector_size / sizeof(T)`).
* `void*`

* depends on how this parameter is used, first translate this to the intended pointer type, then
Expand Down Expand Up @@ -674,13 +677,16 @@ For translating a C return type to Julia:
* `T`, where `T` is one of the primitive types: `char`, `int`, `long`, `short`, `float`, `double`,
`complex`, `enum` or any of their `typedef` equivalents

* `T`, where `T` is an equivalent Julia Bits Type (per the table above)
* if `T` is an `enum`, the argument type should be equivalent to `Cint` or `Cuint`
* same as C argument list
* argument value will be copied (returned by-value)
* `struct T` (including typedef to a struct)

* `T`, where `T` is a Julia Leaf Type
* same as C argument list
* argument value will be copied (returned by-value)

* `vector T`

* same as C argument list
* `void*`

* depends on how this parameter is used, first translate this to the intended pointer type, then
Expand Down
7 changes: 4 additions & 3 deletions src/cgutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1971,7 +1971,7 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
else if (!alignment)
alignment = julia_alignment(jltype);
if (intcast && Order == AtomicOrdering::NotAtomic) {
emit_memcpy(ctx, intcast, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), data, jl_aliasinfo_t::fromTBAA(ctx, tbaa), nb, Align(alignment), intcast->getAlign());
emit_memcpy(ctx, intcast, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), data, jl_aliasinfo_t::fromTBAA(ctx, tbaa), nb, intcast->getAlign(), Align(alignment));
}
else {
if (!isboxed && jl_is_genericmemoryref_type(jltype)) {
Expand Down Expand Up @@ -3214,7 +3214,7 @@ static void union_alloca_type(jl_uniontype_t *ut,
[&](unsigned idx, jl_datatype_t *jt) {
if (!jl_is_datatype_singleton(jt)) {
size_t nb1 = jl_datatype_size(jt);
size_t align1 = jl_datatype_align(jt);
size_t align1 = julia_alignment((jl_value_t*)jt);
if (nb1 > nbytes)
nbytes = nb1;
if (align1 > align)
Expand Down Expand Up @@ -3796,9 +3796,10 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg

// whether we should perform the initialization with the struct as a IR value
// or instead initialize the stack buffer with stores
// although we do the former if it is a vector or could be a vector element
auto tracked = CountTrackedPointers(lt);
bool init_as_value = false;
if (lt->isVectorTy() || jl_is_vecelement_type(ty)) { // maybe also check the size ?
if (lt->isVectorTy() || jl_special_vector_alignment(1, ty) != 0) {
init_as_value = true;
}
else if (tracked.count) {
Expand Down
2 changes: 2 additions & 0 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8552,6 +8552,8 @@ static jl_llvm_functions_t
Type *RT = Arg->getParamStructRetType();
TypeSize sz = DL.getTypeAllocSize(RT);
Align al = DL.getPrefTypeAlign(RT);
if (al > MAX_ALIGN)
al = Align(MAX_ALIGN);
param.addAttribute(Attribute::NonNull);
// The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers.
param.addDereferenceableAttr(sz);
Expand Down
27 changes: 22 additions & 5 deletions src/datatype.c
Original file line number Diff line number Diff line change
Expand Up @@ -298,9 +298,10 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t sz,
}

// Determine if homogeneous tuple with fields of type t will have
// a special alignment beyond normal Julia rules.
// a special alignment and vector-ABI beyond normal rules for aggregates.
// Return special alignment if one exists, 0 if normal alignment rules hold.
// A non-zero result *must* match the LLVM rules for a vector type <nfields x t>.
// Matching the compiler's `__attribute__ vector_size` behavior.
// For sake of Ahead-Of-Time (AOT) compilation, this routine has to work
// without LLVM being available.
unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *t)
Expand All @@ -315,8 +316,12 @@ unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *t)
// motivating use case comes up for Julia, we reject pointers.
return 0;
size_t elsz = jl_datatype_size(ty);
if (elsz != 1 && elsz != 2 && elsz != 4 && elsz != 8)
// Only handle power-of-two-sized elements (for now)
if (next_power_of_two(elsz) != elsz)
// Only handle power-of-two-sized elements (for now), since other
// lengths may be packed into very complicated arrangements (LLVM pads
// extra bits on most platforms when computing alignment but not when
// computing type size, but adds no extra bytes for each element, so
// their effect on offsets is never what you might naturally expect).
return 0;
size_t size = nfields * elsz;
// Use natural alignment for this vector: this matches LLVM and clang.
Expand Down Expand Up @@ -707,9 +712,9 @@ void jl_compute_field_offsets(jl_datatype_t *st)
}
else {
fsz = sizeof(void*);
if (fsz > MAX_ALIGN)
fsz = MAX_ALIGN;
al = fsz;
if (al > MAX_ALIGN)
al = MAX_ALIGN;
desc[i].isptr = 1;
zeroinit = 1;
npointers++;
Expand Down Expand Up @@ -929,6 +934,18 @@ JL_DLLEXPORT jl_datatype_t *jl_new_primitivetype(jl_value_t *name, jl_module_t *
jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 0, 0);
uint32_t nbytes = (nbits + 7) / 8;
uint32_t alignm = next_power_of_two(nbytes);
# if defined(_CPU_X86_) && !defined(_OS_WINDOWS_)
// datalayout strings are often weird: on 64-bit they usually follow fairly simple rules,
// but on x86 32 bit platforms, sometimes 5 to 8 byte types are
// 32-bit aligned even though the MAX_ALIGN (for types 9+ bytes) is 16
// (except for f80 which is align 4 on Mingw, Linux, and BSDs--but align 16 on MSVC and Darwin)
// https://llvm.org/doxygen/ARMTargetMachine_8cpp.html#adb29b487708f0dc2a940345b68649270
// https://llvm.org/doxygen/AArch64TargetMachine_8cpp.html#a003a58caf135efbf7273c5ed84e700d7
// https://llvm.org/doxygen/X86TargetMachine_8cpp.html#aefdbcd6131ef195da070cef7fdaf0532
// 32-bit alignment is weird
if (alignm == 8)
alignm = 4;
# endif
if (alignm > MAX_ALIGN)
alignm = MAX_ALIGN;
// memoize isprimitivetype, since it is much easier than checking
Expand Down
6 changes: 4 additions & 2 deletions src/llvm-late-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2331,8 +2331,10 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
// strip all constant alias information, as it might depend on the gc having
// preserved a gc root, which stops being true after this pass (#32215)
// similar to RewriteStatepointsForGC::stripNonValidData, but less aggressive
if (I->getMetadata(LLVMContext::MD_invariant_load))
I->setMetadata(LLVMContext::MD_invariant_load, NULL);
if (auto *LI = dyn_cast<LoadInst>(I)){
if (isSpecialPtr(LI->getPointerOperand()->getType()) && LI->getMetadata(LLVMContext::MD_invariant_load))
LI->setMetadata(LLVMContext::MD_invariant_load, NULL);
}
if (MDNode *TBAA = I->getMetadata(LLVMContext::MD_tbaa)) {
if (TBAA->getNumOperands() == 4 && isTBAA(TBAA, {"jtbaa_const", "jtbaa_memoryptr", "jtbaa_memorylen", "tbaa_memoryown"})) {
MDNode *MutableTBAA = createMutableTBAAAccessTag(TBAA);
Expand Down
Loading