Skip to content

No method matching _vstore_unroll! on ARM #543

Open
@benegee

Description

@benegee

We are using @turbo extensively in Trixi.jl.
Recently, we have started running our code on ARM-based machines and encountered the following error:

LoadError: MethodError: no method matching _vstore_unroll!(::LayoutPointers.StridedPointer{Float64, 4, 1, 0, (1, 2, 3, 4), Tuple{Static.StaticInt{8}, Static.StaticInt{8}, Static.StaticInt{40}, Static.StaticInt{200}}, NTuple{4, Static.StaticInt{0}}}, ::VectorizationBase.VecUnroll{4, 1, Float64, VectorizationBase.VecUnroll{4, 1, Float64, Float64}}, ::VectorizationBase.Unroll{2, 1, 5, 1, 1, 0x0000000000000000, 1, VectorizationBase.Unroll{4, 1, 5, 1, 1, 0x0000000000000000, 1, Static.StaticInt{0}}}, ::Static.False, ::Static.False, ::Static.False, ::Static.StaticInt{16}, ::Static.StaticInt{8})
  
  Closest candidates are:
    _vstore_unroll!(::LayoutPointers.AbstractStridedPointer{T1, D, C, B, R, X, O} where {B, R, X<:Tuple{Vararg{Union{Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8, Static.StaticInt}, D}}, O<:Tuple{Vararg{Union{Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8, Static.StaticInt}, D}}}, ::VectorizationBase.VecUnroll{<:Any, W, T2, <:VectorizationBase.VecUnroll{<:Any, W, T2, VectorizationBase.Vec{W, T2}}}, ::UU, ::M, ::A, ::S, ::NT, ::Static.StaticInt{RS}, ::SVUS) where {T1, D, C, W, T2, UU, A, S, NT, RS, SVUS, M}
     @ VectorizationBase ~/.julia/packages/VectorizationBase/LqJbS/src/vecunroll/memory.jl:2552
    _vstore_unroll!(::LayoutPointers.AbstractStridedPointer{T, D, C, B, R, X, O} where {B, R, X<:Tuple{Vararg{Union{Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8, Static.StaticInt}, D}}, O<:Tuple{Vararg{Union{Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8, Static.StaticInt}, D}}}, ::VectorizationBase.VecUnroll{<:Any, W, T, <:VectorizationBase.VecUnroll{<:Any, W, T, VectorizationBase.Vec{W, T}}}, ::UU, ::A, ::S, ::NT, ::Static.StaticInt{RS}, ::Static.StaticInt{SVUS}) where {W, T, A<:Static.StaticBool, S<:Static.StaticBool, NT<:Static.StaticBool, RS, D, C, SVUS, UU<:(VectorizationBase.Unroll{AUO, FO, NO, AV, W, MO, X, VectorizationBase.Unroll{AUI, FI, NI, AV, W, MI, X, I}} where {AV, X, I, AUO, FO, NO, MO, AUI, FI, NI, MI})}
     @ VectorizationBase ~/.julia/packages/VectorizationBase/LqJbS/src/vecunroll/memory.jl:2575
    _vstore_unroll!(::LayoutPointers.AbstractStridedPointer{T1, D, C, B, R, X, O} where {B, R, X<:Tuple{Vararg{Union{Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8, Static.StaticInt}, D}}, O<:Tuple{Vararg{Union{Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8, Static.StaticInt}, D}}}, ::VectorizationBase.VecUnroll{<:Any, W, T2, <:VectorizationBase.VecUnroll{<:Any, W, T2, VectorizationBase.Vec{W, T2}}}, ::UU, ::A, ::S, ::NT, ::Static.StaticInt{RS}, ::SVUS) where {T1, D, C, W, T2, UU, A, S, NT, RS, SVUS}
     @ VectorizationBase ~/.julia/packages/VectorizationBase/LqJbS/src/vecunroll/memory.jl:2531
    ...

The error is triggered by one of our helper functions, which essentially performs a specialized matrix-vector multiplication. We were able to reproduce the issue with the following example:

MWE
using StaticArrays
using StrideArrays: PtrArray, StaticInt
using LoopVectorization: @turbo

# Reproducer kernel: contracts `matrix` with a scratch array along the last
# dimension and writes the result into `data_out` in place.
#
# For every index (v, i, j, k) of `data_out` it stores
#     sum over kk of matrix[k, kk] * tmp[v, i, j, kk]
# where `tmp` is a freshly allocated, zero-filled array. Always returns `nothing`.
function multiply_dimensionwise!(data_out, matrix)

    # Zero-filled scratch array with the element type of `data_out`; its extents are
    # taken from the first dimension of `data_out` and the dimensions of `matrix`.
    tmp = zeros(eltype(data_out), size(data_out, 1), size(matrix, 1), size(matrix, 2), size(matrix, 2))

    # Loop nest over all four axes of `data_out`, handed to LoopVectorization's `@turbo`.
    # NOTE(review): per the surrounding report this is the loop whose compilation
    # raises the `_vstore_unroll!` MethodError on ARM — confirm.
    @turbo for k in axes(data_out, 4), j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1)

        # Accumulator starts at the zero of the output element type.
        res = zero(eltype(data_out))
        # Reduction over the second dimension of `matrix`.
        for kk in axes(matrix, 2)
            res += matrix[k, kk] * tmp[v, i, j, kk]
        end
        data_out[v, i, j, k] = res
    end

    return nothing
end

# Problem size for the reproducer.
# NOTE(review): the reporter marks `nodes = 5` as important; presumably the failure
# depends on this particular extent — confirm.
dims = 3
nodes = 5 # important!
els = 1

# Flat buffer of nodes^dims * els entries, all set to 2.0.
test_u = fill(2.0, nodes^dims * els)
# Wrap the raw pointer to `test_u` in a PtrArray whose size tuple is
# (StaticInt(1), StaticInt(nodes) repeated `dims` times, els).
# NOTE(review): this uses a raw pointer, so `test_u` must stay alive while
# `test_ptr` is in use; it is a top-level variable here.
test_ptr = PtrArray(pointer(test_u), (StaticInt(1), ntuple(_ -> StaticInt(nodes), dims)..., els))
# nodes-by-nodes matrix of ones, then converted to a statically sized SMatrix.
test_mat = fill(1.0, nodes, nodes)
test_smat = SMatrix{nodes, nodes}(test_mat)

# Call the kernel on the view that fixes the fifth index of `test_ptr` to 1.
multiply_dimensionwise!(view(test_ptr, :, :, :, :, 1), test_smat)

Xref: trixi-framework/Trixi.jl#2075

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions