Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
177 changes: 177 additions & 0 deletions src/hash/crc32/crc32_arm64.s
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,183 @@ TEXT ·castagnoliUpdate(SB),NOSPLIT,$0-36
MOVD p+8(FP), R13 // data pointer
MOVD p_len+16(FP), R11 // len(p)

// Large-input fast path: entered only when len(p) (R11) is at least 1024;
// anything shorter goes straight to the generic loop at `update`.
CMP $1024, R11
BLT update

// Five 32-bit multipliers, one for each partial CRC that large_loop
// multiplies with VPMULL before merging the partial results.
// NOTE(review): presumably precomputed x^k mod P values for the Castagnoli
// polynomial, one per stream distance; the derivation is not shown here, so
// verify them against whatever generated them.
MOVD $0xa741c1bf, R1
MOVD $0xe417f38a, R2
MOVD $0xdd7e3b0c, R3
MOVD $0x8f158014, R4
MOVD $0xdaece73e, R5

// Park the multipliers in F5-F9 (read back as V5-V9 by VPMULL). large_loop
// never writes these registers, so this is done once; R1-R5 are free to be
// reused as CRC accumulators afterwards.
FMOVD R1, F5
FMOVD R2, F6
FMOVD R3, F7
FMOVD R4, F8
FMOVD R5, F9

large_loop:
// One iteration consumes exactly 1024 bytes starting at R13. The block is
// split into an 8-byte head plus six independent streams, each with its own
// pointer and CRC accumulator (offsets relative to R13 on entry):
//   head      bytes   0..7                   accumulator R9
//   stream 0  bytes   8..175   pointer R13   accumulator R9
//   stream 1  bytes 176..343   pointer R7    accumulator R1
//   stream 2  bytes 344..511   pointer R8    accumulator R2
//   stream 3  bytes 512..679   pointer R10   accumulator R3
//   stream 4  bytes 680..847   pointer R12   accumulator R4
//   stream 5  bytes 848..1023  pointer R14   accumulator R5
// Streams 0-4 are 168 bytes long; stream 5 is 176 bytes long.

// Streams 1-5 start from a zero CRC; R9 carries the running CRC.
MOVD $0, R1
MOVD $0, R2
MOVD $0, R3
MOVD $0, R4
MOVD $0, R5
MOVD.P 8(R13), R6 // head word; post-increments R13 by 8
CRC32CX R6, R9

// Stream pointers, 168 bytes apart, relative to the already advanced R13.
ADD $168, R13, R7
ADD $336, R13, R8
ADD $504, R13, R10
ADD $672, R13, R12
ADD $840, R13, R14

// Preload the first 16 bytes of every stream; each pointer post-increments
// by 16.
// NOTE(review): R27 is used as a data register from here on. Go's assembler
// documentation lists R27 as reserved by the compiler and linker; confirm
// that no instruction on this path can be expanded using it as a temporary.
LDP.P 16(R13), (R15, R16)
LDP.P 16(R7), (R17, R19)
LDP.P 16(R8), (R20, R21)
LDP.P 16(R10), (R22, R23)
LDP.P 16(R12), (R24, R25)
LDP.P 16(R14), (R26, R27)

// Trip count for loop_4x.
MOVD $4, R0

loop_4x:
// Each pass feeds four 8-byte words from every stream into its accumulator
// (R9, R1, R2, R3, R4, R5 for the streams at R13, R7, R8, R10, R12, R14) and
// loads the next 32 bytes of every stream. Loads are interleaved with the
// CRC instructions, and a register is only reloaded after the CRC that
// consumed its previous value has been issued.
// On entry and at the end of each pass the register pairs (R15,R16),
// (R17,R19), (R20,R21), (R22,R23), (R24,R25), (R26,R27) hold the next
// 16 bytes of the six streams, in that order.

// First word of each stream.
CRC32CX R15, R9
CRC32CX R17, R1
CRC32CX R20, R2
CRC32CX R22, R3
CRC32CX R24, R4
CRC32CX R26, R5

// Streams at R13/R7/R8: load the third and fourth words. The second
// destination of each pair (R22/R24/R26) was consumed just above.
LDP.P 16(R13), (R15, R22)
LDP.P 16(R7), (R17, R24)
LDP.P 16(R8), (R20, R26)

// Second word of each stream.
CRC32CX R16, R9
CRC32CX R19, R1
CRC32CX R21, R2
CRC32CX R23, R3
CRC32CX R25, R4
CRC32CX R27, R5

// Streams at R10/R12/R14: load the third and fourth words into the
// registers freed by the group above.
LDP.P 16(R10), (R16, R23)
LDP.P 16(R12), (R19, R25)
LDP.P 16(R14), (R21, R27)

// Third word of each stream.
CRC32CX R15, R9
CRC32CX R17, R1
CRC32CX R20, R2
CRC32CX R16, R3
CRC32CX R19, R4
CRC32CX R21, R5

// Streams at R13/R7/R8: preload the first two words for the next pass.
LDP.P 16(R13), (R15, R16)
LDP.P 16(R7), (R17, R19)
LDP.P 16(R8), (R20, R21)

// Fourth word of each stream.
CRC32CX R22, R9
CRC32CX R24, R1
CRC32CX R26, R2
CRC32CX R23, R3
CRC32CX R25, R4
CRC32CX R27, R5

// Streams at R10/R12/R14: preload the first two words for the next pass.
LDP.P 16(R10), (R22, R23)
LDP.P 16(R12), (R24, R25)
LDP.P 16(R14), (R26, R27)

// R0 was set to 4 before the loop; repeat until it reaches zero.
SUB $1, R0
CBNZ R0, loop_4x

// Straight-line drain after loop_4x. The four loop passes have fed 128 bytes
// of every stream into its accumulator and left the next 16 bytes of each
// stream preloaded. This section feeds the remaining five words (40 bytes)
// of each 168-byte stream, plus one extra word for the stream at R14.

// First remaining word of each stream.
CRC32CX R15, R9
CRC32CX R17, R1
CRC32CX R20, R2
CRC32CX R22, R3
CRC32CX R24, R4
CRC32CX R26, R5

// Streams at R13/R7/R8: load the third and fourth remaining words.
LDP.P 16(R13), (R15, R22)
LDP.P 16(R7), (R17, R24)
LDP.P 16(R8), (R20, R26)

// Second remaining word of each stream.
CRC32CX R16, R9
CRC32CX R19, R1
CRC32CX R21, R2
CRC32CX R23, R3
CRC32CX R25, R4
CRC32CX R27, R5

// Streams at R10/R12/R14: load the third and fourth remaining words.
LDP.P 16(R10), (R16, R23)
LDP.P 16(R12), (R19, R25)
LDP.P 16(R14), (R21, R27)

// Third remaining word of each stream.
CRC32CX R15, R9
CRC32CX R17, R1
CRC32CX R20, R2
CRC32CX R16, R3
CRC32CX R19, R4
CRC32CX R21, R5

// Streams at R13/R7/R8: only one word is left, so use a single 8-byte load.
MOVD.P 8(R13), R15
MOVD.P 8(R7), R17
MOVD.P 8(R8), R20

// Fourth remaining word of each stream.
CRC32CX R22, R9
CRC32CX R24, R1
CRC32CX R26, R2
CRC32CX R23, R3
CRC32CX R25, R4
CRC32CX R27, R5

// Streams at R10/R12/R14: load their fifth remaining word.
MOVD.P 8(R10), R22
MOVD.P 8(R12), R24
MOVD.P 8(R14), R26

// Fifth remaining word of each stream.
CRC32CX R15, R9
CRC32CX R17, R1
CRC32CX R20, R2
CRC32CX R22, R3
CRC32CX R24, R4
CRC32CX R26, R5

// The stream at R14 is one word longer than the others (176 bytes), which
// brings the per-iteration total to 8 + 5*168 + 176 = 1024 bytes.
MOVD.P 8(R14), R26
CRC32CX R26, R5

// R14 has walked to the end of the last stream, i.e. just past the 1024-byte
// block; make that the data pointer for whatever runs next.
MOVD R14, R13

// Merge the six partial CRCs. The five that cover earlier bytes (R9, R1-R4)
// are each multiplied by their constant in V5-V9 and reduced again; R5
// covers the final bytes of the block and is merged unchanged.
FMOVD R9, F0
FMOVD R1, F1
FMOVD R2, F2
FMOVD R3, F3
FMOVD R4, F4

// Polynomial (carry-less) multiply of each partial CRC by its multiplier.
// NOTE(review): VPMULL needs the PMULL instruction to be available on the
// CPU; the feature check guarding this routine is not visible here, so
// confirm it covers PMULL as well as CRC32.
VPMULL V0.D1, V5.D1, V0.Q1
VPMULL V1.D1, V6.D1, V1.Q1
VPMULL V2.D1, V7.D1, V2.Q1
VPMULL V3.D1, V8.D1, V3.Q1
VPMULL V4.D1, V9.D1, V4.Q1

// Bring the low 64 bits of each product back to the general registers.
// Both inputs are 32-bit values (a CRC32CX result and a 32-bit constant),
// so the product fits in those 64 bits.
FMOVD F0, R9
FMOVD F1, R1
FMOVD F2, R2
FMOVD F3, R3
FMOVD F4, R4

// Reduce each 64-bit product back to a 32-bit CRC: one CRC32CX step over
// the product with ZR (zero) as the incoming CRC.
CRC32CX R9, ZR, R9
CRC32CX R1, ZR, R1
CRC32CX R2, ZR, R2
CRC32CX R3, ZR, R3
CRC32CX R4, ZR, R4

// XOR the six values together into R9, the running CRC.
EORW R1, R9, R9
EORW R3, R2, R2
EORW R5, R4, R4
EORW R4, R2, R2
EORW R2, R9, R9

// Account for the 1024 bytes just consumed. Loop while another full block
// remains; otherwise fall through to `update` for the rest.
SUB $1024, R11
CMP $1024, R11
BGE large_loop

update:
CMP $16, R11
BLT less_than_16
Expand Down