-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISC-V] Bit counting intrinsics #114337
[RISC-V] Bit counting intrinsics #114337
Conversation
Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch |
No regressions. Diffs are based on 170,565 contexts (22,709 MinOpts, 147,856 FullOpts). MISSED contexts: 1,020 (0.59%)
Overall (-14,440 bytes)
MinOpts (-180 bytes)
FullOpts (-14,260 bytes)
Example diffs
linux.riscv64.Checked.mch
-36 (-50.00%) : 12110.dasm - System.Numerics.Tensors.TensorPrimitives+LeadingZeroCountOperator`1[int]:Invoke(int):int (FullOpts)
@@ -5,12 +5,10 @@
; fp based frame
; partially interruptible
; No matching PGO data
-; 0 inlinees with PGO data; 1 single block inlinees; 1 inlinees without PGO data
; Final local variable assignments
;
-; V00 arg0 [V00,T00] ( 4, 3.50) int -> a0 single-def
+; V00 arg0 [V00,T00] ( 3, 3 ) int -> a0 single-def
;# V01 OutArgs [V01 ] ( 1, 1 ) struct ( 0) [sp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace" <Empty>
-; V02 tmp1 [V02,T01] ( 3, 2 ) int -> a0 "Inline return value spill temp"
;
; Lcl frame size = 0
Frame info. #outsz=0; #framesz=16; lcl=0
@@ -22,28 +20,15 @@ G_M40987_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
mv fp, sp
;; size=16 bbWeight=1 PerfScore 9.00
G_M40987_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- sext.w t6, a0
- beqz t6, G_M40987_IG04
- ;; size=8 bbWeight=1 PerfScore 4.00
-G_M40987_IG03: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- lui a1, 0xD1FFAB1E
- addiw a1, a1, 0xD1FFAB1E
- slli a1, a1, 7
- ld a1, 0xD1FFAB1E(a1)
- jalr a1 // System.Numerics.BitOperations:Log2SoftwareFallback(uint):int
- xori a0, a0, 31
- j G_M40987_IG05
- ;; size=28 bbWeight=0.50 PerfScore 4.25
-G_M40987_IG04: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- addi a0, zero, 0xD1FFAB1E
- ;; size=4 bbWeight=0.50 PerfScore 0.25
-G_M40987_IG05: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, epilog, nogc
+ clzw a0, a0
+ ;; size=4 bbWeight=1 PerfScore 0.50
+G_M40987_IG03: ; bbWeight=1, epilog, nogc, extend
ld ra, 8(sp)
ld fp, 0(sp)
addi sp, sp, 16
ret ;; size=16 bbWeight=1 PerfScore 7.50
-; Total bytes of code 72, prolog size 16, PerfScore 25.00, instruction count 18, allocated bytes for code 72 (MethodHash=a4435fe4) for method System.Numerics.Tensors.TensorPrimitives+LeadingZeroCountOperator`1[int]:Invoke(int):int (FullOpts)
+; Total bytes of code 36, prolog size 16, PerfScore 17.00, instruction count 9, allocated bytes for code 36 (MethodHash=a4435fe4) for method System.Numerics.Tensors.TensorPrimitives+LeadingZeroCountOperator`1[int]:Invoke(int):int (FullOpts)
; ============================================================
Unwind Info:
@@ -54,7 +39,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 18 (0x00012) Actual length = 72 (0x000048)
+ Function Length : 9 (0x00009) Actual length = 36 (0x000024)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
-36 (-45.00%) : 32994.dasm - System.Number+BigInteger:CountSignificantBits(uint):uint (FullOpts)
@@ -5,12 +5,10 @@
; fp based frame
; partially interruptible
; No matching PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 1 inlinees without PGO data
; Final local variable assignments
;
-; V00 arg0 [V00,T00] ( 4, 3.50) int -> a0 single-def
+; V00 arg0 [V00,T00] ( 3, 3 ) int -> a0 single-def
;# V01 OutArgs [V01 ] ( 1, 1 ) struct ( 0) [sp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace" <Empty>
-; V02 tmp1 [V02,T01] ( 3, 2 ) int -> a0 "Inline return value spill temp"
;
; Lcl frame size = 0
Frame info. #outsz=0; #framesz=16; lcl=0
@@ -22,32 +20,17 @@ G_M36112_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
mv fp, sp
;; size=16 bbWeight=1 PerfScore 9.00
G_M36112_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- sext.w t6, a0
- beqz t6, G_M36112_IG04
- ;; size=8 bbWeight=1 PerfScore 4.00
-G_M36112_IG03: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- lui a1, 0xD1FFAB1E
- addiw a1, a1, 0xD1FFAB1E
- slli a1, a1, 7
- ld a1, 0xD1FFAB1E(a1)
- jalr a1 // System.Numerics.BitOperations:Log2SoftwareFallback(uint):int
- xori a0, a0, 31
- j G_M36112_IG05
- ;; size=28 bbWeight=0.50 PerfScore 4.25
-G_M36112_IG04: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- addi a0, zero, 0xD1FFAB1E
- ;; size=4 bbWeight=0.50 PerfScore 0.25
-G_M36112_IG05: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
+ clzw a0, a0
subw a0, zero, a0
addiw a0, a0, 0xD1FFAB1E
- ;; size=8 bbWeight=1 PerfScore 1.00
-G_M36112_IG06: ; bbWeight=1, epilog, nogc, extend
+ ;; size=12 bbWeight=1 PerfScore 1.50
+G_M36112_IG03: ; bbWeight=1, epilog, nogc, extend
ld ra, 8(sp)
ld fp, 0(sp)
addi sp, sp, 16
ret ;; size=16 bbWeight=1 PerfScore 7.50
-; Total bytes of code 80, prolog size 16, PerfScore 26.00, instruction count 20, allocated bytes for code 80 (MethodHash=581172ef) for method System.Number+BigInteger:CountSignificantBits(uint):uint (FullOpts)
+; Total bytes of code 44, prolog size 16, PerfScore 18.00, instruction count 11, allocated bytes for code 44 (MethodHash=581172ef) for method System.Number+BigInteger:CountSignificantBits(uint):uint (FullOpts)
; ============================================================
Unwind Info:
@@ -58,7 +41,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 20 (0x00014) Actual length = 80 (0x000050)
+ Function Length : 11 (0x0000b) Actual length = 44 (0x00002c)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
-24 (-40.00%) : 12401.dasm - System.Numerics.Tensors.TensorPrimitives+PopCountOperator`1[long]:Invoke(long):long (FullOpts)
@@ -5,7 +5,6 @@
; fp based frame
; partially interruptible
; No matching PGO data
-; 0 inlinees with PGO data; 1 single block inlinees; 1 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T00] ( 3, 3 ) long -> a0 single-def
@@ -21,21 +20,15 @@ G_M15459_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
mv fp, sp
;; size=16 bbWeight=1 PerfScore 9.00
G_M15459_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- lui a1, 0xD1FFAB1E
- addiw a1, a1, 0xD1FFAB1E
- slli a1, a1, 31
- srli a1, a1, 26
- ld a1, 0xD1FFAB1E(a1)
- jalr a1 // System.Numerics.BitOperations:<PopCount>g__SoftwareFallback|23_0(ulong):int
- slliw a0, a0, 0
- ;; size=28 bbWeight=1 PerfScore 7.50
+ cpop a0, a0
+ ;; size=4 bbWeight=1 PerfScore 0.50
G_M15459_IG03: ; bbWeight=1, epilog, nogc, extend
ld ra, 8(sp)
ld fp, 0(sp)
addi sp, sp, 16
ret ;; size=16 bbWeight=1 PerfScore 7.50
-; Total bytes of code 60, prolog size 16, PerfScore 24.00, instruction count 15, allocated bytes for code 60 (MethodHash=1bb0c39c) for method System.Numerics.Tensors.TensorPrimitives+PopCountOperator`1[long]:Invoke(long):long (FullOpts)
+; Total bytes of code 36, prolog size 16, PerfScore 17.00, instruction count 9, allocated bytes for code 36 (MethodHash=1bb0c39c) for method System.Numerics.Tensors.TensorPrimitives+PopCountOperator`1[long]:Invoke(long):long (FullOpts)
; ============================================================
Unwind Info:
@@ -46,7 +39,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 15 (0x0000f) Actual length = 60 (0x00003c)
+ Function Length : 9 (0x00009) Actual length = 36 (0x000024)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
+0 (0.00%) : 171568.dasm - ILGEN_0x2193e4b5:Method_0xeffa27b5(ubyte,long,float,float,byte,double,uint,ubyte,short):int (FullOpts)
No diffs found?
+0 (0.00%) : 171536.dasm - Generated442:StructConstrainedInterfaceCallsTest() (FullOpts)
No diffs found?
+0 (0.00%) : 171472.dasm - ValueNumberingCheckedCastsOfConstants:g__ConfirmUInt64OneDecrementUnderUInt64MaxValueCastToUInt32Overflows|97_24() (FullOpts)
No diffs found?
Details
Size improvements/regressions per collection
PerfScore improvements/regressions per collection
Context information
jit-analyze output |
RISC-V Release-CLR-VF2: 9531 / 9551 (99.79%)
Release-CLR-VF2.md, Release-CLR-VF2.xml, testclr_output.tar.gz
Build information and commands
GIT:
RISC-V Release-CLR-QEMU: 9531 / 9551 (99.79%)
Release-CLR-QEMU.md, Release-CLR-QEMU.xml, testclr_output.tar.gz
Build information and commands
GIT:
RISC-V Release-FX-VF2: 628462 / 660362 (95.17%)
Build information and commands
GIT:
RISC-V Release-FX-QEMU: 643107 / 667247 (96.38%)
Release-FX-QEMU.md, Release-FX-QEMU.xml, testfx_output.tar.gz
Build information and commands
GIT: |
<testsuite name="readytorun.determinism.crossgen2determinism" time="1.970">
<testcase name="crossgen2determinism.sh" classname="readytorun.determinism.crossgen2determinism.crossgen2determinism" result="FAILED" time="1.970">
<failure message="Unknown exit code 101." type="exitcode_101">Could not find file '/var/lib/go-agent/pipelines/Release-CLR-VF2/coreclr.Release/readytorun/determinism/crossgen2determinism/crossgen2smoke1.ildll'.
File name: '/var/lib/go-agent/pipelines/Release-CLR-VF2/coreclr.Release/readytorun/determinism/crossgen2determinism/crossgen2smoke1.ildll'
at Interop.ThrowExceptionForIoErrno(ErrorInfo errorInfo, String path, Boolean isDirError)
at Microsoft.Win32.SafeHandles.SafeFileHandle.Open(String path, OpenFlags flags, Int32 mode, Boolean failForSymlink, Boolean& wasSymlink, Func`4 createOpenException)
at Microsoft.Win32.SafeHandles.SafeFileHandle.Open(String fullPath, FileMode mode, FileAccess access, FileShare share, FileOptions options, Int64 preallocationSize, UnixFileMode openPermissions, Int64& fileLength, UnixFileMode& filePermissions, Boolean failForSymlink, Boolean& wasSymlink, Func`4 createOpenException)
at System.IO.File.OpenHandle(String path, FileMode mode, FileAccess access, FileShare share, FileOptions options, Int64 preallocationSize)
at System.IO.File.ReadAllBytes(String path)
at Program.TestEntryPoint() in /godata/pipelines/Release-build/runtime/src/tests/readytorun/determinism/Program.cs:line 13
at __GeneratedMainWrapper.Main() in /godata/pipelines/Release-build/runtime/artifacts/tests/coreclr/obj/linux.riscv64.Release/Managed/readytorun/determinism/crossgen2determinism/XUnitWrapperGenerator/XUnitWrapperGenerator.XUnitWrapperGenerator/SimpleRunner.g.cs:line 7
Do we know whether that path is wrong on the test runner, or whether the file wasn't generated by the build? The file should be generated at build time.
|
The file is built, and the test does pass locally. It looks like the file is either not collected or placed at the wrong path for the test run. @sirntar will be working on it. |
@am11 The file is generated at build time, but it isn't sent to the CI test runners. I am investigating this at the moment. It's probably a packaging issue... |
(Leading|Trailing)ZeroCount, PopCount.
Part of #84834, cc @dotnet/samsung