[llvm] [AArch64] Adjust the scheduling info of SVE FCMP on Cortex-A510. (PR #153810)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 15 07:10:13 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: David Green (davemgreen)
Changes:
According to the Software Optimization Guide (SWOG), these instructions have a lower throughput than other instructions. Mark them as taking multiple cycles to model that.
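For reference, the change itself is a one-line switch of write class in AArch64SchedA510.td (full context in the diff below). A minimal before/after sketch, assuming the second parameter of CortexA510MCWrite gives the number of cycles the VALU resource is held (as the neighbouring FADDA entries appear to use it), with the instregex list abridged here:

```tablegen
// Before: latency 4, VALU released after a single cycle, so one SVE
// floating-point compare can issue to the unit every cycle.
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
             (instregex "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]")>;

// After: latency 4, VALU held for 2 cycles, halving the modelled
// throughput to match the lower FCMP throughput listed in the SWOG.
def : InstRW<[CortexA510MCWrite<4, 2, CortexA510UnitVALU>],
             (instregex "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]")>;
```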
---
Patch is 294.64 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/153810.diff
10 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64SchedA510.td (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-bf16-converts.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll (+50-50)
- (modified) llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll (+33-33)
- (modified) llvm/test/CodeGen/AArch64/sve-llrint.ll (+1030-1031)
- (modified) llvm/test/CodeGen/AArch64/sve-lrint.ll (+1030-1031)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll (+21-21)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll (+74-74)
- (modified) llvm/test/CodeGen/AArch64/sve2-bf16-converts.ll (+12-12)
- (modified) llvm/test/tools/llvm-mca/AArch64/Cortex/A510-sve-instructions.s (+103-103)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA510.td b/llvm/lib/Target/AArch64/AArch64SchedA510.td
index b93d67f3091e7..356e3fa39c53f 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA510.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA510.td
@@ -1016,7 +1016,7 @@ def : InstRW<[CortexA510MCWrite<16, 13, CortexA510UnitVALU>], (instrs FADDA_VPZ_
def : InstRW<[CortexA510MCWrite<8, 5, CortexA510UnitVALU>], (instrs FADDA_VPZ_D)>;
// Floating point compare
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FACG[ET]_PPzZZ_[HSD]",
+def : InstRW<[CortexA510MCWrite<4, 2, CortexA510UnitVALU>], (instregex "^FACG[ET]_PPzZZ_[HSD]",
"^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]",
"^FCM(LE|LT)_PPzZ0_[HSD]",
"^FCMUO_PPzZZ_[HSD]")>;
diff --git a/llvm/test/CodeGen/AArch64/sve-bf16-converts.ll b/llvm/test/CodeGen/AArch64/sve-bf16-converts.ll
index d63f7e6f3242e..120ab7cc4552e 100644
--- a/llvm/test/CodeGen/AArch64/sve-bf16-converts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-bf16-converts.ll
@@ -171,11 +171,11 @@ define <vscale x 8 x bfloat> @fptrunc_nxv8f32_to_nxv8bf16(<vscale x 8 x float> %
; NOBF16-NEXT: ptrue p0.s
; NOBF16-NEXT: and z3.s, z3.s, #0x1
; NOBF16-NEXT: and z4.s, z4.s, #0x1
-; NOBF16-NEXT: fcmuo p1.s, p0/z, z1.s, z1.s
; NOBF16-NEXT: add z5.s, z1.s, z2.s
; NOBF16-NEXT: add z2.s, z0.s, z2.s
-; NOBF16-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s
+; NOBF16-NEXT: fcmuo p1.s, p0/z, z1.s, z1.s
; NOBF16-NEXT: orr z1.s, z1.s, #0x400000
+; NOBF16-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s
; NOBF16-NEXT: orr z0.s, z0.s, #0x400000
; NOBF16-NEXT: add z3.s, z3.s, z5.s
; NOBF16-NEXT: add z2.s, z4.s, z2.s
diff --git a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
index 43744092a1348..71108f00a0054 100644
--- a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
@@ -94,8 +94,8 @@ define <vscale x 4 x i16> @test_signed_v4f32_v4i16(<vscale x 4 x float> %f) {
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z1.s
; CHECK-NEXT: mov z1.s, #32767 // =0x7fff
-; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s
; CHECK-NEXT: fcvtzs z2.s, p1/m, z0.s
+; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s
; CHECK-NEXT: sel z0.s, p2, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
; CHECK-NEXT: ret
@@ -264,37 +264,37 @@ define <vscale x 8 x i32> @test_signed_v8f64_v8i32(<vscale x 8 x double> %f) {
; CHECK-NEXT: mov z6.d, #0xffffffff80000000
; CHECK-NEXT: movk x8, #16863, lsl #48
; CHECK-NEXT: mov z7.d, #0xffffffff80000000
-; CHECK-NEXT: mov z24.d, #0xffffffff80000000
-; CHECK-NEXT: mov z25.d, x8
-; CHECK-NEXT: fcmuo p6.d, p0/z, z0.d, z0.d
+; CHECK-NEXT: mov z25.d, #0x7fffffff
+; CHECK-NEXT: mov z24.d, x8
; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z4.d
; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, z4.d
; CHECK-NEXT: fcmge p3.d, p0/z, z3.d, z4.d
-; CHECK-NEXT: fcmge p4.d, p0/z, z2.d, z4.d
-; CHECK-NEXT: mov z4.d, #0x7fffffff
-; CHECK-NEXT: fcmgt p5.d, p0/z, z2.d, z25.d
+; CHECK-NEXT: fcmgt p4.d, p0/z, z0.d, z24.d
; CHECK-NEXT: fcvtzs z5.d, p1/m, z1.d
-; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z25.d
+; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z24.d
; CHECK-NEXT: fcvtzs z6.d, p2/m, z0.d
+; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, z4.d
+; CHECK-NEXT: mov z4.d, #0xffffffff80000000
; CHECK-NEXT: fcvtzs z7.d, p3/m, z3.d
-; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z25.d
-; CHECK-NEXT: fcmgt p3.d, p0/z, z3.d, z25.d
-; CHECK-NEXT: fcvtzs z24.d, p4/m, z2.d
-; CHECK-NEXT: fcmuo p4.d, p0/z, z1.d, z1.d
-; CHECK-NEXT: sel z0.d, p1, z4.d, z5.d
-; CHECK-NEXT: fcmuo p1.d, p0/z, z3.d, z3.d
+; CHECK-NEXT: fcmuo p3.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: fcmgt p5.d, p0/z, z2.d, z24.d
+; CHECK-NEXT: sel z1.d, p1, z25.d, z5.d
+; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z24.d
+; CHECK-NEXT: fcvtzs z4.d, p2/m, z2.d
+; CHECK-NEXT: fcmuo p2.d, p0/z, z0.d, z0.d
+; CHECK-NEXT: sel z0.d, p4, z25.d, z6.d
+; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0
+; CHECK-NEXT: fcmuo p6.d, p0/z, z3.d, z3.d
; CHECK-NEXT: fcmuo p0.d, p0/z, z2.d, z2.d
-; CHECK-NEXT: sel z1.d, p2, z4.d, z6.d
-; CHECK-NEXT: sel z2.d, p3, z4.d, z7.d
-; CHECK-NEXT: sel z3.d, p5, z4.d, z24.d
+; CHECK-NEXT: sel z2.d, p1, z25.d, z7.d
+; CHECK-NEXT: sel z3.d, p5, z25.d, z4.d
; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: mov z0.d, p4/m, #0 // =0x0
-; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: mov z1.d, p6/m, #0 // =0x0
-; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0
+; CHECK-NEXT: mov z2.d, p6/m, #0 // =0x0
; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0
-; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
+; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT: uzp1 z1.s, z3.s, z2.s
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -348,41 +348,41 @@ define <vscale x 8 x i16> @test_signed_v8f64_v8i16(<vscale x 8 x double> %f) {
; CHECK-NEXT: mov z5.d, #-32768 // =0xffffffffffff8000
; CHECK-NEXT: mov z4.d, x8
; CHECK-NEXT: mov x8, #281200098803712 // =0xffc000000000
-; CHECK-NEXT: mov z6.d, #-32768 // =0xffffffffffff8000
-; CHECK-NEXT: movk x8, #16607, lsl #48
; CHECK-NEXT: mov z7.d, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT: movk x8, #16607, lsl #48
+; CHECK-NEXT: mov z24.d, #-32768 // =0xffffffffffff8000
; CHECK-NEXT: mov z25.d, #32767 // =0x7fff
-; CHECK-NEXT: mov z24.d, x8
-; CHECK-NEXT: fcmuo p6.d, p0/z, z2.d, z2.d
+; CHECK-NEXT: mov z6.d, x8
; CHECK-NEXT: fcmge p1.d, p0/z, z3.d, z4.d
; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, z4.d
; CHECK-NEXT: fcmge p3.d, p0/z, z1.d, z4.d
-; CHECK-NEXT: fcmge p4.d, p0/z, z0.d, z4.d
-; CHECK-NEXT: mov z4.d, #-32768 // =0xffffffffffff8000
-; CHECK-NEXT: fcmgt p5.d, p0/z, z0.d, z24.d
+; CHECK-NEXT: fcmgt p4.d, p0/z, z2.d, z6.d
; CHECK-NEXT: fcvtzs z5.d, p1/m, z3.d
-; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z24.d
-; CHECK-NEXT: fcvtzs z6.d, p2/m, z2.d
-; CHECK-NEXT: fcvtzs z7.d, p3/m, z1.d
-; CHECK-NEXT: fcmgt p2.d, p0/z, z2.d, z24.d
-; CHECK-NEXT: fcmgt p3.d, p0/z, z1.d, z24.d
-; CHECK-NEXT: fcvtzs z4.d, p4/m, z0.d
-; CHECK-NEXT: fcmuo p4.d, p0/z, z3.d, z3.d
-; CHECK-NEXT: sel z2.d, p1, z25.d, z5.d
-; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z6.d
+; CHECK-NEXT: fcvtzs z7.d, p2/m, z2.d
+; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, z4.d
+; CHECK-NEXT: fcvtzs z24.d, p3/m, z1.d
+; CHECK-NEXT: fcmuo p3.d, p0/z, z3.d, z3.d
+; CHECK-NEXT: mov z3.d, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT: fcmgt p5.d, p0/z, z0.d, z6.d
+; CHECK-NEXT: sel z4.d, p1, z25.d, z5.d
+; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z6.d
+; CHECK-NEXT: fcvtzs z3.d, p2/m, z0.d
+; CHECK-NEXT: fcmuo p2.d, p0/z, z2.d, z2.d
+; CHECK-NEXT: fcmuo p6.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: mov z4.d, p3/m, #0 // =0x0
; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d
-; CHECK-NEXT: sel z0.d, p2, z25.d, z6.d
-; CHECK-NEXT: sel z1.d, p3, z25.d, z7.d
-; CHECK-NEXT: sel z3.d, p5, z25.d, z4.d
-; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: mov z2.d, p4/m, #0 // =0x0
+; CHECK-NEXT: sel z0.d, p4, z25.d, z7.d
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: mov z0.d, p6/m, #0 // =0x0
-; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0
+; CHECK-NEXT: sel z1.d, p1, z25.d, z24.d
+; CHECK-NEXT: sel z2.d, p5, z25.d, z3.d
+; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0
+; CHECK-NEXT: mov z1.d, p6/m, #0 // =0x0
+; CHECK-NEXT: mov z2.d, p0/m, #0 // =0x0
; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0
-; CHECK-NEXT: uzp1 z0.s, z0.s, z2.s
-; CHECK-NEXT: uzp1 z1.s, z3.s, z1.s
+; CHECK-NEXT: uzp1 z0.s, z0.s, z4.s
+; CHECK-NEXT: uzp1 z1.s, z2.s, z1.s
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -535,8 +535,8 @@ define <vscale x 4 x i16> @test_signed_v4f16_v4i16(<vscale x 4 x half> %f) {
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z1.h
; CHECK-NEXT: mov z1.s, #32767 // =0x7fff
-; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h
; CHECK-NEXT: fcvtzs z2.s, p1/m, z0.h
+; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h
; CHECK-NEXT: sel z0.s, p2, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
; CHECK-NEXT: ret
@@ -556,8 +556,8 @@ define <vscale x 8 x i16> @test_signed_v8f16_v8i16(<vscale x 8 x half> %f) {
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z1.h
; CHECK-NEXT: mov z1.h, #32767 // =0x7fff
-; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h
; CHECK-NEXT: fcvtzs z2.h, p1/m, z0.h
+; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h
; CHECK-NEXT: sel z0.h, p2, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll
index 1df28198711e1..123f6c55c20ab 100644
--- a/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll
@@ -49,16 +49,16 @@ define <vscale x 8 x i32> @test_signed_v8f32_v8i32(<vscale x 8 x float> %f) {
; CHECK-LABEL: test_signed_v8f32_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: mov w8, #1333788671 // =0x4f7fffff
+; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: movi v3.2d, #0000000000000000
; CHECK-NEXT: mov z4.s, w8
; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, #0.0
; CHECK-NEXT: fcvtzu z2.s, p1/m, z0.s
; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z4.s
-; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z4.s
; CHECK-NEXT: fcvtzu z3.s, p2/m, z1.s
+; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z4.s
; CHECK-NEXT: mov z2.s, p1/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z3.s, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z0.d, z2.d
@@ -95,13 +95,13 @@ define <vscale x 8 x i16> @test_signed_v8f32_v8i16(<vscale x 8 x float> %f) {
; CHECK-NEXT: movk w8, #18303, lsl #16
; CHECK-NEXT: movi v3.2d, #0000000000000000
; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, #0.0
-; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, #0.0
; CHECK-NEXT: mov z4.s, w8
+; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, #0.0
; CHECK-NEXT: fcvtzu z2.s, p1/m, z1.s
; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z4.s
; CHECK-NEXT: mov z1.s, #65535 // =0xffff
-; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z4.s
; CHECK-NEXT: fcvtzu z3.s, p2/m, z0.s
+; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z4.s
; CHECK-NEXT: sel z0.s, p1, z1.s, z2.s
; CHECK-NEXT: sel z1.s, p0, z1.s, z3.s
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
@@ -141,8 +141,8 @@ define <vscale x 4 x i64> @test_signed_v4f32_v4i64(<vscale x 4 x float> %f) {
; CHECK-NEXT: fcmge p2.s, p0/z, z3.s, #0.0
; CHECK-NEXT: fcvtzu z0.d, p1/m, z2.s
; CHECK-NEXT: fcmgt p1.s, p0/z, z2.s, z4.s
-; CHECK-NEXT: fcmgt p0.s, p0/z, z3.s, z4.s
; CHECK-NEXT: fcvtzu z1.d, p2/m, z3.s
+; CHECK-NEXT: fcmgt p0.s, p0/z, z3.s, z4.s
; CHECK-NEXT: mov z0.d, p1/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
@@ -187,13 +187,13 @@ define <vscale x 4 x i32> @test_signed_v4f64_v4i32(<vscale x 4 x double> %f) {
; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: movi v3.2d, #0000000000000000
; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0
-; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0
; CHECK-NEXT: mov z4.d, x8
+; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0
; CHECK-NEXT: fcvtzu z2.d, p1/m, z1.d
; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z4.d
; CHECK-NEXT: mov z1.d, #0xffffffff
-; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z4.d
; CHECK-NEXT: fcvtzu z3.d, p2/m, z0.d
+; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z4.d
; CHECK-NEXT: sel z0.d, p1, z1.d, z2.d
; CHECK-NEXT: sel z1.d, p0, z1.d, z3.d
; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
@@ -213,29 +213,29 @@ define <vscale x 8 x i32> @test_signed_v8f64_v8i32(<vscale x 8 x double> %f) {
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, #281474974613504 // =0xffffffe00000
; CHECK-NEXT: movi v4.2d, #0000000000000000
+; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: movi v5.2d, #0000000000000000
; CHECK-NEXT: movi v6.2d, #0000000000000000
-; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0
+; CHECK-NEXT: movi v24.2d, #0000000000000000
+; CHECK-NEXT: mov z7.d, x8
; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0
; CHECK-NEXT: fcmge p3.d, p0/z, z3.d, #0.0
-; CHECK-NEXT: movi v7.2d, #0000000000000000
; CHECK-NEXT: fcmge p4.d, p0/z, z2.d, #0.0
-; CHECK-NEXT: mov z24.d, x8
; CHECK-NEXT: fcvtzu z4.d, p1/m, z1.d
; CHECK-NEXT: fcvtzu z5.d, p2/m, z0.d
; CHECK-NEXT: fcvtzu z6.d, p3/m, z3.d
-; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z24.d
-; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z24.d
-; CHECK-NEXT: mov z0.d, #0xffffffff
-; CHECK-NEXT: fcvtzu z7.d, p4/m, z2.d
-; CHECK-NEXT: fcmgt p3.d, p0/z, z3.d, z24.d
+; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z7.d
+; CHECK-NEXT: fcvtzu z24.d, p4/m, z2.d
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: fcmgt p0.d, p0/z, z2.d, z24.d
+; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z7.d
+; CHECK-NEXT: mov z0.d, #0xffffffff
+; CHECK-NEXT: fcmgt p3.d, p0/z, z3.d, z7.d
+; CHECK-NEXT: fcmgt p0.d, p0/z, z2.d, z7.d
; CHECK-NEXT: sel z1.d, p1, z0.d, z4.d
; CHECK-NEXT: sel z2.d, p2, z0.d, z5.d
; CHECK-NEXT: sel z3.d, p3, z0.d, z6.d
-; CHECK-NEXT: sel z4.d, p0, z0.d, z7.d
+; CHECK-NEXT: sel z4.d, p0, z0.d, z24.d
; CHECK-NEXT: uzp1 z0.s, z2.s, z1.s
; CHECK-NEXT: uzp1 z1.s, z4.s, z3.s
; CHECK-NEXT: addvl sp, sp, #1
@@ -254,13 +254,13 @@ define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) {
; CHECK-NEXT: movk x8, #16623, lsl #48
; CHECK-NEXT: movi v3.2d, #0000000000000000
; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0
-; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0
; CHECK-NEXT: mov z4.d, x8
+; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0
; CHECK-NEXT: fcvtzu z2.d, p1/m, z1.d
; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z4.d
; CHECK-NEXT: mov z1.d, #65535 // =0xffff
-; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z4.d
; CHECK-NEXT: fcvtzu z3.d, p2/m, z0.d
+; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z4.d
; CHECK-NEXT: sel z0.d, p1, z1.d, z2.d
; CHECK-NEXT: sel z1.d, p0, z1.d, z3.d
; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
@@ -280,29 +280,29 @@ define <vscale x 8 x i16> @test_signed_v8f64_v8i16(<vscale x 8 x double> %f) {
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, #281337537757184 // =0xffe000000000
; CHECK-NEXT: movi v4.2d, #0000000000000000
+; CHECK-NEXT: movk x8, #16623, lsl #48
; CHECK-NEXT: movi v5.2d, #0000000000000000
; CHECK-NEXT: movi v6.2d, #0000000000000000
-; CHECK-NEXT: movk x8, #16623, lsl #48
; CHECK-NEXT: fcmge p1.d, p0/z, z3.d, #0.0
+; CHECK-NEXT: movi v24.2d, #0000000000000000
+; CHECK-NEXT: mov z7.d, x8
; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, #0.0
; CHECK-NEXT: fcmge p3.d, p0/z, z1.d, #0.0
-; CHECK-NEXT: movi v7.2d, #0000000000000000
; CHECK-NEXT: fcmge p4.d, p0/z, z0.d, #0.0
-; CHECK-NEXT: mov z24.d, x8
; CHECK-NEXT: fcvtzu z4.d, p1/m, z3.d
; CHECK-NEXT: fcvtzu z5.d, p2/m, z2.d
; CHECK-NEXT: fcvtzu z6.d, p3/m, z1.d
-; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z24.d
-; CHECK-NEXT: fcmgt p2.d, p0/z, z2.d, z24.d
-; CHECK-NEXT: mov z2.d, #65535 // =0xffff
-; CHECK-NEXT: fcvtzu z7.d, p4/m, z0.d
-; CHECK-NEXT: fcmgt p3.d, p0/z, z1.d, z24.d
+; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z7.d
+; CHECK-NEXT: fcvtzu z24.d, p4/m, z0.d
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z24.d
+; CHECK-NEXT: fcmgt p2.d, p0/z, z2.d, z7.d
+; CHECK-NEXT: mov z2.d, #65535 // =0xffff
+; CHECK-NEXT: fcmgt p3.d, p0/z, z1.d, z7.d
+; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z7.d
; CHECK-NEXT: sel z0.d, p1, z2.d, z4.d
; CHECK-NEXT: sel z1.d, p2, z2.d, z5.d
; CHECK-NEXT: sel z3.d, p3, z2.d, z6.d
-; CHECK-NEXT: sel z2.d, p0, z2.d, z7.d
+; CHECK-NEXT: sel z2.d, p0, z2.d, z24.d
; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
; CHECK-NEXT: uzp1 z1.s, z2.s, z3.s
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
@@ -334,16 +334,16 @@ define <vscale x 4 x i64> @test_signed_v4f64_v4i64(<vscale x 4 x double> %f) {
; CHECK-LABEL: test_signed_v4f64_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: mov x8, #4895412794951729151 // =0x43efffffffffffff
+; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: movi v3.2d, #0000000000000000
; CHECK-NEXT: mov z4.d, x8
; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0
; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, #0.0
; CHECK-NEXT: fcvtzu z2.d, p1/m, z0.d
; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z4.d
-; CHECK-NEXT: fcmgt p0.d, p0/z, z1.d, z4.d
; CHECK-NEXT: fcvtzu z3.d, p2/m, z1.d
+; CHECK-NEXT: fcmgt p0.d, p0/z, z1.d, z4.d
; CHECK-NEXT: mov z2.d, p1/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z3.d, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z0.d, z2.d
@@ -412,8 +412,8 @@ define <vscale x 8 x i32> @test_signed_v8f16_v8i32(<vscale x 8 x half> %f) {
; CHECK-NEXT: fcmge p2.h, p0/z, z3.h, #0.0
; CHECK-NEXT: fcvtzu z0.s, p1/m, z2.h
; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z4.h
-; CHECK-NEXT: fcmgt p0.h, p0/z, z3.h, z4.h
; CHECK-NEXT: fcvtzu z1.s, p2/m, z3.h
+; CHECK-NEXT: fcmgt p0.h, p0/z, z3.h, z4.h
; CHECK-NEXT: mov z0.s, p1/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z1.s, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
@@ -486,8 +486,8 @@ define <vscale x 4 x i64> @test_signed_v4f16_v4i64(<vscale x 4 x half> %f) {
; CHECK-NEXT: fcmge p2.h, p0/z, z3.h, #0.0
; CHECK-NEXT: fcvtzu z0.d, p1/m, z2.h
; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z4.h
-; CHECK-NEXT: fcmgt p0.h, p0/z, z3.h, z4.h
; CHECK-NEXT: fcvtzu z1.d, p2/m, z3.h
+; CHECK-NEXT: fcmgt p0.h, p0/z, z3.h, z4.h
; CHECK-NEXT: mov z0.d, p1/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-llrint.ll b/llvm/test/CodeGen/AArch64/sve-llrint.ll
index 12d49183edea4..d5a4838ff9687 100644
--- a/llvm/test/CodeGen/AArch64/sve-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-llrint.ll
@@ -6,17 +6,17 @@ define <vscale x 1 x i64> @llrint_v1i64_v1f16(<vscale x 1 x half> %x) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov w8, #64511 // =0xfbff
+; CHECK-NEXT: mov z2.d, #0x8000000000000000
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: mov w8, #31743 // =0x7bff
; CHECK-NEXT: frintx z0.h, p0/m, z0.h
-; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: mov z1.d, #0x8000000000000000
-; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.h
-; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h
-; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff
+; CHECK-NEXT: mov z1.h, w8
+; CHECK-NEXT: fcvtzs z2.d, p1/m, z0.h
+; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: mov z1.d, #0x7fffffffffffffff
; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT: sel z0.d, p1, z2.d, z1.d
+; CHECK-NEXT: sel z0.d, p1, z1.d, z2.d
; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1f16(<vscale x 1 x half> %x)
@@ -29,17 +29,17 @@ define <vscale x 2 x i64> @llrint_v1i64_v2f16(<vscale x 2 x half> %x) {
; CHECK: // %bb.0:
; CHECK-NEXT: ...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/153810
More information about the llvm-commits mailing list