[llvm] [AArch64] Optimise MOVI + CMGT to CMGE (PR #74499)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 5 09:22:15 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Sjoerd Meijer (sjoerdmeijer)
<details>
<summary>Changes</summary>
This fixes a regression that occurred for a pattern of MOVI + CMGT instructions, which can be optimised to CMGE. I.e., when the signed greater-than compare has -1 as an operand, we can rewrite that as a compare greater than or equal to 0, which is what CMGE does.
Fixes #<!-- -->61836
---
Full diff: https://github.com/llvm/llvm-project/pull/74499.diff
4 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+3)
- (modified) llvm/test/CodeGen/AArch64/cmp-select-sign.ll (+6-8)
- (modified) llvm/test/CodeGen/AArch64/signbit-shift.ll (+11-13)
- (modified) llvm/test/CodeGen/AArch64/vselect-ext.ll (+87-91)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f36607b03e76f..01c1a9660eb0a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -13710,6 +13710,7 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
APInt UndefBits(VT.getSizeInBits(), 0);
bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
bool IsZero = IsCnst && (CnstBits == 0);
+ bool IsMinusOne = IsCnst && CnstBits.isAllOnes();
if (SrcVT.getVectorElementType().isFloatingPoint()) {
switch (CC) {
@@ -13778,6 +13779,8 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
case AArch64CC::GT:
if (IsZero)
return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS);
+ if (IsMinusOne)
+ return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS, RHS);
return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS);
case AArch64CC::LE:
if (IsZero)
diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
index d16b5786a9965..09a6e26fe5a40 100644
--- a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
@@ -176,12 +176,11 @@ define <4 x i32> @sign_4xi32_multi_use(<4 x i32> %a) {
; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff
-; CHECK-NEXT: cmlt v2.4s, v0.4s, #0
-; CHECK-NEXT: orr v2.4s, #1
-; CHECK-NEXT: cmgt v1.4s, v0.4s, v1.4s
-; CHECK-NEXT: str q2, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: xtn v0.4h, v1.4s
+; CHECK-NEXT: cmlt v1.4s, v0.4s, #0
+; CHECK-NEXT: cmge v0.4s, v0.4s, #0
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: orr v1.4s, #1
+; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
; CHECK-NEXT: bl use_4xi1
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
@@ -227,9 +226,8 @@ define <4 x i32> @not_sign_4xi32_2(<4 x i32> %a) {
define <4 x i32> @not_sign_4xi32_3(<4 x i32> %a) {
; CHECK-LABEL: not_sign_4xi32_3:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff
; CHECK-NEXT: adrp x8, .LCPI18_0
-; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: cmge v0.4s, v0.4s, #0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_0]
; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
diff --git a/llvm/test/CodeGen/AArch64/signbit-shift.ll b/llvm/test/CodeGen/AArch64/signbit-shift.ll
index cb758f8a6202b..253ea1cab91fb 100644
--- a/llvm/test/CodeGen/AArch64/signbit-shift.ll
+++ b/llvm/test/CodeGen/AArch64/signbit-shift.ll
@@ -29,10 +29,9 @@ define i32 @add_zext_ifpos(i32 %x) {
define <4 x i32> @add_zext_ifpos_vec_splat(<4 x i32> %x) {
; CHECK-LABEL: add_zext_ifpos_vec_splat:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff
-; CHECK-NEXT: movi v2.4s, #41
-; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: sub v0.4s, v2.4s, v0.4s
+; CHECK-NEXT: movi v1.4s, #41
+; CHECK-NEXT: cmge v0.4s, v0.4s, #0
+; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
%c = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%e = zext <4 x i1> %c to <4 x i32>
@@ -43,7 +42,7 @@ define <4 x i32> @add_zext_ifpos_vec_splat(<4 x i32> %x) {
define i32 @sel_ifpos_tval_bigger(i32 %x) {
; CHECK-LABEL: sel_ifpos_tval_bigger:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #41
+; CHECK-NEXT: mov w8, #41 // =0x29
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: cinc w0, w8, ge
; CHECK-NEXT: ret
@@ -78,10 +77,9 @@ define i32 @add_sext_ifpos(i32 %x) {
define <4 x i32> @add_sext_ifpos_vec_splat(<4 x i32> %x) {
; CHECK-LABEL: add_sext_ifpos_vec_splat:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff
-; CHECK-NEXT: movi v2.4s, #42
-; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: movi v1.4s, #42
+; CHECK-NEXT: cmge v0.4s, v0.4s, #0
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%c = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%e = sext <4 x i1> %c to <4 x i32>
@@ -92,7 +90,7 @@ define <4 x i32> @add_sext_ifpos_vec_splat(<4 x i32> %x) {
define i32 @sel_ifpos_fval_bigger(i32 %x) {
; CHECK-LABEL: sel_ifpos_fval_bigger:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #41
+; CHECK-NEXT: mov w8, #41 // =0x29
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: cinc w0, w8, lt
; CHECK-NEXT: ret
@@ -128,7 +126,7 @@ define i32 @add_zext_ifneg(i32 %x) {
define i32 @sel_ifneg_tval_bigger(i32 %x) {
; CHECK-LABEL: sel_ifneg_tval_bigger:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #41
+; CHECK-NEXT: mov w8, #41 // =0x29
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: cinc w0, w8, lt
; CHECK-NEXT: ret
@@ -162,7 +160,7 @@ define i32 @add_sext_ifneg(i32 %x) {
define i32 @sel_ifneg_fval_bigger(i32 %x) {
; CHECK-LABEL: sel_ifneg_fval_bigger:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #41
+; CHECK-NEXT: mov w8, #41 // =0x29
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: cinc w0, w8, ge
; CHECK-NEXT: ret
@@ -199,7 +197,7 @@ define <4 x i32> @add_lshr_not_vec_splat(<4 x i32> %x) {
define i32 @sub_lshr_not(i32 %x) {
; CHECK-LABEL: sub_lshr_not:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #42
+; CHECK-NEXT: mov w8, #42 // =0x2a
; CHECK-NEXT: bfxil w8, w0, #31, #1
; CHECK-NEXT: mov w0, w8
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/vselect-ext.ll b/llvm/test/CodeGen/AArch64/vselect-ext.ll
index 0ae09ebe91630..b80955665c74f 100644
--- a/llvm/test/CodeGen/AArch64/vselect-ext.ll
+++ b/llvm/test/CodeGen/AArch64/vselect-ext.ll
@@ -543,15 +543,14 @@ entry:
define <16 x i32> @same_zext_used_in_cmp_signed_pred_and_select_can_convert_to_unsigned_pred(<16 x i8> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_signed_pred_and_select_can_convert_to_unsigned_pred:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: movi.2d v1, #0xffffffffffffffff
+; CHECK-NEXT: cmge.16b v1, v0, #0
; CHECK-NEXT: ushll.8h v2, v0, #0
-; CHECK-NEXT: ushll.4s v4, v2, #0
-; CHECK-NEXT: ushll2.4s v2, v2, #0
-; CHECK-NEXT: cmgt.16b v1, v0, v1
; CHECK-NEXT: ushll2.8h v0, v0, #0
; CHECK-NEXT: sshll.8h v3, v1, #0
; CHECK-NEXT: sshll2.8h v1, v1, #0
+; CHECK-NEXT: ushll.4s v4, v2, #0
; CHECK-NEXT: ushll.4s v5, v0, #0
+; CHECK-NEXT: ushll2.4s v2, v2, #0
; CHECK-NEXT: ushll2.4s v6, v0, #0
; CHECK-NEXT: sshll.4s v0, v3, #0
; CHECK-NEXT: sshll.4s v7, v1, #0
@@ -574,52 +573,51 @@ define void @extension_in_loop_v16i8_to_v16i32(ptr %src, ptr %dst) {
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: Lloh2:
; CHECK-NEXT: adrp x8, lCPI24_0 at PAGE
-; CHECK-NEXT: movi.2d v0, #0xffffffffffffffff
; CHECK-NEXT: Lloh3:
-; CHECK-NEXT: adrp x9, lCPI24_2 at PAGE
+; CHECK-NEXT: adrp x9, lCPI24_1 at PAGE
; CHECK-NEXT: Lloh4:
-; CHECK-NEXT: ldr q1, [x8, lCPI24_0 at PAGEOFF]
+; CHECK-NEXT: adrp x10, lCPI24_2 at PAGE
; CHECK-NEXT: Lloh5:
-; CHECK-NEXT: adrp x8, lCPI24_1 at PAGE
+; CHECK-NEXT: ldr q0, [x8, lCPI24_0 at PAGEOFF]
; CHECK-NEXT: Lloh6:
-; CHECK-NEXT: adrp x10, lCPI24_3 at PAGE
+; CHECK-NEXT: adrp x8, lCPI24_3 at PAGE
; CHECK-NEXT: Lloh7:
-; CHECK-NEXT: ldr q2, [x8, lCPI24_1 at PAGEOFF]
+; CHECK-NEXT: ldr q1, [x9, lCPI24_1 at PAGEOFF]
; CHECK-NEXT: Lloh8:
-; CHECK-NEXT: ldr q3, [x9, lCPI24_2 at PAGEOFF]
+; CHECK-NEXT: ldr q2, [x10, lCPI24_2 at PAGEOFF]
; CHECK-NEXT: Lloh9:
-; CHECK-NEXT: ldr q4, [x10, lCPI24_3 at PAGEOFF]
+; CHECK-NEXT: ldr q3, [x8, lCPI24_3 at PAGEOFF]
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: LBB24_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr q5, [x0, x8]
+; CHECK-NEXT: ldr q4, [x0, x8]
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: cmp x8, #128
-; CHECK-NEXT: cmgt.16b v6, v5, v0
-; CHECK-NEXT: tbl.16b v16, { v5 }, v1
-; CHECK-NEXT: tbl.16b v17, { v5 }, v2
-; CHECK-NEXT: tbl.16b v19, { v5 }, v3
-; CHECK-NEXT: tbl.16b v5, { v5 }, v4
-; CHECK-NEXT: sshll2.8h v7, v6, #0
-; CHECK-NEXT: sshll.8h v6, v6, #0
-; CHECK-NEXT: sshll2.4s v18, v7, #0
-; CHECK-NEXT: sshll.4s v7, v7, #0
-; CHECK-NEXT: sshll2.4s v20, v6, #0
+; CHECK-NEXT: cmge.16b v5, v4, #0
+; CHECK-NEXT: tbl.16b v7, { v4 }, v0
+; CHECK-NEXT: tbl.16b v16, { v4 }, v1
+; CHECK-NEXT: tbl.16b v18, { v4 }, v2
+; CHECK-NEXT: tbl.16b v4, { v4 }, v3
+; CHECK-NEXT: sshll2.8h v6, v5, #0
+; CHECK-NEXT: sshll.8h v5, v5, #0
+; CHECK-NEXT: sshll2.4s v17, v6, #0
; CHECK-NEXT: sshll.4s v6, v6, #0
-; CHECK-NEXT: and.16b v16, v16, v18
-; CHECK-NEXT: and.16b v7, v17, v7
-; CHECK-NEXT: and.16b v17, v19, v20
-; CHECK-NEXT: and.16b v5, v5, v6
-; CHECK-NEXT: stp q7, q16, [x1, #32]
-; CHECK-NEXT: stp q5, q17, [x1], #64
+; CHECK-NEXT: sshll2.4s v19, v5, #0
+; CHECK-NEXT: sshll.4s v5, v5, #0
+; CHECK-NEXT: and.16b v7, v7, v17
+; CHECK-NEXT: and.16b v6, v16, v6
+; CHECK-NEXT: and.16b v16, v18, v19
+; CHECK-NEXT: and.16b v4, v4, v5
+; CHECK-NEXT: stp q6, q7, [x1, #32]
+; CHECK-NEXT: stp q4, q16, [x1], #64
; CHECK-NEXT: b.ne LBB24_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh9
-; CHECK-NEXT: .loh AdrpLdr Lloh5, Lloh7
-; CHECK-NEXT: .loh AdrpLdr Lloh3, Lloh8
-; CHECK-NEXT: .loh AdrpAdrp Lloh2, Lloh5
-; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh4
+; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh8
+; CHECK-NEXT: .loh AdrpLdr Lloh3, Lloh7
+; CHECK-NEXT: .loh AdrpAdrp Lloh2, Lloh6
+; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh5
entry:
br label %loop
@@ -645,52 +643,51 @@ define void @extension_in_loop_as_shuffle_v16i8_to_v16i32(ptr %src, ptr %dst) {
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: Lloh10:
; CHECK-NEXT: adrp x8, lCPI25_0 at PAGE
-; CHECK-NEXT: movi.2d v0, #0xffffffffffffffff
; CHECK-NEXT: Lloh11:
-; CHECK-NEXT: adrp x9, lCPI25_2 at PAGE
+; CHECK-NEXT: adrp x9, lCPI25_1 at PAGE
; CHECK-NEXT: Lloh12:
-; CHECK-NEXT: ldr q1, [x8, lCPI25_0 at PAGEOFF]
+; CHECK-NEXT: adrp x10, lCPI25_2 at PAGE
; CHECK-NEXT: Lloh13:
-; CHECK-NEXT: adrp x8, lCPI25_1 at PAGE
+; CHECK-NEXT: ldr q0, [x8, lCPI25_0 at PAGEOFF]
; CHECK-NEXT: Lloh14:
-; CHECK-NEXT: adrp x10, lCPI25_3 at PAGE
+; CHECK-NEXT: adrp x8, lCPI25_3 at PAGE
; CHECK-NEXT: Lloh15:
-; CHECK-NEXT: ldr q2, [x8, lCPI25_1 at PAGEOFF]
+; CHECK-NEXT: ldr q1, [x9, lCPI25_1 at PAGEOFF]
; CHECK-NEXT: Lloh16:
-; CHECK-NEXT: ldr q3, [x9, lCPI25_2 at PAGEOFF]
+; CHECK-NEXT: ldr q2, [x10, lCPI25_2 at PAGEOFF]
; CHECK-NEXT: Lloh17:
-; CHECK-NEXT: ldr q4, [x10, lCPI25_3 at PAGEOFF]
+; CHECK-NEXT: ldr q3, [x8, lCPI25_3 at PAGEOFF]
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: LBB25_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr q5, [x0, x8]
+; CHECK-NEXT: ldr q4, [x0, x8]
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: cmp x8, #128
-; CHECK-NEXT: cmgt.16b v6, v5, v0
-; CHECK-NEXT: tbl.16b v16, { v5 }, v1
-; CHECK-NEXT: tbl.16b v17, { v5 }, v2
-; CHECK-NEXT: tbl.16b v19, { v5 }, v3
-; CHECK-NEXT: tbl.16b v5, { v5 }, v4
-; CHECK-NEXT: sshll2.8h v7, v6, #0
-; CHECK-NEXT: sshll.8h v6, v6, #0
-; CHECK-NEXT: sshll2.4s v18, v7, #0
-; CHECK-NEXT: sshll.4s v7, v7, #0
-; CHECK-NEXT: sshll2.4s v20, v6, #0
+; CHECK-NEXT: cmge.16b v5, v4, #0
+; CHECK-NEXT: tbl.16b v7, { v4 }, v0
+; CHECK-NEXT: tbl.16b v16, { v4 }, v1
+; CHECK-NEXT: tbl.16b v18, { v4 }, v2
+; CHECK-NEXT: tbl.16b v4, { v4 }, v3
+; CHECK-NEXT: sshll2.8h v6, v5, #0
+; CHECK-NEXT: sshll.8h v5, v5, #0
+; CHECK-NEXT: sshll2.4s v17, v6, #0
; CHECK-NEXT: sshll.4s v6, v6, #0
-; CHECK-NEXT: and.16b v16, v16, v18
-; CHECK-NEXT: and.16b v7, v17, v7
-; CHECK-NEXT: and.16b v17, v19, v20
-; CHECK-NEXT: and.16b v5, v5, v6
-; CHECK-NEXT: stp q7, q16, [x1, #32]
-; CHECK-NEXT: stp q5, q17, [x1], #64
+; CHECK-NEXT: sshll2.4s v19, v5, #0
+; CHECK-NEXT: sshll.4s v5, v5, #0
+; CHECK-NEXT: and.16b v7, v7, v17
+; CHECK-NEXT: and.16b v6, v16, v6
+; CHECK-NEXT: and.16b v16, v18, v19
+; CHECK-NEXT: and.16b v4, v4, v5
+; CHECK-NEXT: stp q6, q7, [x1, #32]
+; CHECK-NEXT: stp q4, q16, [x1], #64
; CHECK-NEXT: b.ne LBB25_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdr Lloh14, Lloh17
-; CHECK-NEXT: .loh AdrpLdr Lloh13, Lloh15
-; CHECK-NEXT: .loh AdrpLdr Lloh11, Lloh16
-; CHECK-NEXT: .loh AdrpAdrp Lloh10, Lloh13
-; CHECK-NEXT: .loh AdrpLdr Lloh10, Lloh12
+; CHECK-NEXT: .loh AdrpLdr Lloh12, Lloh16
+; CHECK-NEXT: .loh AdrpLdr Lloh11, Lloh15
+; CHECK-NEXT: .loh AdrpAdrp Lloh10, Lloh14
+; CHECK-NEXT: .loh AdrpLdr Lloh10, Lloh13
entry:
br label %loop
@@ -717,52 +714,51 @@ define void @shuffle_in_loop_is_no_extend_v16i8_to_v16i32(ptr %src, ptr %dst) {
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: Lloh18:
; CHECK-NEXT: adrp x8, lCPI26_0 at PAGE
-; CHECK-NEXT: movi.2d v0, #0xffffffffffffffff
; CHECK-NEXT: Lloh19:
-; CHECK-NEXT: adrp x9, lCPI26_2 at PAGE
+; CHECK-NEXT: adrp x9, lCPI26_1 at PAGE
; CHECK-NEXT: Lloh20:
-; CHECK-NEXT: ldr q1, [x8, lCPI26_0 at PAGEOFF]
+; CHECK-NEXT: adrp x10, lCPI26_2 at PAGE
; CHECK-NEXT: Lloh21:
-; CHECK-NEXT: adrp x8, lCPI26_1 at PAGE
+; CHECK-NEXT: ldr q0, [x8, lCPI26_0 at PAGEOFF]
; CHECK-NEXT: Lloh22:
-; CHECK-NEXT: adrp x10, lCPI26_3 at PAGE
+; CHECK-NEXT: adrp x8, lCPI26_3 at PAGE
; CHECK-NEXT: Lloh23:
-; CHECK-NEXT: ldr q2, [x8, lCPI26_1 at PAGEOFF]
+; CHECK-NEXT: ldr q1, [x9, lCPI26_1 at PAGEOFF]
; CHECK-NEXT: Lloh24:
-; CHECK-NEXT: ldr q3, [x9, lCPI26_2 at PAGEOFF]
+; CHECK-NEXT: ldr q2, [x10, lCPI26_2 at PAGEOFF]
; CHECK-NEXT: Lloh25:
-; CHECK-NEXT: ldr q4, [x10, lCPI26_3 at PAGEOFF]
+; CHECK-NEXT: ldr q3, [x8, lCPI26_3 at PAGEOFF]
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: LBB26_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr q5, [x0, x8]
+; CHECK-NEXT: ldr q4, [x0, x8]
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: cmp x8, #128
-; CHECK-NEXT: cmgt.16b v6, v5, v0
-; CHECK-NEXT: tbl.16b v16, { v5 }, v1
-; CHECK-NEXT: tbl.16b v17, { v5 }, v2
-; CHECK-NEXT: tbl.16b v19, { v5 }, v3
-; CHECK-NEXT: tbl.16b v5, { v5 }, v4
-; CHECK-NEXT: sshll2.8h v7, v6, #0
-; CHECK-NEXT: sshll.8h v6, v6, #0
-; CHECK-NEXT: sshll2.4s v18, v7, #0
-; CHECK-NEXT: sshll.4s v7, v7, #0
-; CHECK-NEXT: sshll2.4s v20, v6, #0
+; CHECK-NEXT: cmge.16b v5, v4, #0
+; CHECK-NEXT: tbl.16b v7, { v4 }, v0
+; CHECK-NEXT: tbl.16b v16, { v4 }, v1
+; CHECK-NEXT: tbl.16b v18, { v4 }, v2
+; CHECK-NEXT: tbl.16b v4, { v4 }, v3
+; CHECK-NEXT: sshll2.8h v6, v5, #0
+; CHECK-NEXT: sshll.8h v5, v5, #0
+; CHECK-NEXT: sshll2.4s v17, v6, #0
; CHECK-NEXT: sshll.4s v6, v6, #0
-; CHECK-NEXT: and.16b v16, v16, v18
-; CHECK-NEXT: and.16b v7, v17, v7
-; CHECK-NEXT: and.16b v17, v19, v20
-; CHECK-NEXT: and.16b v5, v5, v6
-; CHECK-NEXT: stp q7, q16, [x1, #32]
-; CHECK-NEXT: stp q5, q17, [x1], #64
+; CHECK-NEXT: sshll2.4s v19, v5, #0
+; CHECK-NEXT: sshll.4s v5, v5, #0
+; CHECK-NEXT: and.16b v7, v7, v17
+; CHECK-NEXT: and.16b v6, v16, v6
+; CHECK-NEXT: and.16b v16, v18, v19
+; CHECK-NEXT: and.16b v4, v4, v5
+; CHECK-NEXT: stp q6, q7, [x1, #32]
+; CHECK-NEXT: stp q4, q16, [x1], #64
; CHECK-NEXT: b.ne LBB26_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdr Lloh22, Lloh25
-; CHECK-NEXT: .loh AdrpLdr Lloh21, Lloh23
-; CHECK-NEXT: .loh AdrpLdr Lloh19, Lloh24
-; CHECK-NEXT: .loh AdrpAdrp Lloh18, Lloh21
-; CHECK-NEXT: .loh AdrpLdr Lloh18, Lloh20
+; CHECK-NEXT: .loh AdrpLdr Lloh20, Lloh24
+; CHECK-NEXT: .loh AdrpLdr Lloh19, Lloh23
+; CHECK-NEXT: .loh AdrpAdrp Lloh18, Lloh22
+; CHECK-NEXT: .loh AdrpLdr Lloh18, Lloh21
entry:
br label %loop
``````````
</details>
https://github.com/llvm/llvm-project/pull/74499
More information about the llvm-commits
mailing list