[llvm] [AArch64] Combine signext_inreg of setcc(... != splat(0)) (PR #157665)

via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 9 06:05:08 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: Hari Limaye (hazzlim)

<details>
<summary>Changes</summary>

Add the following fold to the AArch64 DAGCombine:
    Fold setcc_merge_zero(
            pred, insert_subvector(undef, signext_inreg(vNi1), 0),
                != splat(0))
         -> setcc_merge_zero(pred, insert_subvector(undef, shl(vNi1), 0),
                != splat(0))

Since the result of the comparison (!= 0) depends only on bit 0 of the input, the left shift alone is sufficient and the arithmetic right shift (sign extension) can be dropped.

---

Patch is 37.15 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/157665.diff


7 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+32) 
- (modified) llvm/test/CodeGen/AArch64/combine-storetomstore.ll (+135-191) 
- (modified) llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll (-9) 
- (modified) llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll (-5) 
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-loads.ll (-5) 
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-stores.ll (-4) 
- (modified) llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll (-2) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 5e11145ecd161..57f8e97b1326f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -26097,6 +26097,17 @@ static SDValue performSetCCPunpkCombine(SDNode *N, SelectionDAG &DAG) {
   return SDValue();
 }
 
+static bool isSignExtInReg(const SDValue &V) {
+  if (V.getOpcode() != AArch64ISD::VASHR ||
+      V.getOperand(0).getOpcode() != AArch64ISD::VSHL)
+    return false;
+
+  unsigned BitWidth = V->getValueType(0).getScalarSizeInBits();
+  unsigned ShiftAmtR = V.getConstantOperandVal(1);
+  unsigned ShiftAmtL = V.getOperand(0).getConstantOperandVal(1);
+  return (ShiftAmtR == ShiftAmtL && ShiftAmtR == (BitWidth - 1));
+}
+
 static SDValue
 performSetccMergeZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
   assert(N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
@@ -26137,6 +26148,27 @@ performSetccMergeZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
                          LHS->getOperand(0), Pred);
   }
 
+  //    setcc_merge_zero(
+  //       pred, insert_subvector(undef, signext_inreg(vNi1), 0), != splat(0))
+  // => setcc_merge_zero(
+  //       pred, insert_subvector(undef, shl(vNi1), 0), != splat(0))
+  if (Cond == ISD::SETNE && isZerosVector(RHS.getNode()) &&
+      LHS->getOpcode() == ISD::INSERT_SUBVECTOR && LHS.hasOneUse()) {
+    SDValue L0 = LHS->getOperand(0);
+    SDValue L1 = LHS->getOperand(1);
+    SDValue L2 = LHS->getOperand(2);
+
+    if (L0.getOpcode() == ISD::UNDEF && isNullConstant(L2) &&
+        isSignExtInReg(L1)) {
+      SDLoc DL(N);
+      SDValue Shl = L1.getOperand(0);
+      SDValue NewLHS = DAG.getNode(ISD::INSERT_SUBVECTOR, DL,
+                                   LHS.getValueType(), L0, Shl, L2);
+      return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, N->getValueType(0),
+                         Pred, NewLHS, RHS, N->getOperand(3));
+    }
+  }
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/AArch64/combine-storetomstore.ll b/llvm/test/CodeGen/AArch64/combine-storetomstore.ll
index c2e54d3d39394..6ab2d7c2d7857 100644
--- a/llvm/test/CodeGen/AArch64/combine-storetomstore.ll
+++ b/llvm/test/CodeGen/AArch64/combine-storetomstore.ll
@@ -24,7 +24,6 @@ define void @test_masked_store_success_v4i16(<4 x i16> %x, ptr %ptr, <4 x i1> %m
 ; SVE-NEXT:    shl v1.4h, v1.4h, #15
 ; SVE-NEXT:    ptrue p0.h, vl4
 ; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    cmlt v1.4h, v1.4h, #0
 ; SVE-NEXT:    cmpne p0.h, p0/z, z1.h, #0
 ; SVE-NEXT:    st1h { z0.h }, p0, [x0]
 ; SVE-NEXT:    ret
@@ -41,7 +40,6 @@ define void @test_masked_store_success_v4i32(<4 x i32> %x, ptr %ptr, <4 x i1> %m
 ; SVE-NEXT:    ptrue p0.s, vl4
 ; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; SVE-NEXT:    shl v1.4s, v1.4s, #31
-; SVE-NEXT:    cmlt v1.4s, v1.4s, #0
 ; SVE-NEXT:    cmpne p0.s, p0/z, z1.s, #0
 ; SVE-NEXT:    st1w { z0.s }, p0, [x0]
 ; SVE-NEXT:    ret
@@ -63,8 +61,6 @@ define void @test_masked_store_success_v4i64(<4 x i64> %x, ptr %ptr, <4 x i1> %m
 ; SVE-NEXT:    ushll v2.2d, v2.2s, #0
 ; SVE-NEXT:    shl v3.2d, v3.2d, #63
 ; SVE-NEXT:    shl v2.2d, v2.2d, #63
-; SVE-NEXT:    cmlt v3.2d, v3.2d, #0
-; SVE-NEXT:    cmlt v2.2d, v2.2d, #0
 ; SVE-NEXT:    cmpne p1.d, p0/z, z3.d, #0
 ; SVE-NEXT:    cmpne p0.d, p0/z, z2.d, #0
 ; SVE-NEXT:    st1d { z1.d }, p1, [x0, x8, lsl #3]
@@ -82,7 +78,6 @@ define void @test_masked_store_success_v4f16(<4 x half> %x, ptr %ptr, <4 x i1> %
 ; SVE-NEXT:    shl v1.4h, v1.4h, #15
 ; SVE-NEXT:    ptrue p0.h, vl4
 ; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    cmlt v1.4h, v1.4h, #0
 ; SVE-NEXT:    cmpne p0.h, p0/z, z1.h, #0
 ; SVE-NEXT:    st1h { z0.h }, p0, [x0]
 ; SVE-NEXT:    ret
@@ -99,7 +94,6 @@ define void @test_masked_store_success_v4f32(<4 x float> %x, ptr %ptr, <4 x i1>
 ; SVE-NEXT:    ptrue p0.s, vl4
 ; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; SVE-NEXT:    shl v1.4s, v1.4s, #31
-; SVE-NEXT:    cmlt v1.4s, v1.4s, #0
 ; SVE-NEXT:    cmpne p0.s, p0/z, z1.s, #0
 ; SVE-NEXT:    st1w { z0.s }, p0, [x0]
 ; SVE-NEXT:    ret
@@ -121,8 +115,6 @@ define void @test_masked_store_success_v4f64(<4 x double> %x, ptr %ptr, <4 x i1>
 ; SVE-NEXT:    ushll v2.2d, v2.2s, #0
 ; SVE-NEXT:    shl v3.2d, v3.2d, #63
 ; SVE-NEXT:    shl v2.2d, v2.2d, #63
-; SVE-NEXT:    cmlt v3.2d, v3.2d, #0
-; SVE-NEXT:    cmlt v2.2d, v2.2d, #0
 ; SVE-NEXT:    cmpne p1.d, p0/z, z3.d, #0
 ; SVE-NEXT:    cmpne p0.d, p0/z, z2.d, #0
 ; SVE-NEXT:    st1d { z1.d }, p1, [x0, x8, lsl #3]
@@ -140,7 +132,6 @@ define void @test_masked_store_success_v8i8(<8 x i8> %x, ptr %ptr, <8 x i1> %mas
 ; SVE-NEXT:    shl v1.8b, v1.8b, #7
 ; SVE-NEXT:    ptrue p0.b, vl8
 ; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    cmlt v1.8b, v1.8b, #0
 ; SVE-NEXT:    cmpne p0.b, p0/z, z1.b, #0
 ; SVE-NEXT:    st1b { z0.b }, p0, [x0]
 ; SVE-NEXT:    ret
@@ -157,7 +148,6 @@ define void @test_masked_store_success_v8i16(<8 x i16> %x, ptr %ptr, <8 x i1> %m
 ; SVE-NEXT:    ptrue p0.h, vl8
 ; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; SVE-NEXT:    shl v1.8h, v1.8h, #15
-; SVE-NEXT:    cmlt v1.8h, v1.8h, #0
 ; SVE-NEXT:    cmpne p0.h, p0/z, z1.h, #0
 ; SVE-NEXT:    st1h { z0.h }, p0, [x0]
 ; SVE-NEXT:    ret
@@ -180,8 +170,6 @@ define void @test_masked_store_success_v8i32(<8 x i32> %x, ptr %ptr, <8 x i1> %m
 ; SVE-NEXT:    ushll v2.4s, v2.4h, #0
 ; SVE-NEXT:    shl v3.4s, v3.4s, #31
 ; SVE-NEXT:    shl v2.4s, v2.4s, #31
-; SVE-NEXT:    cmlt v3.4s, v3.4s, #0
-; SVE-NEXT:    cmlt v2.4s, v2.4s, #0
 ; SVE-NEXT:    cmpne p1.s, p0/z, z3.s, #0
 ; SVE-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; SVE-NEXT:    st1w { z1.s }, p1, [x0, x8, lsl #2]
@@ -219,12 +207,8 @@ define void @test_masked_store_success_v8i64(<8 x i64> %x, ptr %ptr, <8 x i1> %m
 ; SVE-NEXT:    shl v4.2d, v4.2d, #63
 ; SVE-NEXT:    shl v5.2d, v5.2d, #63
 ; SVE-NEXT:    shl v6.2d, v6.2d, #63
-; SVE-NEXT:    shl v7.2d, v7.2d, #63
-; SVE-NEXT:    cmlt v4.2d, v4.2d, #0
-; SVE-NEXT:    cmlt v5.2d, v5.2d, #0
-; SVE-NEXT:    cmlt v6.2d, v6.2d, #0
 ; SVE-NEXT:    cmpne p1.d, p0/z, z4.d, #0
-; SVE-NEXT:    cmlt v4.2d, v7.2d, #0
+; SVE-NEXT:    shl v4.2d, v7.2d, #63
 ; SVE-NEXT:    cmpne p2.d, p0/z, z5.d, #0
 ; SVE-NEXT:    cmpne p3.d, p0/z, z6.d, #0
 ; SVE-NEXT:    cmpne p0.d, p0/z, z4.d, #0
@@ -247,7 +231,6 @@ define void @test_masked_store_success_v8f16(<8 x half> %x, ptr %ptr, <8 x i1> %
 ; SVE-NEXT:    ptrue p0.h, vl8
 ; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; SVE-NEXT:    shl v1.8h, v1.8h, #15
-; SVE-NEXT:    cmlt v1.8h, v1.8h, #0
 ; SVE-NEXT:    cmpne p0.h, p0/z, z1.h, #0
 ; SVE-NEXT:    st1h { z0.h }, p0, [x0]
 ; SVE-NEXT:    ret
@@ -270,8 +253,6 @@ define void @test_masked_store_success_v8f32(<8 x float> %x, ptr %ptr, <8 x i1>
 ; SVE-NEXT:    ushll v2.4s, v2.4h, #0
 ; SVE-NEXT:    shl v3.4s, v3.4s, #31
 ; SVE-NEXT:    shl v2.4s, v2.4s, #31
-; SVE-NEXT:    cmlt v3.4s, v3.4s, #0
-; SVE-NEXT:    cmlt v2.4s, v2.4s, #0
 ; SVE-NEXT:    cmpne p1.s, p0/z, z3.s, #0
 ; SVE-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; SVE-NEXT:    st1w { z1.s }, p1, [x0, x8, lsl #2]
@@ -309,12 +290,8 @@ define void @test_masked_store_success_v8f64(<8 x double> %x, ptr %ptr, <8 x i1>
 ; SVE-NEXT:    shl v4.2d, v4.2d, #63
 ; SVE-NEXT:    shl v5.2d, v5.2d, #63
 ; SVE-NEXT:    shl v6.2d, v6.2d, #63
-; SVE-NEXT:    shl v7.2d, v7.2d, #63
-; SVE-NEXT:    cmlt v4.2d, v4.2d, #0
-; SVE-NEXT:    cmlt v5.2d, v5.2d, #0
-; SVE-NEXT:    cmlt v6.2d, v6.2d, #0
 ; SVE-NEXT:    cmpne p1.d, p0/z, z4.d, #0
-; SVE-NEXT:    cmlt v4.2d, v7.2d, #0
+; SVE-NEXT:    shl v4.2d, v7.2d, #63
 ; SVE-NEXT:    cmpne p2.d, p0/z, z5.d, #0
 ; SVE-NEXT:    cmpne p3.d, p0/z, z6.d, #0
 ; SVE-NEXT:    cmpne p0.d, p0/z, z4.d, #0
@@ -336,7 +313,6 @@ define void @test_masked_store_success_v16i8(<16 x i8> %x, ptr %ptr, <16 x i1> %
 ; SVE-NEXT:    shl v1.16b, v1.16b, #7
 ; SVE-NEXT:    ptrue p0.b, vl16
 ; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    cmlt v1.16b, v1.16b, #0
 ; SVE-NEXT:    cmpne p0.b, p0/z, z1.b, #0
 ; SVE-NEXT:    st1b { z0.b }, p0, [x0]
 ; SVE-NEXT:    ret
@@ -357,8 +333,6 @@ define void @test_masked_store_success_v16i16(<16 x i16> %x, ptr %ptr, <16 x i1>
 ; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; SVE-NEXT:    shl v3.8h, v3.8h, #15
 ; SVE-NEXT:    shl v2.8h, v2.8h, #15
-; SVE-NEXT:    cmlt v3.8h, v3.8h, #0
-; SVE-NEXT:    cmlt v2.8h, v2.8h, #0
 ; SVE-NEXT:    cmpne p1.h, p0/z, z3.h, #0
 ; SVE-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; SVE-NEXT:    st1h { z1.h }, p1, [x0, x8, lsl #1]
@@ -391,13 +365,9 @@ define void @test_masked_store_success_v16i32(<16 x i32> %x, ptr %ptr, <16 x i1>
 ; SVE-NEXT:    ushll v7.4s, v7.4h, #0
 ; SVE-NEXT:    ushll v5.4s, v5.4h, #0
 ; SVE-NEXT:    shl v4.4s, v4.4s, #31
-; SVE-NEXT:    cmlt v6.4s, v6.4s, #0
+; SVE-NEXT:    cmpne p1.s, p0/z, z6.s, #0
 ; SVE-NEXT:    shl v7.4s, v7.4s, #31
 ; SVE-NEXT:    shl v5.4s, v5.4s, #31
-; SVE-NEXT:    cmlt v4.4s, v4.4s, #0
-; SVE-NEXT:    cmpne p1.s, p0/z, z6.s, #0
-; SVE-NEXT:    cmlt v7.4s, v7.4s, #0
-; SVE-NEXT:    cmlt v5.4s, v5.4s, #0
 ; SVE-NEXT:    cmpne p2.s, p0/z, z7.s, #0
 ; SVE-NEXT:    cmpne p3.s, p0/z, z5.s, #0
 ; SVE-NEXT:    cmpne p0.s, p0/z, z4.s, #0
@@ -479,8 +449,6 @@ define void @test_masked_store_success_v32i8(<32 x i8> %x, ptr %ptr, <32 x i1> %
 ; SVE-NEXT:    mov w8, #16 // =0x10
 ; SVE-NEXT:    shl v2.16b, v2.16b, #7
 ; SVE-NEXT:    shl v3.16b, v3.16b, #7
-; SVE-NEXT:    cmlt v2.16b, v2.16b, #0
-; SVE-NEXT:    cmlt v3.16b, v3.16b, #0
 ; SVE-NEXT:    cmpne p1.b, p0/z, z3.b, #0
 ; SVE-NEXT:    cmpne p0.b, p0/z, z2.b, #0
 ; SVE-NEXT:    st1b { z1.b }, p1, [x0, x8]
@@ -565,12 +533,8 @@ define void @test_masked_store_success_v32i16(<32 x i16> %x, ptr %ptr, <32 x i1>
 ; SVE-NEXT:    shl v4.8h, v4.8h, #15
 ; SVE-NEXT:    shl v5.8h, v5.8h, #15
 ; SVE-NEXT:    shl v6.8h, v6.8h, #15
-; SVE-NEXT:    shl v7.8h, v7.8h, #15
-; SVE-NEXT:    cmlt v4.8h, v4.8h, #0
-; SVE-NEXT:    cmlt v5.8h, v5.8h, #0
-; SVE-NEXT:    cmlt v6.8h, v6.8h, #0
 ; SVE-NEXT:    cmpne p1.h, p0/z, z4.h, #0
-; SVE-NEXT:    cmlt v4.8h, v7.8h, #0
+; SVE-NEXT:    shl v4.8h, v7.8h, #15
 ; SVE-NEXT:    cmpne p2.h, p0/z, z5.h, #0
 ; SVE-NEXT:    cmpne p3.h, p0/z, z6.h, #0
 ; SVE-NEXT:    cmpne p0.h, p0/z, z4.h, #0
@@ -595,144 +559,140 @@ define void @test_masked_store_success_v64i8(<64 x i8> %x, ptr %ptr, <64 x i1> %
 ; SVE-NEXT:    .cfi_offset w29, -16
 ; SVE-NEXT:    ldr w8, [sp, #216]
 ; SVE-NEXT:    ldr w9, [sp, #344]
-; SVE-NEXT:    fmov s7, w1
+; SVE-NEXT:    fmov s6, w1
 ; SVE-NEXT:    ldr w11, [sp, #88]
 ; SVE-NEXT:    ldr w10, [sp, #224]
 ; SVE-NEXT:    ptrue p0.b, vl16
-; SVE-NEXT:    fmov s4, w8
-; SVE-NEXT:    fmov s5, w9
+; SVE-NEXT:    fmov s5, w8
+; SVE-NEXT:    fmov s4, w9
 ; SVE-NEXT:    ldr w8, [sp, #352]
-; SVE-NEXT:    fmov s6, w11
+; SVE-NEXT:    fmov s7, w11
 ; SVE-NEXT:    ldr w9, [sp, #96]
-; SVE-NEXT:    mov v7.b[1], w2
+; SVE-NEXT:    mov v6.b[1], w2
 ; SVE-NEXT:    // kill: def $q2 killed $q2 def $z2
 ; SVE-NEXT:    // kill: def $q3 killed $q3 def $z3
 ; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    mov v4.b[1], w10
-; SVE-NEXT:    mov v5.b[1], w8
+; SVE-NEXT:    mov v5.b[1], w10
+; SVE-NEXT:    mov v4.b[1], w8
 ; SVE-NEXT:    ldr w8, [sp, #232]
-; SVE-NEXT:    mov v6.b[1], w9
+; SVE-NEXT:    mov v7.b[1], w9
 ; SVE-NEXT:    ldr w9, [sp, #360]
 ; SVE-NEXT:    ldr w10, [sp, #112]
-; SVE-NEXT:    mov v7.b[2], w3
-; SVE-NEXT:    mov v4.b[2], w8
+; SVE-NEXT:    mov v6.b[2], w3
+; SVE-NEXT:    mov v5.b[2], w8
 ; SVE-NEXT:    ldr w8, [sp, #104]
-; SVE-NEXT:    mov v5.b[2], w9
+; SVE-NEXT:    mov v4.b[2], w9
 ; SVE-NEXT:    ldr w9, [sp, #368]
-; SVE-NEXT:    mov v6.b[2], w8
+; SVE-NEXT:    mov v7.b[2], w8
 ; SVE-NEXT:    ldr w8, [sp, #240]
-; SVE-NEXT:    mov v7.b[3], w4
-; SVE-NEXT:    mov v4.b[3], w8
-; SVE-NEXT:    mov v5.b[3], w9
+; SVE-NEXT:    mov v6.b[3], w4
+; SVE-NEXT:    mov v5.b[3], w8
+; SVE-NEXT:    mov v4.b[3], w9
 ; SVE-NEXT:    ldr w8, [sp, #248]
 ; SVE-NEXT:    ldr w9, [sp, #376]
-; SVE-NEXT:    mov v6.b[3], w10
+; SVE-NEXT:    mov v7.b[3], w10
 ; SVE-NEXT:    ldr w10, [sp, #120]
-; SVE-NEXT:    mov v7.b[4], w5
-; SVE-NEXT:    mov v4.b[4], w8
-; SVE-NEXT:    mov v5.b[4], w9
+; SVE-NEXT:    mov v6.b[4], w5
+; SVE-NEXT:    mov v5.b[4], w8
+; SVE-NEXT:    mov v4.b[4], w9
 ; SVE-NEXT:    ldr w8, [sp, #256]
 ; SVE-NEXT:    ldr w9, [sp, #384]
-; SVE-NEXT:    mov v6.b[4], w10
+; SVE-NEXT:    mov v7.b[4], w10
 ; SVE-NEXT:    ldr w10, [sp, #128]
-; SVE-NEXT:    mov v7.b[5], w6
-; SVE-NEXT:    mov v4.b[5], w8
-; SVE-NEXT:    mov v5.b[5], w9
+; SVE-NEXT:    mov v6.b[5], w6
+; SVE-NEXT:    mov v5.b[5], w8
+; SVE-NEXT:    mov v4.b[5], w9
 ; SVE-NEXT:    ldr w8, [sp, #264]
 ; SVE-NEXT:    ldr w9, [sp, #392]
-; SVE-NEXT:    mov v6.b[5], w10
+; SVE-NEXT:    mov v7.b[5], w10
 ; SVE-NEXT:    ldr w10, [sp, #136]
-; SVE-NEXT:    mov v7.b[6], w7
-; SVE-NEXT:    mov v4.b[6], w8
-; SVE-NEXT:    mov v5.b[6], w9
+; SVE-NEXT:    mov v6.b[6], w7
+; SVE-NEXT:    mov v5.b[6], w8
+; SVE-NEXT:    mov v4.b[6], w9
 ; SVE-NEXT:    ldr w8, [sp, #272]
 ; SVE-NEXT:    ldr w9, [sp, #400]
-; SVE-NEXT:    mov v6.b[6], w10
+; SVE-NEXT:    mov v7.b[6], w10
 ; SVE-NEXT:    ldr w10, [sp, #144]
-; SVE-NEXT:    mov v4.b[7], w8
+; SVE-NEXT:    mov v5.b[7], w8
 ; SVE-NEXT:    ldr w8, [sp, #16]
-; SVE-NEXT:    mov v5.b[7], w9
+; SVE-NEXT:    mov v4.b[7], w9
 ; SVE-NEXT:    ldr w9, [sp, #280]
-; SVE-NEXT:    mov v6.b[7], w10
-; SVE-NEXT:    mov v7.b[7], w8
+; SVE-NEXT:    mov v7.b[7], w10
+; SVE-NEXT:    mov v6.b[7], w8
 ; SVE-NEXT:    ldr w10, [sp, #408]
 ; SVE-NEXT:    ldr w8, [sp, #152]
-; SVE-NEXT:    mov v4.b[8], w9
+; SVE-NEXT:    mov v5.b[8], w9
 ; SVE-NEXT:    ldr w9, [sp, #24]
-; SVE-NEXT:    mov v5.b[8], w10
+; SVE-NEXT:    mov v4.b[8], w10
 ; SVE-NEXT:    ldr w10, [sp, #288]
-; SVE-NEXT:    mov v6.b[8], w8
-; SVE-NEXT:    mov v7.b[8], w9
+; SVE-NEXT:    mov v7.b[8], w8
+; SVE-NEXT:    mov v6.b[8], w9
 ; SVE-NEXT:    ldr w8, [sp, #416]
 ; SVE-NEXT:    ldr w9, [sp, #160]
-; SVE-NEXT:    mov v4.b[9], w10
+; SVE-NEXT:    mov v5.b[9], w10
 ; SVE-NEXT:    ldr w10, [sp, #32]
-; SVE-NEXT:    mov v5.b[9], w8
+; SVE-NEXT:    mov v4.b[9], w8
 ; SVE-NEXT:    ldr w8, [sp, #296]
-; SVE-NEXT:    mov v6.b[9], w9
-; SVE-NEXT:    mov v7.b[9], w10
+; SVE-NEXT:    mov v7.b[9], w9
+; SVE-NEXT:    mov v6.b[9], w10
 ; SVE-NEXT:    ldr w9, [sp, #424]
 ; SVE-NEXT:    ldr w10, [sp, #168]
-; SVE-NEXT:    mov v4.b[10], w8
+; SVE-NEXT:    mov v5.b[10], w8
 ; SVE-NEXT:    ldr w8, [sp, #40]
-; SVE-NEXT:    mov v5.b[10], w9
+; SVE-NEXT:    mov v4.b[10], w9
 ; SVE-NEXT:    ldr w9, [sp, #304]
-; SVE-NEXT:    mov v6.b[10], w10
-; SVE-NEXT:    mov v7.b[10], w8
+; SVE-NEXT:    mov v7.b[10], w10
+; SVE-NEXT:    mov v6.b[10], w8
 ; SVE-NEXT:    ldr w10, [sp, #432]
 ; SVE-NEXT:    ldr w8, [sp, #176]
-; SVE-NEXT:    mov v4.b[11], w9
+; SVE-NEXT:    mov v5.b[11], w9
 ; SVE-NEXT:    ldr w9, [sp, #48]
-; SVE-NEXT:    mov v5.b[11], w10
+; SVE-NEXT:    mov v4.b[11], w10
 ; SVE-NEXT:    ldr w10, [sp, #312]
-; SVE-NEXT:    mov v6.b[11], w8
-; SVE-NEXT:    mov v7.b[11], w9
+; SVE-NEXT:    mov v7.b[11], w8
+; SVE-NEXT:    mov v6.b[11], w9
 ; SVE-NEXT:    ldr w8, [sp, #440]
 ; SVE-NEXT:    ldr w9, [sp, #184]
-; SVE-NEXT:    mov v4.b[12], w10
+; SVE-NEXT:    mov v5.b[12], w10
 ; SVE-NEXT:    ldr w10, [sp, #56]
-; SVE-NEXT:    mov v5.b[12], w8
+; SVE-NEXT:    mov v4.b[12], w8
 ; SVE-NEXT:    ldr w8, [sp, #320]
-; SVE-NEXT:    mov v6.b[12], w9
-; SVE-NEXT:    mov v7.b[12], w10
+; SVE-NEXT:    mov v7.b[12], w9
+; SVE-NEXT:    mov v6.b[12], w10
 ; SVE-NEXT:    ldr w9, [sp, #448]
 ; SVE-NEXT:    ldr w10, [sp, #192]
-; SVE-NEXT:    mov v4.b[13], w8
+; SVE-NEXT:    mov v5.b[13], w8
 ; SVE-NEXT:    ldr w8, [sp, #64]
-; SVE-NEXT:    mov v5.b[13], w9
+; SVE-NEXT:    mov v4.b[13], w9
 ; SVE-NEXT:    ldr w9, [sp, #328]
-; SVE-NEXT:    mov v6.b[13], w10
-; SVE-NEXT:    mov v7.b[13], w8
+; SVE-NEXT:    mov v7.b[13], w10
+; SVE-NEXT:    mov v6.b[13], w8
 ; SVE-NEXT:    ldr w10, [sp, #456]
 ; SVE-NEXT:    ldr w8, [sp, #200]
-; SVE-NEXT:    mov v4.b[14], w9
+; SVE-NEXT:    mov v5.b[14], w9
 ; SVE-NEXT:    ldr w9, [sp, #72]
-; SVE-NEXT:    mov v5.b[14], w10
+; SVE-NEXT:    mov v4.b[14], w10
 ; SVE-NEXT:    ldr w10, [sp, #336]
-; SVE-NEXT:    mov v6.b[14], w8
-; SVE-NEXT:    mov v7.b[14], w9
+; SVE-NEXT:    mov v7.b[14], w8
+; SVE-NEXT:    mov v6.b[14], w9
 ; SVE-NEXT:    ldr w8, [sp, #464]
 ; SVE-NEXT:    ldr w9, [sp, #208]
-; SVE-NEXT:    mov v4.b[15], w10
+; SVE-NEXT:    mov v5.b[15], w10
 ; SVE-NEXT:    ldr w10, [sp, #80]
-; SVE-NEXT:    mov v5.b[15], w8
+; SVE-NEXT:    mov v4.b[15], w8
 ; SVE-NEXT:    mov w8, #32 // =0x20
-; SVE-NEXT:    mov v6.b[15], w9
-; SVE-NEXT:    mov v7.b[15], w10
+; SVE-NEXT:    mov v7.b[15], w9
+; SVE-NEXT:    mov v6.b[15], w10
 ; SVE-NEXT:    mov w9, #48 // =0x30
-; SVE-NEXT:    shl v4.16b, v4.16b, #7
 ; SVE-NEXT:    shl v5.16b, v5.16b, #7
-; SVE-NEXT:    shl v6.16b, v6.16b, #7
+; SVE-NEXT:    shl v4.16b, v4.16b, #7
 ; SVE-NEXT:    shl v7.16b, v7.16b, #7
-; SVE-NEXT:    cmlt v4.16b, v4.16b, #0
-; SVE-NEXT:    cmlt v5.16b, v5.16b, #0
-; SVE-NEXT:    cmlt v6.16b, v6.16b, #0
-; SVE-NEXT:    cmpne p1.b, p0/z, z4.b, #0
-; SVE-NEXT:    cmlt v4.16b, v7.16b, #0
-; SVE-NEXT:    cmpne p2.b, p0/z, z5.b, #0
-; SVE-NEXT:    cmpne p3.b, p0/z, z6.b, #0
-; SVE-NEXT:    cmpne p0.b, p0/z, z4.b, #0
+; SVE-NEXT:    cmpne p1.b, p0/z, z5.b, #0
+; SVE-NEXT:    shl v5.16b, v6.16b, #7
+; SVE-NEXT:    cmpne p2.b, p0/z, z4.b, #0
+; SVE-NEXT:    cmpne p3.b, p0/z, z7.b, #0
+; SVE-NEXT:    cmpne p0.b, p0/z, z5.b, #0
 ; SVE-NEXT:    st1b { z2.b }, p1, [x0, x8]
 ; SVE-NEXT:    mov w8, #16 // =0x10
 ; SVE-NEXT:    st1b { z3.b }, p2, [x0, x9]
@@ -755,7 +715,6 @@ define void @test_masked_store_success_invert_mask_v4i32(<4 x i32> %x, ptr %ptr,
 ; SVE-NEXT:    eor v1.8b, v1.8b, v2.8b
 ; SVE-NEXT:    ushll v1.4s, v1.4h, #0
 ; SVE-NEXT:    shl v1.4s, v1.4s, #31
-; SVE-NEXT:    cmlt v1.4s, v1.4s, #0
 ; SVE-NEXT:    cmpne p0.s, p0/z, z1.s, #0
 ; SVE-NEXT:    st1w { z0.s }, p0, [x0]
 ; SVE-NEXT:    ret
@@ -947,29 +906,27 @@ define void @test_masked_store_multiple_v8i32(<8 x i32> %x, <8 x i32> %y, ptr %p
 ; SVE-LABEL: test_masked_store_multiple_v8i32:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    zip2 v6.8b, v4.8b, v0.8b
-; SVE-NEXT:    zip1 v4.8b, v4.8b, v0.8b
+; SVE-NEXT:    zip1 v6.8b, v5.8b, v0.8b
+; SVE-NEXT:    zip2 v7.8b, v4.8b, v0.8b
 ; SVE-NEXT:    mov x8, #4 // =0x4
-; SVE-NEXT:    zip1 v7.8b, v5.8b, v0.8b
 ; SVE-NEXT:    zip2 v5.8b, v5.8b, v0.8b
+; SVE-NEXT:    zip1 v4.8b, v4.8b, v0.8b
 ; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    ptrue p0.s, vl4
 ; SVE-NEXT:    ushll v6.4s, v6.4h, #0
-; SVE-NEXT:    ushll v4.4s, v4.4h, #0
 ; SVE-NEXT:    ushll v7.4s, v7.4h, #0
 ; SVE-NEXT:    ushll v5.4s, v5.4h, #0
+; SVE-NEXT:    ushll v4.4s, v4.4h, #0
 ; SVE-NEXT:    shl v6.4s, v6.4s, #31
-; SVE-NEXT:    shl v4.4s, v4.4s, #31
 ; SVE-NEXT:    shl v7.4s, v7.4s, #31
 ; SVE-NEXT:    shl v5.4s, v5.4s, #31
+; SVE-NEXT:    shl v4.4s, v4.4s, #31
 ; SVE-NEXT:    cmlt v6.4s, v6.4s, #0
-; SVE-NEXT:    cmlt v4.4s, v4.4s, #0
-; SVE-NEXT:    cmlt v7.4s, v7.4s, #0
+; SVE-NEXT:    cmpne p1.s, p0/z, z7.s, #0
 ; SVE-NEXT:    cmlt v5.4s, v5.4s, #0
-; SVE-NEXT:    cmpne p1.s, p0/z, z6.s, #0
-; SVE-NEXT:    ldp q6, q16, [x1]
+; SVE-NEXT:    ldp q7, q16, [x1]
 ; SVE-NEXT:    cmpne p0.s, p0/z, z4.s, #0
-; SVE-NEXT:    bif v2.16b, v6.16b, v7.16b
+; SVE-NEXT:    bif v2.16b, v7.16b, v6.16b
 ; SVE-NEXT:    bif v3.16b, v16.16b, v5.16b
 ; SVE-NEXT:    st1w { z1.s }, p1, [x0, x8, lsl #2]
 ; SVE-NEXT:    st1w { z0.s }, p0, [x0]
@@ -987,74 +944,70 @@ define void @test_masked_store_multiple_v8i32(<8 x i32> %x, <8 x i32> %y, ptr %p
 define void @test_masked_store_multiple_v8i64(<8 x i64> %x, <8 x i64> %y, ptr %ptr1, ptr %ptr2, <8 x i1> %mask, <8 x i1> %mask2) {
 ; SVE-LABEL: test_masked_store_multiple_v8i64:
 ; SVE:       // %bb.0:
-; SVE-NEXT:    ldp d16, d18, [sp]
-; SVE-NEXT:    ptrue p0.d, vl2
+; SVE-NEXT:    ldp d16, d17, [sp]
+; SVE-NEXT:    ptrue p1.d, vl2
+; SVE-NEXT:    mov x9, #4 // =0x4
 ; SVE-NEXT:    // kill: def $q3 killed $q3 def $z3
 ; SVE-NEXT:    // kill: def $q2 killed $q2 def $z2
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    mov x8, #6 // =0x6
-; SVE-NEXT:    mov x9, #4 // =0x4
 ; SVE-NEXT:    // kill: def $q1 ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/157665


More information about the llvm-commits mailing list