[llvm] [RISCV] Lower BUILD_VECTOR with i64 type to VID on RV32 if possible (PR #132339)

via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 20 22:26:56 PDT 2025


llvmbot wrote:



@llvm/pr-subscribers-backend-risc-v

Author: Jim Lin (tclin914)

Changes:

On RV32, i64 is not a legal element type for BUILD_VECTOR, so the VID pattern is no longer recognized once the node has been type-legalized.
Instead, try to custom-lower such a BUILD_VECTOR to VID before type legalization.
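
As a minimal sketch of the kind of input that benefits, here is a hypothetical reduced test in the spirit of buildvec_vid_step2_add0_v4i64 from the tests below (the RUN line and IR body are illustrative, not taken verbatim from the patch):

```llvm
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s

; The returned constant is the index sequence <0, 2, 4, 6>, i.e. a VID
; sequence with step 2 and addend 0. Before this patch, RV32 materialized it
; via a constant-pool load plus vsext.vf4; with the custom lowering it can be
; emitted as vid.v followed by vadd.vv.
define <4 x i64> @buildvec_vid_step2_add0_v4i64() {
  ret <4 x i64> <i64 0, i64 2, i64 4, i64 6>
}
```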

---
Full diff: https://github.com/llvm/llvm-project/pull/132339.diff


5 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+83-59) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll (+5-11) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll (+11-21) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll (+22-58) 
- (modified) llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll (+15-25) 


``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 132faf5b85c1a..ff0ffa16f9aa6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1281,6 +1281,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
           setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
           setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
+
+          // Lower BUILD_VECTOR with i64 type to VID on RV32 if possible.
+          setOperationAction(ISD::BUILD_VECTOR, MVT::i64, Custom);
         }
 
         setOperationAction(
@@ -3601,6 +3604,78 @@ static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
   return Gather;
 }
 
+static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG,
+                                      const RISCVSubtarget &Subtarget) {
+  MVT VT = Op.getSimpleValueType();
+  assert(VT.isFixedLengthVector() && "Unexpected vector!");
+
+  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+
+  SDLoc DL(Op);
+  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+
+  if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
+    int64_t StepNumerator = SimpleVID->StepNumerator;
+    unsigned StepDenominator = SimpleVID->StepDenominator;
+    int64_t Addend = SimpleVID->Addend;
+
+    assert(StepNumerator != 0 && "Invalid step");
+    bool Negate = false;
+    int64_t SplatStepVal = StepNumerator;
+    unsigned StepOpcode = ISD::MUL;
+    // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
+    // anyway as the shift of 63 won't fit in uimm5.
+    if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
+        isPowerOf2_64(std::abs(StepNumerator))) {
+      Negate = StepNumerator < 0;
+      StepOpcode = ISD::SHL;
+      SplatStepVal = Log2_64(std::abs(StepNumerator));
+    }
+
+    // Only emit VIDs with suitably-small steps/addends. We use imm5 is a
+    // threshold since it's the immediate value many RVV instructions accept.
+    // There is no vmul.vi instruction so ensure multiply constant can fit in
+    // a single addi instruction.
+    if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
+         (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
+        isPowerOf2_32(StepDenominator) &&
+        (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
+      MVT VIDVT =
+          VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
+      MVT VIDContainerVT =
+          getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
+      SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
+      // Convert right out of the scalable type so we can use standard ISD
+      // nodes for the rest of the computation. If we used scalable types with
+      // these, we'd lose the fixed-length vector info and generate worse
+      // vsetvli code.
+      VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
+      if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
+          (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
+        SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
+        VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
+      }
+      if (StepDenominator != 1) {
+        SDValue SplatStep =
+            DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
+        VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
+      }
+      if (Addend != 0 || Negate) {
+        SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
+        VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
+                          VID);
+      }
+      if (VT.isFloatingPoint()) {
+        // TODO: Use vfwcvt to reduce register pressure.
+        VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
+      }
+      return VID;
+    }
+  }
+
+  return SDValue();
+}
+
 /// Try and optimize BUILD_VECTORs with "dominant values" - these are values
 /// which constitute a large proportion of the elements. In such cases we can
 /// splat a vector with the dominant element and make up the shortfall with
@@ -3818,64 +3893,8 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
   // Try and match index sequences, which we can lower to the vid instruction
   // with optional modifications. An all-undef vector is matched by
   // getSplatValue, above.
-  if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
-    int64_t StepNumerator = SimpleVID->StepNumerator;
-    unsigned StepDenominator = SimpleVID->StepDenominator;
-    int64_t Addend = SimpleVID->Addend;
-
-    assert(StepNumerator != 0 && "Invalid step");
-    bool Negate = false;
-    int64_t SplatStepVal = StepNumerator;
-    unsigned StepOpcode = ISD::MUL;
-    // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
-    // anyway as the shift of 63 won't fit in uimm5.
-    if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
-        isPowerOf2_64(std::abs(StepNumerator))) {
-      Negate = StepNumerator < 0;
-      StepOpcode = ISD::SHL;
-      SplatStepVal = Log2_64(std::abs(StepNumerator));
-    }
-
-    // Only emit VIDs with suitably-small steps/addends. We use imm5 is a
-    // threshold since it's the immediate value many RVV instructions accept.
-    // There is no vmul.vi instruction so ensure multiply constant can fit in
-    // a single addi instruction.
-    if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
-         (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
-        isPowerOf2_32(StepDenominator) &&
-        (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
-      MVT VIDVT =
-          VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
-      MVT VIDContainerVT =
-          getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
-      SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
-      // Convert right out of the scalable type so we can use standard ISD
-      // nodes for the rest of the computation. If we used scalable types with
-      // these, we'd lose the fixed-length vector info and generate worse
-      // vsetvli code.
-      VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
-      if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
-          (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
-        SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
-        VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
-      }
-      if (StepDenominator != 1) {
-        SDValue SplatStep =
-            DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
-        VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
-      }
-      if (Addend != 0 || Negate) {
-        SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
-        VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
-                          VID);
-      }
-      if (VT.isFloatingPoint()) {
-        // TODO: Use vfwcvt to reduce register pressure.
-        VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
-      }
-      return VID;
-    }
-  }
+  if (SDValue Res = lowerBuildVectorViaVID(Op, DAG, Subtarget))
+    return Res;
 
   // For very small build_vectors, use a single scalar insert of a constant.
   // TODO: Base this on constant rematerialization cost, not size.
@@ -7473,8 +7492,13 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return lowerVECTOR_REVERSE(Op, DAG);
   case ISD::VECTOR_SPLICE:
     return lowerVECTOR_SPLICE(Op, DAG);
-  case ISD::BUILD_VECTOR:
+  case ISD::BUILD_VECTOR: {
+    MVT VT = Op.getSimpleValueType();
+    MVT EltVT = VT.getVectorElementType();
+    if (!Subtarget.is64Bit() && EltVT == MVT::i64)
+      return lowerBuildVectorViaVID(Op, DAG, Subtarget);
     return lowerBUILD_VECTOR(Op, DAG, Subtarget);
+  }
   case ISD::SPLAT_VECTOR: {
     MVT VT = Op.getSimpleValueType();
     MVT EltVT = VT.getVectorElementType();
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index a91263e85e9e8..41f2e9afbb07d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -290,15 +290,11 @@ define void @buildvec_vid_stepn3_addn3_v4i32(ptr %z0, ptr %z1, ptr %z2, ptr %z3)
   ret void
 }
 
-; FIXME: RV32 doesn't catch this pattern due to BUILD_VECTOR legalization.
 define <4 x i64> @buildvec_vid_step1_add0_v4i64() {
 ; RV32-LABEL: buildvec_vid_step1_add0_v4i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI25_0)
-; RV32-NEXT:    addi a0, a0, %lo(.LCPI25_0)
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vle8.v v10, (a0)
-; RV32-NEXT:    vsext.vf4 v8, v10
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vid.v v8
 ; RV32-NEXT:    ret
 ;
 ; RV64V-LABEL: buildvec_vid_step1_add0_v4i64:
@@ -323,11 +319,9 @@ define <4 x i64> @buildvec_vid_step1_add0_v4i64() {
 define <4 x i64> @buildvec_vid_step2_add0_v4i64() {
 ; RV32-LABEL: buildvec_vid_step2_add0_v4i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI26_0)
-; RV32-NEXT:    addi a0, a0, %lo(.LCPI26_0)
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vle8.v v10, (a0)
-; RV32-NEXT:    vsext.vf4 v8, v10
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vid.v v8
+; RV32-NEXT:    vadd.vv v8, v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64V-LABEL: buildvec_vid_step2_add0_v4i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index 3bb5e179e0d06..a9adc87d29c8b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -1193,15 +1193,11 @@ define void @mulhu_v2i64(ptr %x) {
 ; RV32-NEXT:    addi a1, a1, %lo(.LCPI69_0)
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vle32.v v9, (a1)
-; RV32-NEXT:    lui a1, 32
-; RV32-NEXT:    addi a1, a1, 1
 ; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT:    vmulhu.vv v8, v8, v9
-; RV32-NEXT:    vmv.s.x v9, a1
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vsext.vf4 v10, v9
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vsrl.vv v8, v8, v10
+; RV32-NEXT:    vid.v v9
+; RV32-NEXT:    vadd.vi v9, v9, 1
+; RV32-NEXT:    vsrl.vv v8, v8, v9
 ; RV32-NEXT:    vse64.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
@@ -1348,27 +1344,21 @@ define void @mulhs_v2i64(ptr %x) {
 ; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT:    vle64.v v8, (a0)
 ; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vid.v v9
 ; RV32-NEXT:    addi a2, a1, 1365
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vmv.v.x v10, a2
 ; RV32-NEXT:    li a2, 63
 ; RV32-NEXT:    addi a1, a1, 1366
-; RV32-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
-; RV32-NEXT:    vmv.s.x v10, a1
-; RV32-NEXT:    lui a1, 16
-; RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v9, 1
-; RV32-NEXT:    vrsub.vi v9, v9, 0
 ; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vrsub.vi v11, v9, 0
+; RV32-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
+; RV32-NEXT:    vmv.s.x v10, a1
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
 ; RV32-NEXT:    vmulh.vv v10, v8, v10
-; RV32-NEXT:    vmadd.vv v9, v8, v10
-; RV32-NEXT:    vmv.s.x v8, a1
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vsext.vf4 v10, v8
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vsrl.vx v8, v9, a2
-; RV32-NEXT:    vsra.vv v9, v9, v10
+; RV32-NEXT:    vmadd.vv v11, v8, v10
+; RV32-NEXT:    vsrl.vx v8, v11, a2
+; RV32-NEXT:    vsra.vv v9, v11, v9
 ; RV32-NEXT:    vadd.vv v8, v9, v8
 ; RV32-NEXT:    vse64.v v8, (a0)
 ; RV32-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll
index dcd16e093ea7e..9812e9832856d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 declare <2 x i8> @llvm.stepvector.v2i8()
 
@@ -161,19 +161,11 @@ define <16 x i32> @stepvector_v16i32() {
 declare <2 x i64> @llvm.stepvector.v2i64()
 
 define <2 x i64> @stepvector_v2i64() {
-; RV32-LABEL: stepvector_v2i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v9, a0
-; RV32-NEXT:    vsext.vf4 v8, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: stepvector_v2i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT:    vid.v v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: stepvector_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    ret
   %v = call <2 x i64> @llvm.stepvector.v2i64()
   ret <2 x i64> %v
 }
@@ -181,20 +173,11 @@ define <2 x i64> @stepvector_v2i64() {
 declare <4 x i64> @llvm.stepvector.v4i64()
 
 define <4 x i64> @stepvector_v4i64() {
-; RV32-LABEL: stepvector_v4i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI14_0)
-; RV32-NEXT:    addi a0, a0, %lo(.LCPI14_0)
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vle8.v v10, (a0)
-; RV32-NEXT:    vsext.vf4 v8, v10
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: stepvector_v4i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT:    vid.v v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: stepvector_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    ret
   %v = call <4 x i64> @llvm.stepvector.v4i64()
   ret <4 x i64> %v
 }
@@ -202,20 +185,11 @@ define <4 x i64> @stepvector_v4i64() {
 declare <8 x i64> @llvm.stepvector.v8i64()
 
 define <8 x i64> @stepvector_v8i64() {
-; RV32-LABEL: stepvector_v8i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI15_0)
-; RV32-NEXT:    addi a0, a0, %lo(.LCPI15_0)
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vle8.v v12, (a0)
-; RV32-NEXT:    vsext.vf4 v8, v12
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: stepvector_v8i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT:    vid.v v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: stepvector_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    ret
   %v = call <8 x i64> @llvm.stepvector.v8i64()
   ret <8 x i64> %v
 }
@@ -223,21 +197,11 @@ define <8 x i64> @stepvector_v8i64() {
 declare <16 x i64> @llvm.stepvector.v16i64()
 
 define <16 x i64> @stepvector_v16i64() {
-; RV32-LABEL: stepvector_v16i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a0, 32
-; RV32-NEXT:    lui a1, %hi(.LCPI16_0)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI16_0)
-; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT:    vle8.v v16, (a1)
-; RV32-NEXT:    vsext.vf4 v8, v16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: stepvector_v16i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT:    vid.v v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: stepvector_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    ret
   %v = call <16 x i64> @llvm.stepvector.v16i64()
   ret <16 x i64> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index daeb306b7e85f..c486692a90ff1 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -653,43 +653,33 @@ define void @test_srem_vec(ptr %X) nounwind {
 ; RV32MV-NEXT:    mv a0, s1
 ; RV32MV-NEXT:    mv a1, s3
 ; RV32MV-NEXT:    call __moddi3
-; RV32MV-NEXT:    addi a2, sp, 16
-; RV32MV-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32MV-NEXT:    vlse64.v v8, (a2), zero
 ; RV32MV-NEXT:    addi a2, sp, 32
-; RV32MV-NEXT:    vl2r.v v10, (a2) # Unknown-size Folded Reload
+; RV32MV-NEXT:    vl2r.v v8, (a2) # Unknown-size Folded Reload
 ; RV32MV-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32MV-NEXT:    vslide1down.vx v10, v10, a0
-; RV32MV-NEXT:    vslide1down.vx v10, v10, a1
-; RV32MV-NEXT:    vslidedown.vi v10, v10, 2
+; RV32MV-NEXT:    vslide1down.vx v8, v8, a0
+; RV32MV-NEXT:    addi a0, sp, 16
 ; RV32MV-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32MV-NEXT:    vand.vv v8, v10, v8
-; RV32MV-NEXT:    vsetivli zero, 3, e8, mf2, ta, ma
-; RV32MV-NEXT:    vmv.v.i v10, 1
-; RV32MV-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32MV-NEXT:    vmv.v.i v12, 0
-; RV32MV-NEXT:    vsetivli zero, 3, e8, mf2, tu, ma
-; RV32MV-NEXT:    vslideup.vi v12, v10, 2
-; RV32MV-NEXT:    vsetivli zero, 5, e8, mf2, ta, ma
-; RV32MV-NEXT:    vmv.v.i v10, 2
-; RV32MV-NEXT:    vsetvli zero, zero, e8, mf2, tu, ma
-; RV32MV-NEXT:    vslideup.vi v12, v10, 4
+; RV32MV-NEXT:    vlse64.v v10, (a0), zero
+; RV32MV-NEXT:    vid.v v12
 ; RV32MV-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32MV-NEXT:    vsext.vf4 v10, v12
+; RV32MV-NEXT:    vslide1down.vx v8, v8, a1
+; RV32MV-NEXT:    vslidedown.vi v8, v8, 2
 ; RV32MV-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32MV-NEXT:    vand.vv v8, v8, v10
+; RV32MV-NEXT:    vand.vv v10, v12, v10
 ; RV32MV-NEXT:    vmsne.vv v0, v8, v10
 ; RV32MV-NEXT:    vmv.v.i v8, 0
 ; RV32MV-NEXT:    vmerge.vim v8, v8, -1, v0
 ; RV32MV-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV32MV-NEXT:    vslidedown.vi v10, v8, 1
-; RV32MV-NEXT:    vslidedown.vi v11, v8, 2
-; RV32MV-NEXT:    vmv.x.s a0, v10
-; RV32MV-NEXT:    vmv.x.s a1, v11
+; RV32MV-NEXT:    vslidedown.vi v12, v8, 1
+; RV32MV-NEXT:    vslidedown.vi v13, v8, 2
 ; RV32MV-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; RV32MV-NEXT:    vslidedown.vi v10, v8, 4
+; RV32MV-NEXT:    vmv.x.s a0, v12
+; RV32MV-NEXT:    vmv.x.s a1, v13
+; RV32MV-NEXT:    vslidedown.vi v12, v8, 5
 ; RV32MV-NEXT:    vmv.x.s a2, v10
-; RV32MV-NEXT:    vslidedown.vi v10, v8, 5
-; RV32MV-NEXT:    vmv.x.s a3, v10
+; RV32MV-NEXT:    vmv.x.s a3, v12
 ; RV32MV-NEXT:    slli a4, a1, 1
 ; RV32MV-NEXT:    sub a4, a4, a0
 ; RV32MV-NEXT:    srli a0, a2, 30

``````````



https://github.com/llvm/llvm-project/pull/132339

