[llvm] [RISCV] Lower BUILD_VECTOR with i64 type to VID on RV32 if possible (PR #132339)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 20 22:26:56 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Jim Lin (tclin914)
Changes:
The i64 element type of a BUILD_VECTOR is not legal on RV32, so the VID pattern is no longer recognized once the node has been type legalized. Instead, custom lower such BUILD_VECTORs to VID before type legalization.
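As a minimal illustration (a hypothetical reduction, not part of the patch), the lowering targets constant vectors whose elements form an arithmetic sequence element[i] = i * StepNumerator / StepDenominator + Addend with a suitably small step and addend:

```llvm
; Hypothetical reduction of the pattern this patch targets: elements follow
; element[i] = i * 2 + 0 (StepNumerator = 2, StepDenominator = 1, Addend = 0).
define <4 x i64> @buildvec_vid_step2_v4i64() {
  ret <4 x i64> <i64 0, i64 2, i64 4, i64 6>
}
```

Per the updated buildvec_vid_step2_add0_v4i64 test below, `llc -mtriple=riscv32 -mattr=+v` now selects `vid.v` followed by `vadd.vv` for this case instead of a constant-pool load plus `vsext.vf4`.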
---
Full diff: https://github.com/llvm/llvm-project/pull/132339.diff
5 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+83-59)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll (+5-11)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll (+11-21)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll (+22-58)
- (modified) llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll (+15-25)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 132faf5b85c1a..ff0ffa16f9aa6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1281,6 +1281,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
+
+ // Lower BUILD_VECTOR with i64 type to VID on RV32 if possible.
+ setOperationAction(ISD::BUILD_VECTOR, MVT::i64, Custom);
}
setOperationAction(
@@ -3601,6 +3604,78 @@ static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
return Gather;
}
+static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ MVT VT = Op.getSimpleValueType();
+ assert(VT.isFixedLengthVector() && "Unexpected vector!");
+
+ MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+
+ SDLoc DL(Op);
+ auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+
+ if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
+ int64_t StepNumerator = SimpleVID->StepNumerator;
+ unsigned StepDenominator = SimpleVID->StepDenominator;
+ int64_t Addend = SimpleVID->Addend;
+
+ assert(StepNumerator != 0 && "Invalid step");
+ bool Negate = false;
+ int64_t SplatStepVal = StepNumerator;
+ unsigned StepOpcode = ISD::MUL;
+ // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
+ // anyway as the shift of 63 won't fit in uimm5.
+ if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
+ isPowerOf2_64(std::abs(StepNumerator))) {
+ Negate = StepNumerator < 0;
+ StepOpcode = ISD::SHL;
+ SplatStepVal = Log2_64(std::abs(StepNumerator));
+ }
+
+ // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
+ // threshold since it's the immediate value many RVV instructions accept.
+ // There is no vmul.vi instruction so ensure multiply constant can fit in
+ // a single addi instruction.
+ if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
+ (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
+ isPowerOf2_32(StepDenominator) &&
+ (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
+ MVT VIDVT =
+ VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
+ MVT VIDContainerVT =
+ getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
+ SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
+ // Convert right out of the scalable type so we can use standard ISD
+ // nodes for the rest of the computation. If we used scalable types with
+ // these, we'd lose the fixed-length vector info and generate worse
+ // vsetvli code.
+ VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
+ if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
+ (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
+ SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
+ VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
+ }
+ if (StepDenominator != 1) {
+ SDValue SplatStep =
+ DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
+ VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
+ }
+ if (Addend != 0 || Negate) {
+ SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
+ VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
+ VID);
+ }
+ if (VT.isFloatingPoint()) {
+ // TODO: Use vfwcvt to reduce register pressure.
+ VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
+ }
+ return VID;
+ }
+ }
+
+ return SDValue();
+}
+
/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
/// which constitute a large proportion of the elements. In such cases we can
/// splat a vector with the dominant element and make up the shortfall with
@@ -3818,64 +3893,8 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
// Try and match index sequences, which we can lower to the vid instruction
// with optional modifications. An all-undef vector is matched by
// getSplatValue, above.
- if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
- int64_t StepNumerator = SimpleVID->StepNumerator;
- unsigned StepDenominator = SimpleVID->StepDenominator;
- int64_t Addend = SimpleVID->Addend;
-
- assert(StepNumerator != 0 && "Invalid step");
- bool Negate = false;
- int64_t SplatStepVal = StepNumerator;
- unsigned StepOpcode = ISD::MUL;
- // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
- // anyway as the shift of 63 won't fit in uimm5.
- if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
- isPowerOf2_64(std::abs(StepNumerator))) {
- Negate = StepNumerator < 0;
- StepOpcode = ISD::SHL;
- SplatStepVal = Log2_64(std::abs(StepNumerator));
- }
-
- // Only emit VIDs with suitably-small steps/addends. We use imm5 is a
- // threshold since it's the immediate value many RVV instructions accept.
- // There is no vmul.vi instruction so ensure multiply constant can fit in
- // a single addi instruction.
- if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
- (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
- isPowerOf2_32(StepDenominator) &&
- (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
- MVT VIDVT =
- VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
- MVT VIDContainerVT =
- getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
- SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
- // Convert right out of the scalable type so we can use standard ISD
- // nodes for the rest of the computation. If we used scalable types with
- // these, we'd lose the fixed-length vector info and generate worse
- // vsetvli code.
- VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
- if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
- (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
- SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
- VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
- }
- if (StepDenominator != 1) {
- SDValue SplatStep =
- DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
- VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
- }
- if (Addend != 0 || Negate) {
- SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
- VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
- VID);
- }
- if (VT.isFloatingPoint()) {
- // TODO: Use vfwcvt to reduce register pressure.
- VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
- }
- return VID;
- }
- }
+ if (SDValue Res = lowerBuildVectorViaVID(Op, DAG, Subtarget))
+ return Res;
// For very small build_vectors, use a single scalar insert of a constant.
// TODO: Base this on constant rematerialization cost, not size.
@@ -7473,8 +7492,13 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerVECTOR_REVERSE(Op, DAG);
case ISD::VECTOR_SPLICE:
return lowerVECTOR_SPLICE(Op, DAG);
- case ISD::BUILD_VECTOR:
+ case ISD::BUILD_VECTOR: {
+ MVT VT = Op.getSimpleValueType();
+ MVT EltVT = VT.getVectorElementType();
+ if (!Subtarget.is64Bit() && EltVT == MVT::i64)
+ return lowerBuildVectorViaVID(Op, DAG, Subtarget);
return lowerBUILD_VECTOR(Op, DAG, Subtarget);
+ }
case ISD::SPLAT_VECTOR: {
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index a91263e85e9e8..41f2e9afbb07d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -290,15 +290,11 @@ define void @buildvec_vid_stepn3_addn3_v4i32(ptr %z0, ptr %z1, ptr %z2, ptr %z3)
ret void
}
-; FIXME: RV32 doesn't catch this pattern due to BUILD_VECTOR legalization.
define <4 x i64> @buildvec_vid_step1_add0_v4i64() {
; RV32-LABEL: buildvec_vid_step1_add0_v4i64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI25_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI25_0)
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vle8.v v10, (a0)
-; RV32-NEXT: vsext.vf4 v8, v10
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vid.v v8
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_vid_step1_add0_v4i64:
@@ -323,11 +319,9 @@ define <4 x i64> @buildvec_vid_step1_add0_v4i64() {
define <4 x i64> @buildvec_vid_step2_add0_v4i64() {
; RV32-LABEL: buildvec_vid_step2_add0_v4i64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI26_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI26_0)
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vle8.v v10, (a0)
-; RV32-NEXT: vsext.vf4 v8, v10
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vid.v v8
+; RV32-NEXT: vadd.vv v8, v8, v8
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_vid_step2_add0_v4i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index 3bb5e179e0d06..a9adc87d29c8b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -1193,15 +1193,11 @@ define void @mulhu_v2i64(ptr %x) {
; RV32-NEXT: addi a1, a1, %lo(.LCPI69_0)
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vle32.v v9, (a1)
-; RV32-NEXT: lui a1, 32
-; RV32-NEXT: addi a1, a1, 1
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vmulhu.vv v8, v8, v9
-; RV32-NEXT: vmv.s.x v9, a1
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vsext.vf4 v10, v9
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vv v8, v8, v10
+; RV32-NEXT: vid.v v9
+; RV32-NEXT: vadd.vi v9, v9, 1
+; RV32-NEXT: vsrl.vv v8, v8, v9
; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: ret
;
@@ -1348,27 +1344,21 @@ define void @mulhs_v2i64(ptr %x) {
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vid.v v9
; RV32-NEXT: addi a2, a1, 1365
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v10, a2
; RV32-NEXT: li a2, 63
; RV32-NEXT: addi a1, a1, 1366
-; RV32-NEXT: vsetvli zero, zero, e32, m1, tu, ma
-; RV32-NEXT: vmv.s.x v10, a1
-; RV32-NEXT: lui a1, 16
-; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v9, 1
-; RV32-NEXT: vrsub.vi v9, v9, 0
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vrsub.vi v11, v9, 0
+; RV32-NEXT: vsetvli zero, zero, e32, mf2, tu, ma
+; RV32-NEXT: vmv.s.x v10, a1
+; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32-NEXT: vmulh.vv v10, v8, v10
-; RV32-NEXT: vmadd.vv v9, v8, v10
-; RV32-NEXT: vmv.s.x v8, a1
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vsext.vf4 v10, v8
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v8, v9, a2
-; RV32-NEXT: vsra.vv v9, v9, v10
+; RV32-NEXT: vmadd.vv v11, v8, v10
+; RV32-NEXT: vsrl.vx v8, v11, a2
+; RV32-NEXT: vsra.vv v9, v11, v9
; RV32-NEXT: vadd.vv v8, v9, v8
; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll
index dcd16e093ea7e..9812e9832856d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
declare <2 x i8> @llvm.stepvector.v2i8()
@@ -161,19 +161,11 @@ define <16 x i32> @stepvector_v16i32() {
declare <2 x i64> @llvm.stepvector.v2i64()
define <2 x i64> @stepvector_v2i64() {
-; RV32-LABEL: stepvector_v2i64:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsext.vf4 v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: stepvector_v2i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vid.v v8
-; RV64-NEXT: ret
+; CHECK-LABEL: stepvector_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: ret
%v = call <2 x i64> @llvm.stepvector.v2i64()
ret <2 x i64> %v
}
@@ -181,20 +173,11 @@ define <2 x i64> @stepvector_v2i64() {
declare <4 x i64> @llvm.stepvector.v4i64()
define <4 x i64> @stepvector_v4i64() {
-; RV32-LABEL: stepvector_v4i64:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI14_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI14_0)
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vle8.v v10, (a0)
-; RV32-NEXT: vsext.vf4 v8, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: stepvector_v4i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vid.v v8
-; RV64-NEXT: ret
+; CHECK-LABEL: stepvector_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: ret
%v = call <4 x i64> @llvm.stepvector.v4i64()
ret <4 x i64> %v
}
@@ -202,20 +185,11 @@ define <4 x i64> @stepvector_v4i64() {
declare <8 x i64> @llvm.stepvector.v8i64()
define <8 x i64> @stepvector_v8i64() {
-; RV32-LABEL: stepvector_v8i64:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI15_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI15_0)
-; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vle8.v v12, (a0)
-; RV32-NEXT: vsext.vf4 v8, v12
-; RV32-NEXT: ret
-;
-; RV64-LABEL: stepvector_v8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vid.v v8
-; RV64-NEXT: ret
+; CHECK-LABEL: stepvector_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: ret
%v = call <8 x i64> @llvm.stepvector.v8i64()
ret <8 x i64> %v
}
@@ -223,21 +197,11 @@ define <8 x i64> @stepvector_v8i64() {
declare <16 x i64> @llvm.stepvector.v16i64()
define <16 x i64> @stepvector_v16i64() {
-; RV32-LABEL: stepvector_v16i64:
-; RV32: # %bb.0:
-; RV32-NEXT: li a0, 32
-; RV32-NEXT: lui a1, %hi(.LCPI16_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI16_0)
-; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT: vle8.v v16, (a1)
-; RV32-NEXT: vsext.vf4 v8, v16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: stepvector_v16i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vid.v v8
-; RV64-NEXT: ret
+; CHECK-LABEL: stepvector_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: ret
%v = call <16 x i64> @llvm.stepvector.v16i64()
ret <16 x i64> %v
}
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index daeb306b7e85f..c486692a90ff1 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -653,43 +653,33 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV32MV-NEXT: mv a0, s1
; RV32MV-NEXT: mv a1, s3
; RV32MV-NEXT: call __moddi3
-; RV32MV-NEXT: addi a2, sp, 16
-; RV32MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32MV-NEXT: vlse64.v v8, (a2), zero
; RV32MV-NEXT: addi a2, sp, 32
-; RV32MV-NEXT: vl2r.v v10, (a2) # Unknown-size Folded Reload
+; RV32MV-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
; RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32MV-NEXT: vslide1down.vx v10, v10, a0
-; RV32MV-NEXT: vslide1down.vx v10, v10, a1
-; RV32MV-NEXT: vslidedown.vi v10, v10, 2
+; RV32MV-NEXT: vslide1down.vx v8, v8, a0
+; RV32MV-NEXT: addi a0, sp, 16
; RV32MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32MV-NEXT: vand.vv v8, v10, v8
-; RV32MV-NEXT: vsetivli zero, 3, e8, mf2, ta, ma
-; RV32MV-NEXT: vmv.v.i v10, 1
-; RV32MV-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32MV-NEXT: vmv.v.i v12, 0
-; RV32MV-NEXT: vsetivli zero, 3, e8, mf2, tu, ma
-; RV32MV-NEXT: vslideup.vi v12, v10, 2
-; RV32MV-NEXT: vsetivli zero, 5, e8, mf2, ta, ma
-; RV32MV-NEXT: vmv.v.i v10, 2
-; RV32MV-NEXT: vsetvli zero, zero, e8, mf2, tu, ma
-; RV32MV-NEXT: vslideup.vi v12, v10, 4
+; RV32MV-NEXT: vlse64.v v10, (a0), zero
+; RV32MV-NEXT: vid.v v12
; RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32MV-NEXT: vsext.vf4 v10, v12
+; RV32MV-NEXT: vslide1down.vx v8, v8, a1
+; RV32MV-NEXT: vslidedown.vi v8, v8, 2
; RV32MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32MV-NEXT: vand.vv v8, v8, v10
+; RV32MV-NEXT: vand.vv v10, v12, v10
; RV32MV-NEXT: vmsne.vv v0, v8, v10
; RV32MV-NEXT: vmv.v.i v8, 0
; RV32MV-NEXT: vmerge.vim v8, v8, -1, v0
; RV32MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32MV-NEXT: vslidedown.vi v10, v8, 1
-; RV32MV-NEXT: vslidedown.vi v11, v8, 2
-; RV32MV-NEXT: vmv.x.s a0, v10
-; RV32MV-NEXT: vmv.x.s a1, v11
+; RV32MV-NEXT: vslidedown.vi v12, v8, 1
+; RV32MV-NEXT: vslidedown.vi v13, v8, 2
; RV32MV-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32MV-NEXT: vslidedown.vi v10, v8, 4
+; RV32MV-NEXT: vmv.x.s a0, v12
+; RV32MV-NEXT: vmv.x.s a1, v13
+; RV32MV-NEXT: vslidedown.vi v12, v8, 5
; RV32MV-NEXT: vmv.x.s a2, v10
-; RV32MV-NEXT: vslidedown.vi v10, v8, 5
-; RV32MV-NEXT: vmv.x.s a3, v10
+; RV32MV-NEXT: vmv.x.s a3, v12
; RV32MV-NEXT: slli a4, a1, 1
; RV32MV-NEXT: sub a4, a4, a0
; RV32MV-NEXT: srli a0, a2, 30
``````````
https://github.com/llvm/llvm-project/pull/132339