[llvm] [RISCV] Introduce local peephole to reduce VLs based on demanded VL (PR #104689)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 17 18:17:19 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Philip Reames (preames)
This is a fairly narrow transform (at the moment) to reduce the VLs of instructions feeding a store with a smaller VL. Note that the goal of this transform isn't really to reduce VL - it's to reduce VL *toggles*. To our knowledge, small reductions in VL without also changing LMUL are generally not profitable on existing hardware.
For a single-use instruction without side effects, FP exceptions, or a result dependency on VL, reducing VL is legal if only a subset of its elements are demanded. We'd already implemented this logic for vmv.v.v, and this patch simply applies it to stores as an alternate root.
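Concretely, taking the fadd_v6f16 test change from the diff below (a sketch of the generated code, not the full CHECK lines): before this patch the loads and store run at the demanded VL of 6 while the vfadd.vv has been promoted to VL=8, costing a toggle to 8 and back; once the single-use vfadd.vv is narrowed to the store's VL, both extra vsetivli toggles vanish.

```asm
# Before: the promoted vfadd forces two extra VL toggles
vsetivli zero, 6, e16, m1, ta, ma
vle16.v  v8, (a0)
vle16.v  v9, (a1)
vsetivli zero, 8, e16, m1, ta, ma
vfadd.vv v8, v8, v9
vsetivli zero, 6, e16, m1, ta, ma
vse16.v  v8, (a0)
ret

# After: the vfadd's VL is reduced to the store's demanded VL of 6
vsetivli zero, 6, e16, m1, ta, ma
vle16.v  v8, (a0)
vle16.v  v9, (a1)
vfadd.vv v8, v8, v9
vse16.v  v8, (a0)
ret
```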
Longer term, I plan to extend this to other root instructions (e.g. other kinds of stores, reductions, etc.) and to add a more general recursive walk back through operands.
One risk with the dataflow-based approach is that we could reduce the VL of an instruction scheduled in a region with the wider VL (i.e. mixed-mode computations), forcing an additional VL toggle. An example of this is the @insert_subvector_dag_loop test case, but it doesn't appear to happen widely. I think this is a risk we should accept.
---
Patch is 50.48 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/104689.diff
15 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp (+102-48)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll (+1-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll (+5-59)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll (-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll (-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector-shuffle.ll (+4-3)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll (+1-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll (+1-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll (+6-56)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-non-power-of-2.ll (-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll (+1-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll (+8-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll (+1-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll (+2-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/zve32-types.ll (-8)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index 2abed1ac984e35..b2891376ea5e47 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -61,6 +61,7 @@ class RISCVVectorPeephole : public MachineFunctionPass {
}
private:
+ bool tryToReduceVL(MachineInstr &MI) const;
bool convertToVLMAX(MachineInstr &MI) const;
bool convertToWholeRegister(MachineInstr &MI) const;
bool convertToUnmasked(MachineInstr &MI) const;
@@ -81,6 +82,101 @@ char RISCVVectorPeephole::ID = 0;
INITIALIZE_PASS(RISCVVectorPeephole, DEBUG_TYPE, "RISC-V Fold Masks", false,
false)
+
+/// Given two VL operands, returns the one known to be the smallest or nullptr
+/// if unknown.
+static const MachineOperand *getKnownMinVL(const MachineOperand *LHS,
+ const MachineOperand *RHS) {
+ if (LHS->isReg() && RHS->isReg() && LHS->getReg().isVirtual() &&
+ LHS->getReg() == RHS->getReg())
+ return LHS;
+ if (LHS->isImm() && LHS->getImm() == RISCV::VLMaxSentinel)
+ return RHS;
+ if (RHS->isImm() && RHS->getImm() == RISCV::VLMaxSentinel)
+ return LHS;
+ if (!LHS->isImm() || !RHS->isImm())
+ return nullptr;
+ return LHS->getImm() <= RHS->getImm() ? LHS : RHS;
+}
+
+static unsigned getSEWLMULRatio(const MachineInstr &MI) {
+ RISCVII::VLMUL LMUL = RISCVII::getLMul(MI.getDesc().TSFlags);
+ unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
+ return RISCVVType::getSEWLMULRatio(1 << Log2SEW, LMUL);
+}
+
+// Attempt to reduce the VL of an instruction whose sole use is feeding an
+// instruction with a narrower VL. This currently works backwards from the
+// user instruction (which might have a smaller VL).
+bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const {
+ // Note that the goal here is a bit multifaceted.
+ // 1) For stores, reducing the VL of the value being stored may help to
+ // reduce VL toggles. This is somewhat of an artifact of the fact we
+ // promote arithmetic instructions but VL predicate stores.
+ // 2) For vmv.v.v, reducing VL eagerly on the source instruction allows us
+ // to share code with the foldVMV_V_V transform below.
+ //
+ // Note that to the best of our knowledge, reducing VL is generally not
+ // a significant win on real hardware unless we can also reduce LMUL which
+ // this code doesn't try to do.
+ //
+ // TODO: We can handle a bunch more instructions here, and probably
+ // recurse backwards through operands too.
+ unsigned SrcIdx = 0;
+ switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
+ default:
+ return false;
+ case RISCV::VSE8_V:
+ case RISCV::VSE16_V:
+ case RISCV::VSE32_V:
+ case RISCV::VSE64_V:
+ break;
+ case RISCV::VMV_V_V:
+ SrcIdx = 2;
+ break;
+ }
+
+ MachineOperand &VL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
+ if (VL.isImm() && VL.getImm() == RISCV::VLMaxSentinel)
+ return false;
+
+ Register SrcReg = MI.getOperand(SrcIdx).getReg();
+ // Note: one *use*, not one *user*.
+ if (!MRI->hasOneUse(SrcReg))
+ return false;
+
+ MachineInstr *Src = MRI->getVRegDef(SrcReg);
+ if (!Src || Src->hasUnmodeledSideEffects() ||
+ Src->getParent() != MI.getParent() || Src->getNumDefs() != 1 ||
+ !RISCVII::hasVLOp(Src->getDesc().TSFlags) ||
+ !RISCVII::hasSEWOp(Src->getDesc().TSFlags))
+ return false;
+
+ // Src needs to have the same VLMAX as MI
+ if (getSEWLMULRatio(MI) != getSEWLMULRatio(*Src))
+ return false;
+
+ bool ActiveElementsAffectResult = RISCVII::activeElementsAffectResult(
+ TII->get(RISCV::getRVVMCOpcode(Src->getOpcode())).TSFlags);
+ if (ActiveElementsAffectResult || Src->mayRaiseFPException())
+ return false;
+
+ MachineOperand &SrcVL =
+ Src->getOperand(RISCVII::getVLOpNum(Src->getDesc()));
+ const MachineOperand *MinVL = getKnownMinVL(&VL, &SrcVL);
+ if (!MinVL || MinVL == &SrcVL)
+ return false;
+
+ if (MinVL->isImm())
+ SrcVL.ChangeToImmediate(MinVL->getImm());
+ else if (MinVL->isReg())
+ SrcVL.ChangeToRegister(MinVL->getReg(), false);
+
+ // TODO: For instructions with a passthru, we could clear the passthru
+ // and tail policy since we've just proven the tail is not demanded.
+ return true;
+}
+
/// Check if an operand is an immediate or a materialized ADDI $x0, imm.
std::optional<unsigned>
RISCVVectorPeephole::getConstant(const MachineOperand &VL) const {
@@ -325,22 +421,6 @@ bool RISCVVectorPeephole::convertToUnmasked(MachineInstr &MI) const {
return true;
}
-/// Given two VL operands, returns the one known to be the smallest or nullptr
-/// if unknown.
-static const MachineOperand *getKnownMinVL(const MachineOperand *LHS,
- const MachineOperand *RHS) {
- if (LHS->isReg() && RHS->isReg() && LHS->getReg().isVirtual() &&
- LHS->getReg() == RHS->getReg())
- return LHS;
- if (LHS->isImm() && LHS->getImm() == RISCV::VLMaxSentinel)
- return RHS;
- if (RHS->isImm() && RHS->getImm() == RISCV::VLMaxSentinel)
- return LHS;
- if (!LHS->isImm() || !RHS->isImm())
- return nullptr;
- return LHS->getImm() <= RHS->getImm() ? LHS : RHS;
-}
-
/// Check if it's safe to move From down to To, checking that no physical
/// registers are clobbered.
static bool isSafeToMove(const MachineInstr &From, const MachineInstr &To) {
@@ -362,12 +442,6 @@ static bool isSafeToMove(const MachineInstr &From, const MachineInstr &To) {
return From.isSafeToMove(SawStore);
}
-static unsigned getSEWLMULRatio(const MachineInstr &MI) {
- RISCVII::VLMUL LMUL = RISCVII::getLMul(MI.getDesc().TSFlags);
- unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
- return RISCVVType::getSEWLMULRatio(1 << Log2SEW, LMUL);
-}
-
/// If a PseudoVMV_V_V is the only user of its input, fold its passthru and VL
/// into it.
///
@@ -404,33 +478,17 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
SrcPassthru.getReg() != Passthru.getReg())
return false;
- // Because Src and MI have the same passthru, we can use either AVL as long as
- // it's the smaller of the two.
- //
- // (src pt, ..., vl=5) x x x x x|. . .
- // (vmv.v.v pt, src, vl=3) x x x|. . . . .
- // ->
- // (src pt, ..., vl=3) x x x|. . . . .
- //
- // (src pt, ..., vl=3) x x x|. . . . .
- // (vmv.v.v pt, src, vl=6) x x x . . .|. .
- // ->
- // (src pt, ..., vl=3) x x x|. . . . .
+ // Src VL will have already been reduced if legal (see tryToReduceVL),
+ // so we don't need to handle a smaller source VL here. However, the
+ // user's VL may be larger.
MachineOperand &SrcVL = Src->getOperand(RISCVII::getVLOpNum(Src->getDesc()));
const MachineOperand *MinVL = getKnownMinVL(&MI.getOperand(3), &SrcVL);
- if (!MinVL)
- return false;
-
- bool VLChanged = !MinVL->isIdenticalTo(SrcVL);
- bool ActiveElementsAffectResult = RISCVII::activeElementsAffectResult(
- TII->get(RISCV::getRVVMCOpcode(Src->getOpcode())).TSFlags);
-
- if (VLChanged && (ActiveElementsAffectResult || Src->mayRaiseFPException()))
+ if (!MinVL || !MinVL->isIdenticalTo(SrcVL))
return false;
// If Src ends up using MI's passthru/VL, move it so it can access it.
// TODO: We don't need to do this if they already dominate Src.
- if (!SrcVL.isIdenticalTo(*MinVL) || !SrcPassthru.isIdenticalTo(Passthru)) {
+ if (!SrcPassthru.isIdenticalTo(Passthru)) {
if (!isSafeToMove(*Src, MI))
return false;
Src->moveBefore(&MI);
@@ -445,11 +503,6 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
*Src->getParent()->getParent()));
}
- if (MinVL->isImm())
- SrcVL.ChangeToImmediate(MinVL->getImm());
- else if (MinVL->isReg())
- SrcVL.ChangeToRegister(MinVL->getReg(), false);
-
// Use a conservative tu,mu policy, RISCVInsertVSETVLI will relax it if
// passthru is undef.
Src->getOperand(RISCVII::getVecPolicyOpNum(Src->getDesc()))
@@ -498,6 +551,7 @@ bool RISCVVectorPeephole::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : make_early_inc_range(MBB)) {
Changed |= convertToVLMAX(MI);
+ Changed |= tryToReduceVL(MI);
Changed |= convertToUnmasked(MI);
Changed |= convertToWholeRegister(MI);
Changed |= convertVMergeToVMv(MI);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
index f607add17b4b9d..ac7d3d9109e39c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
@@ -41,8 +41,8 @@ define void @abs_v6i16(ptr %x) {
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vrsub.vi v9, v8, 0
-; CHECK-NEXT: vmax.vv v8, v8, v9
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vmax.vv v8, v8, v9
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index d25312268ada62..a6e224d475a312 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -47,9 +47,7 @@ define void @fadd_v6f16(ptr %x, ptr %y) {
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vle16.v v9, (a1)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfadd.vv v8, v8, v9
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
@@ -173,9 +171,7 @@ define void @fsub_v6f16(ptr %x, ptr %y) {
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vle16.v v9, (a1)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfsub.vv v8, v8, v9
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
@@ -299,9 +295,7 @@ define void @fmul_v6f16(ptr %x, ptr %y) {
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vle16.v v9, (a1)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfmul.vv v8, v8, v9
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
@@ -425,9 +419,7 @@ define void @fdiv_v6f16(ptr %x, ptr %y) {
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vle16.v v9, (a1)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfdiv.vv v8, v8, v9
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
@@ -546,9 +538,7 @@ define void @fneg_v6f16(ptr %x) {
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfneg.v v8, v8
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
@@ -658,9 +648,7 @@ define void @fabs_v6f16(ptr %x) {
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfabs.v v8, v8
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
@@ -778,9 +766,7 @@ define void @copysign_v6f16(ptr %x, ptr %y) {
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vle16.v v9, (a1)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfsgnj.vv v8, v8, v9
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
@@ -911,9 +897,7 @@ define void @copysign_vf_v6f16(ptr %x, half %y) {
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfsgnj.vf v8, v8, fa0
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
@@ -1053,9 +1037,7 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vle16.v v9, (a1)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfsgnjn.vv v8, v8, v9
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
@@ -1204,8 +1186,8 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
; ZVFH-NEXT: vle16.v v9, (a0)
; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFH-NEXT: vfncvt.f.f.w v10, v8
-; ZVFH-NEXT: vfsgnjn.vv v8, v9, v10
; ZVFH-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
+; ZVFH-NEXT: vfsgnjn.vv v8, v9, v10
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
@@ -1334,9 +1316,7 @@ define void @sqrt_v6f16(ptr %x) {
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfsqrt.v v8, v8
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
@@ -1459,9 +1439,7 @@ define void @fma_v6f16(ptr %x, ptr %y, ptr %z) {
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vle16.v v9, (a1)
; ZVFH-NEXT: vle16.v v10, (a2)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfmacc.vv v10, v8, v9
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v10, (a0)
; ZVFH-NEXT: ret
;
@@ -1609,9 +1587,7 @@ define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) {
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vle16.v v9, (a1)
; ZVFH-NEXT: vle16.v v10, (a2)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfmsac.vv v10, v8, v9
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v10, (a0)
; ZVFH-NEXT: ret
;
@@ -2246,9 +2222,7 @@ define void @fadd_vf_v6f16(ptr %x, half %y) {
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfadd.vf v8, v8, fa0
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
@@ -2386,9 +2360,7 @@ define void @fadd_fv_v6f16(ptr %x, half %y) {
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfadd.vf v8, v8, fa0
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
@@ -2526,9 +2498,7 @@ define void @fsub_vf_v6f16(ptr %x, half %y) {
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfsub.vf v8, v8, fa0
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
@@ -2666,9 +2636,7 @@ define void @fsub_fv_v6f16(ptr %x, half %y) {
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfrsub.vf v8, v8, fa0
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
@@ -2806,9 +2774,7 @@ define void @fmul_vf_v6f16(ptr %x, half %y) {
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfmul.vf v8, v8, fa0
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
@@ -2946,9 +2912,7 @@ define void @fmul_fv_v6f16(ptr %x, half %y) {
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfmul.vf v8, v8, fa0
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
@@ -3086,9 +3050,7 @@ define void @fdiv_vf_v6f16(ptr %x, half %y) {
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfdiv.vf v8, v8, fa0
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
@@ -3226,9 +3188,7 @@ define void @fdiv_fv_v6f16(ptr %x, half %y) {
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfrdiv.vf v8, v8, fa0
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
@@ -3371,9 +3331,7 @@ define void @fma_vf_v6f16(ptr %x, ptr %y, half %z) {
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vle16.v v9, (a1)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfmacc.vf v9, fa0, v8
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v9, (a0)
; ZVFH-NEXT: ret
;
@@ -3526,9 +3484,7 @@ define void @fma_fv_v6f16(ptr %x, ptr %y, half %z) {
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vle16.v v9, (a1)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfmacc.vf v9, fa0, v8
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v9, (a0)
; ZVFH-NEXT: ret
;
@@ -3687,9 +3643,7 @@ define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) {
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vle16.v v9, (a1)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfmsac.vf v9, fa0, v8
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v9, (a0)
; ZVFH-NEXT: ret
;
@@ -3893,9 +3847,8 @@ define void @trunc_v6f16(ptr %x) {
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
%a = load <6 x half>, ptr %x
@@ -4023,9 +3976,8 @@ define void @ceil_v6f16(ptr %x) {
; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT: fsrm a1
; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
@@ -4210,9 +4162,8 @@ define void @floor_v6f16(ptr %x) {
; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT: fsrm a1
; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
@@ -4397,9 +4348,8 @@ define void @round_v6f16(ptr %x) {
; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT: fsrm a1
; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
@@ -4782,9 +4732,7 @@ define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vle16.v v9, (a1)
; ZVFH-NEXT: vle16.v v10, (a2)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfmacc.vv v10, v8, v9
-; ZVFH-NEX...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/104689
More information about the llvm-commits
mailing list