[llvm] [RISCV][VLOPT] Remove unnecessary passthru restriction (PR #124549)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 27 06:26:46 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Luke Lau (lukel97)
We currently check for passthrus in two places: on the instruction to be reduced in isCandidate, and on its users in checkUsers.
We cannot reduce the VL if an instruction has a user that is a passthru, because the user will read elements past VL in the tail.
However, it is fine to reduce the VL of an instruction that itself has a non-undef passthru. Since the VL can only be reduced, never increased, the previously preserved tail always remains the same.
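For illustration, a minimal IR sketch of the two situations, modelled on the tests added to vl-opt.ll below (the function names are made up for this sketch, and a plain i64 VL operand stands in for the tests' iXLen macro):

```llvm
; %v has a non-undef passthru (%a), but its only user reads it as a plain
; source operand, so shrinking %v's VL from 5 to 4 is safe: the tail
; elements %v preserved from %a are never re-extended.
define <vscale x 4 x i32> @vl_can_shrink(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i64 5)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %b, i64 4)
  ret <vscale x 4 x i32> %w
}

; Here %v is the passthru of %w, so %w reads %v's elements past VL=4 and
; %v must keep its VL of 5.
define <vscale x 4 x i32> @vl_must_stay(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i64 5)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %v, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i64 4)
  ret <vscale x 4 x i32> %w
}
```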
---
Full diff: https://github.com/llvm/llvm-project/pull/124549.diff
3 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp (+1-20)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll (+16-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/vl-opt.ll (+52-8)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 976c65e51c2059..be897bb2577961 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1143,27 +1143,8 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
if (MI.getNumDefs() != 1)
return false;
- // If we're not using VLMAX, then we need to be careful whether we are using
- // TA/TU when there is a non-undef Passthru. But when we are using VLMAX, it
- // does not matter whether we are using TA/TU with a non-undef Passthru, since
- // there are no tail elements to be preserved.
unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
const MachineOperand &VLOp = MI.getOperand(VLOpNum);
- if (VLOp.isReg() || VLOp.getImm() != RISCV::VLMaxSentinel) {
- // If MI has a non-undef passthru, we will not try to optimize it since
- // that requires us to preserve tail elements according to TA/TU.
- // Otherwise, The MI has an undef Passthru, so it doesn't matter whether we
- // are using TA/TU.
- bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(Desc);
- unsigned PassthruOpIdx = MI.getNumExplicitDefs();
- if (HasPassthru &&
- MI.getOperand(PassthruOpIdx).getReg() != RISCV::NoRegister) {
- LLVM_DEBUG(
- dbgs() << " Not a candidate because it uses non-undef passthru"
- " with non-VLMAX VL\n");
- return false;
- }
- }
// If the VL is 1, then there is no need to reduce it. This is an
// optimization, not needed to preserve correctness.
@@ -1247,7 +1228,7 @@ std::optional<MachineOperand> RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
return std::nullopt;
}
- // Tied operands might pass through.
+ // If used as a passthru, elements past VL will be read.
if (UserOp.isTied()) {
LLVM_DEBUG(dbgs() << " Abort because user used as tied operand\n");
return std::nullopt;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index 49db94e1a02df1..9dbe261b7cd054 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -3919,11 +3919,12 @@ define void @trunc_v6bf16(ptr %x) {
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfabs.v v8, v10
; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
@@ -4002,11 +4003,12 @@ define void @trunc_v6f16(ptr %x) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v10
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
-; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
@@ -4098,12 +4100,13 @@ define void @ceil_v6bf16(ptr %x) {
; CHECK-NEXT: vfabs.v v8, v10
; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a1, 3
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
@@ -4189,12 +4192,13 @@ define void @ceil_v6f16(ptr %x) {
; ZVFHMIN-NEXT: vfabs.v v8, v10
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT: fsrmi a1, 3
+; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: fsrm a1
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
-; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
@@ -4290,12 +4294,13 @@ define void @floor_v6bf16(ptr %x) {
; CHECK-NEXT: vfabs.v v8, v10
; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a1, 2
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
@@ -4381,12 +4386,13 @@ define void @floor_v6f16(ptr %x) {
; ZVFHMIN-NEXT: vfabs.v v8, v10
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT: fsrmi a1, 2
+; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: fsrm a1
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
-; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
@@ -4482,12 +4488,13 @@ define void @round_v6bf16(ptr %x) {
; CHECK-NEXT: vfabs.v v8, v10
; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a1, 4
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
@@ -4573,12 +4580,13 @@ define void @round_v6f16(ptr %x) {
; ZVFHMIN-NEXT: vfabs.v v8, v10
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT: fsrmi a1, 4
+; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: fsrm a1
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
-; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
index 1cc30f077feb4a..3e49da014d56fe 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
@@ -107,7 +107,8 @@ define <vscale x 4 x i32> @different_vl_with_ta(<vscale x 4 x i32> %a, <vscale x
ret <vscale x 4 x i32> %w
}
-; Test case to make sure VL won't propgate if using tail-undisturbed policy.
+; We can propagate VL to a tail-undisturbed policy, provided none of its users
+; are passthrus (i.e. read past VL).
define <vscale x 4 x i32> @different_vl_with_tu(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_vl_with_tu:
; CHECK: # %bb.0:
@@ -118,22 +119,65 @@ define <vscale x 4 x i32> @different_vl_with_tu(<vscale x 4 x i32> %passthru, <v
; CHECK-NEXT: vadd.vv v8, v14, v10
; CHECK-NEXT: ret
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1)
- %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a,iXLen %vl2)
+ %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl2)
ret <vscale x 4 x i32> %w
}
-; Test case to make sure VL won't propgate if using tail-undisturbed policy.
+; We can propagate VL to a tail-undisturbed policy, provided none of its users
+; are passthrus (i.e. read past VL).
define <vscale x 4 x i32> @different_imm_vl_with_tu(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
-; CHECK-LABEL: different_imm_vl_with_tu:
+; NOVLOPT-LABEL: different_imm_vl_with_tu:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmv2r.v v14, v10
+; NOVLOPT-NEXT: vadd.vv v14, v10, v12
+; NOVLOPT-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; NOVLOPT-NEXT: vadd.vv v8, v14, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: different_imm_vl_with_tu:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; VLOPT-NEXT: vmv2r.v v14, v10
+; VLOPT-NEXT: vadd.vv v14, v10, v12
+; VLOPT-NEXT: vadd.vv v8, v14, v10
+; VLOPT-NEXT: ret
+ %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5)
+ %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 4)
+ ret <vscale x 4 x i32> %w
+}
+
+; We can't reduce the VL as %v is used as a passthru, i.e. the elements past VL
+; are demanded.
+define <vscale x 4 x i32> @different_vl_as_passthru(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
+; CHECK-LABEL: different_vl_as_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vadd.vv v12, v8, v10
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; CHECK-NEXT: vadd.vv v12, v8, v10
+; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1)
+ %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %v, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl2)
+ ret <vscale x 4 x i32> %w
+}
+
+; We can't reduce the VL as %v is used as a passthru, i.e. the elements past VL
+; are demanded.
+define <vscale x 4 x i32> @different_imm_vl_as_passthru(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
+; CHECK-LABEL: different_imm_vl_as_passthru:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; CHECK-NEXT: vmv2r.v v14, v10
-; CHECK-NEXT: vadd.vv v14, v10, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vadd.vv v12, v8, v10
; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma
-; CHECK-NEXT: vadd.vv v8, v14, v10
+; CHECK-NEXT: vadd.vv v12, v8, v10
+; CHECK-NEXT: vmv2r.v v8, v12
; CHECK-NEXT: ret
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5)
- %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a,iXLen 4)
+ %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %v, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 4)
ret <vscale x 4 x i32> %w
}
``````````
https://github.com/llvm/llvm-project/pull/124549