[llvm] 46d1f30 - [RISCV][InsertVSETVLI] Handle large immediates in backwards walk (#75409)
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 14 07:36:12 PST 2023
Author: Philip Reames
Date: 2023-12-14T07:36:07-08:00
New Revision: 46d1f308824909b550b042c926a9c1e974cf2454
URL: https://github.com/llvm/llvm-project/commit/46d1f308824909b550b042c926a9c1e974cf2454
DIFF: https://github.com/llvm/llvm-project/commit/46d1f308824909b550b042c926a9c1e974cf2454.diff
LOG: [RISCV][InsertVSETVLI] Handle large immediates in backwards walk (#75409)
When doing our backwards walk, we were not handling the case where the
AVL was defined by a register whose definition was an ADDI xN, x0,
<imm>. Handling that case (as we already do in the forward pass) allows
us to prune a few more vsetvli transitions.
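
Concretely (taking insertelt_v32i32_4 from the first test diff below):
an AVL of 32 does not fit in vsetivli's 5-bit AVL immediate, so it is
materialized with li, which is an alias for addi rd, x0, imm. The
backwards walk can now see through that definition and fold away the
whole prefix:

    ; before
    li a1, 32
    vsetvli zero, a1, e32, m1, ta, ma
    vmv.s.x v16, a0
    vsetivli zero, 5, e32, m2, tu, ma
    vslideup.vi v8, v16, 4

    ; after
    vsetivli zero, 5, e32, m2, tu, ma
    vmv.s.x v16, a0
    vslideup.vi v8, v16, 4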
Added:
Modified:
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index bc04b9f768e57..b2d36b362b3a0 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1455,9 +1455,19 @@ static void doUnion(DemandedFields &A, DemandedFields B) {
   A.MaskPolicy |= B.MaskPolicy;
 }
 
-static bool isNonZeroAVL(const MachineOperand &MO) {
-  if (MO.isReg())
-    return RISCV::X0 == MO.getReg();
+static bool isNonZeroAVL(const MachineOperand &MO,
+                         const MachineRegisterInfo &MRI) {
+  if (MO.isReg()) {
+    if (MO.getReg() == RISCV::X0)
+      return true;
+    if (MachineInstr *MI = MRI.getVRegDef(MO.getReg());
+        MI && MI->getOpcode() == RISCV::ADDI &&
+        MI->getOperand(1).isReg() && MI->getOperand(2).isImm() &&
+        MI->getOperand(1).getReg() == RISCV::X0 &&
+        MI->getOperand(2).getImm() != 0)
+      return true;
+    return false;
+  }
   assert(MO.isImm());
   return 0 != MO.getImm();
 }
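
The new case matches exactly the canonical li expansion: a register
whose definition is an ADDI off x0 with a nonzero immediate. As an
illustrative MIR sketch (register numbers are hypothetical), what now
qualifies is:

    %4:gpr = ADDI $x0, 32    ; li a1, 32 -- nonzero immediate, so an
                             ; AVL read from %4 is provably nonzero

Any other definition (a load, an ADDI with a non-x0 base register, an
unknown def) conservatively remains "possibly zero".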
@@ -1466,7 +1476,8 @@ static bool isNonZeroAVL(const MachineOperand &MO) {
 // fields which would be observed.
 static bool canMutatePriorConfig(const MachineInstr &PrevMI,
                                  const MachineInstr &MI,
-                                 const DemandedFields &Used) {
+                                 const DemandedFields &Used,
+                                 const MachineRegisterInfo &MRI) {
   // If the VL values aren't equal, return false if either a) the former is
   // demanded, or b) we can't rewrite the former to be the later for
   // implementation reasons.
@@ -1479,8 +1490,8 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
   if (Used.VLZeroness) {
     if (isVLPreservingConfig(PrevMI))
       return false;
-    if (!isNonZeroAVL(MI.getOperand(1)) ||
-        !isNonZeroAVL(PrevMI.getOperand(1)))
+    if (!isNonZeroAVL(MI.getOperand(1), MRI) ||
+        !isNonZeroAVL(PrevMI.getOperand(1), MRI))
       return false;
   }
 
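
The zeroness check matters because some users (vmv.s.x, for instance)
demand only whether VL == 0, not its exact value. Two configurations
are interchangeable for such a user only when both AVLs are provably
nonzero, which the MRI-aware isNonZeroAVL can now establish for a pair
like:

    vsetvli zero, a1, e8, mf4, ta, ma   ; a1 defined by li a1, 32 -> VL != 0
    vsetivli zero, 5, e8, m1, tu, ma    ; immediate AVL of 5      -> VL != 0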
@@ -1524,14 +1535,23 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
         ToDelete.push_back(&MI);
         // Leave NextMI unchanged
         continue;
-      } else if (canMutatePriorConfig(MI, *NextMI, Used)) {
+      } else if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
         if (!isVLPreservingConfig(*NextMI)) {
           MI.getOperand(0).setReg(NextMI->getOperand(0).getReg());
           MI.getOperand(0).setIsDead(false);
+          Register OldVLReg;
+          if (MI.getOperand(1).isReg())
+            OldVLReg = MI.getOperand(1).getReg();
           if (NextMI->getOperand(1).isImm())
             MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
           else
             MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
+          if (OldVLReg) {
+            MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
+            if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
+                MRI->use_nodbg_empty(OldVLReg))
+              VLOpDef->eraseFromParent();
+          }
           MI.setDesc(NextMI->getDesc());
         }
         MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
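
Rewriting MI's AVL operand to NextMI's can leave the old AVL's defining
instruction dead, so the new block erases it when it is a recognized
add-immediate (per TII->isAddImmediate) with no remaining non-debug
uses. This is what deletes the stray li in the test diffs below: once

    li a3, 32
    vsetvli zero, a3, e8, mf4, ta, ma

is rewritten to

    vsetivli zero, 5, e8, m1, tu, ma

the li's result has no remaining uses.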
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
index a3f41fd842222..6ebbc37f4afd7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -51,10 +51,8 @@ define <32 x i32> @insertelt_v32i32_0(<32 x i32> %a, i32 %y) {
 define <32 x i32> @insertelt_v32i32_4(<32 x i32> %a, i32 %y) {
 ; CHECK-LABEL: insertelt_v32i32_4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v16, a0
 ; CHECK-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
+; CHECK-NEXT:    vmv.s.x v16, a0
 ; CHECK-NEXT:    vslideup.vi v8, v16, 4
 ; CHECK-NEXT:    ret
   %b = insertelement <32 x i32> %a, i32 %y, i32 4
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index a2f367abb9ddd..d72ee25adb98d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -12449,14 +12449,11 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB98_8
 ; RV64ZVE32F-NEXT:  .LBB98_7: # %cond.load10
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 5, e8, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v13
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
-; RV64ZVE32F-NEXT:    li a3, 32
-; RV64ZVE32F-NEXT:    vsetvli zero, a3, e8, mf4, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 5, e8, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT:  .LBB98_8: # %else11
 ; RV64ZVE32F-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
@@ -12582,14 +12579,11 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
 ; RV64ZVE32F-NEXT:    slli a2, a1, 43
 ; RV64ZVE32F-NEXT:    bgez a2, .LBB98_32
 ; RV64ZVE32F-NEXT:  .LBB98_31: # %cond.load58
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 21, e8, m2, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
-; RV64ZVE32F-NEXT:    li a3, 32
-; RV64ZVE32F-NEXT:    vsetvli zero, a3, e8, mf4, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 21, e8, m2, tu, ma
 ; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 20
 ; RV64ZVE32F-NEXT:  .LBB98_32: # %else59
 ; RV64ZVE32F-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
@@ -12724,14 +12718,11 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
 ; RV64ZVE32F-NEXT:    andi a2, a1, 256
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB98_13
 ; RV64ZVE32F-NEXT:  .LBB98_53: # %cond.load22
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 9, e8, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v12
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
-; RV64ZVE32F-NEXT:    li a3, 32
-; RV64ZVE32F-NEXT:    vsetvli zero, a3, e8, mf4, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v13, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 9, e8, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vslideup.vi v10, v13, 8
 ; RV64ZVE32F-NEXT:    andi a2, a1, 512
 ; RV64ZVE32F-NEXT:    bnez a2, .LBB98_14
@@ -12757,14 +12748,11 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
 ; RV64ZVE32F-NEXT:    slli a2, a1, 47
 ; RV64ZVE32F-NEXT:    bgez a2, .LBB98_26
 ; RV64ZVE32F-NEXT:  .LBB98_56: # %cond.load46
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 17, e8, m2, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
-; RV64ZVE32F-NEXT:    li a3, 32
-; RV64ZVE32F-NEXT:    vsetvli zero, a3, e8, mf4, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 17, e8, m2, tu, ma
 ; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 16
 ; RV64ZVE32F-NEXT:    slli a2, a1, 46
 ; RV64ZVE32F-NEXT:    bltz a2, .LBB98_27
@@ -12811,14 +12799,11 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
 ; RV64ZVE32F-NEXT:    slli a2, a1, 39
 ; RV64ZVE32F-NEXT:    bgez a2, .LBB98_37
 ; RV64ZVE32F-NEXT:  .LBB98_61: # %cond.load70
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 25, e8, m2, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
-; RV64ZVE32F-NEXT:    li a3, 32
-; RV64ZVE32F-NEXT:    vsetvli zero, a3, e8, mf4, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 25, e8, m2, tu, ma
 ; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 24
 ; RV64ZVE32F-NEXT:    slli a2, a1, 38
 ; RV64ZVE32F-NEXT:    bltz a2, .LBB98_38
@@ -12844,14 +12829,11 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
 ; RV64ZVE32F-NEXT:    slli a2, a1, 35
 ; RV64ZVE32F-NEXT:    bgez a2, .LBB98_42
 ; RV64ZVE32F-NEXT:  .LBB98_64: # %cond.load82
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 29, e8, m2, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
-; RV64ZVE32F-NEXT:    li a3, 32
-; RV64ZVE32F-NEXT:    vsetvli zero, a3, e8, mf4, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 29, e8, m2, tu, ma
 ; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 28
 ; RV64ZVE32F-NEXT:    slli a2, a1, 34
 ; RV64ZVE32F-NEXT:    bltz a2, .LBB98_43