[llvm] 8bbcb98 - [RISCV] Teach RISCVMergeBaseOffset about cases where we use SHXADD to add some immediates.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 9 16:07:47 PDT 2022
Author: Craig Topper
Date: 2022-06-09T16:07:35-07:00
New Revision: 8bbcb988481c4b072e5f545ef222078ff2a8df3b
URL: https://github.com/llvm/llvm-project/commit/8bbcb988481c4b072e5f545ef222078ff2a8df3b
DIFF: https://github.com/llvm/llvm-project/commit/8bbcb988481c4b072e5f545ef222078ff2a8df3b.diff
LOG: [RISCV] Teach RISCVMergeBaseOffset about cases where we use SHXADD to add some immediates.
For an addition of a simm14 or simm15 immediate that has 2 or 3 trailing zero
bits, respectively, we can use an addi and a shXadd instruction to do the addition.
This patch teaches RISCVMergeBaseOffset to see through this pattern.
I don't think the sh1add case occurs because we use two addis for that,
but I implemented it for completeness.
Reviewed By: reames
Differential Revision: https://reviews.llvm.org/D127376
Added:
Modified:
llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index 13a47eccb54fd..68fe06714a5b2 100644
--- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -50,6 +50,9 @@ struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
void foldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI, MachineInstr &Tail,
int64_t Offset);
bool matchLargeOffset(MachineInstr &TailAdd, Register GSReg, int64_t &Offset);
+ bool matchShiftedOffset(MachineInstr &TailShXAdd, Register GSReg,
+ int64_t &Offset);
+
RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}
MachineFunctionProperties getRequiredProperties() const override {
@@ -193,6 +196,55 @@ bool RISCVMergeBaseOffsetOpt::matchLargeOffset(MachineInstr &TailAdd,
return false;
}
+// Detect patterns for offsets that are passed into a SHXADD instruction.
+// The offset has 1, 2, or 3 trailing zeros and fits in simm13, simm14, or
+// simm15. The constant is created with addi voff, x0, C, and shXadd inserts
+// the trailing zeros and does the addition. On a match, Offset is set to C << X.
+//
+// HiLUI: lui vreg1, %hi(s)
+// LoADDI: addi vreg2, vreg1, %lo(s)
+// OffsetTail: addi voff, x0, C
+// TailShXAdd: shXadd vreg4, voff, vreg2
+bool RISCVMergeBaseOffsetOpt::matchShiftedOffset(MachineInstr &TailShXAdd,
+ Register GAReg,
+ int64_t &Offset) {
+ assert((TailShXAdd.getOpcode() == RISCV::SH1ADD ||
+ TailShXAdd.getOpcode() == RISCV::SH2ADD ||
+ TailShXAdd.getOpcode() == RISCV::SH3ADD) &&
+ "Expected SHXADD instruction!");
+ // The first source is the shifted operand.
+ Register Rs1 = TailShXAdd.getOperand(1).getReg();
+ // The second (unshifted) source must be GAReg, otherwise there is no match.
+ if (GAReg != TailShXAdd.getOperand(2).getReg())
+ return false;
+
+ // Can't fold if the shifted register has more than one use.
+ if (!MRI->hasOneUse(Rs1))
+ return false;
+ // The shifted operand must be defined by an ADDI X0, C.
+ MachineInstr &OffsetTail = *MRI->getVRegDef(Rs1);
+ if (OffsetTail.getOpcode() != RISCV::ADDI)
+ return false;
+ if (!OffsetTail.getOperand(1).isReg() ||
+ OffsetTail.getOperand(1).getReg() != RISCV::X0 ||
+ !OffsetTail.getOperand(2).isImm())
+ return false;
+
+ Offset = OffsetTail.getOperand(2).getImm();
+ assert(isInt<12>(Offset) && "Unexpected offset");
+ // Scale the ADDI immediate by the shift amount implied by the opcode.
+ switch (TailShXAdd.getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode");
+ case RISCV::SH1ADD: Offset <<= 1; break;
+ case RISCV::SH2ADD: Offset <<= 2; break;
+ case RISCV::SH3ADD: Offset <<= 3; break;
+ }
+ // Record the matched ADDI in DeadInstrs and report success.
+ LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
+ DeadInstrs.insert(&OffsetTail);
+ return true;
+}
+
bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI,
MachineInstr &LoADDI) {
Register DestReg = LoADDI.getOperand(0).getReg();
@@ -240,6 +292,18 @@ bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI,
foldOffset(HiLUI, LoADDI, Tail, Offset);
return true;
}
+ case RISCV::SH1ADD:
+ case RISCV::SH2ADD:
+ case RISCV::SH3ADD: {
+ // The offset is too large to fit in the immediate field of ADDI.
+ // It may be encoded as (SH2ADD (ADDI X0, C), DestReg) or
+ // (SH3ADD (ADDI X0, C), DestReg).
+ int64_t Offset;
+ if (!matchShiftedOffset(Tail, DestReg, Offset))
+ return false;
+ foldOffset(HiLUI, LoADDI, Tail, Offset);
+ return true;
+ }
case RISCV::LB:
case RISCV::LH:
case RISCV::LW:
diff --git a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
index e7ede7e976acf..872d1f72bf6fb 100644
--- a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
+++ b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32I
+; RUN: llc -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64I
+; RUN: llc -mtriple=riscv32 -mattr=+zba < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZBA
+; RUN: llc -mtriple=riscv64 -mattr=+zba < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZBA
%struct.S = type { [40 x i32], i32, i32, i32, [4100 x i32], i32, i32, i32 }
@s = common dso_local global %struct.S zeroinitializer, align 4
@@ -239,3 +241,28 @@ define i8* @offset_addi_addi_neg() {
; CHECK-NEXT: ret
ret i8* getelementptr inbounds ([0 x i8], [0 x i8]* @bar, i32 0, i64 -4000)
}
+
+; With Zba the constant 6424 is created with LI+SH2ADD.
+define i8* @offset_sh2add() {
+; CHECK-LABEL: offset_sh2add:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(bar+6424)
+; CHECK-NEXT: addi a0, a0, %lo(bar+6424)
+; CHECK-NEXT: ret
+ ret i8* getelementptr inbounds ([0 x i8], [0 x i8]* @bar, i32 0, i64 6424)
+}
+
+; With Zba the constant 12848 is created with LI+SH3ADD.
+define i8* @offset_sh3add() {
+; CHECK-LABEL: offset_sh3add:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(bar+12848)
+; CHECK-NEXT: addi a0, a0, %lo(bar+12848)
+; CHECK-NEXT: ret
+ ret i8* getelementptr inbounds ([0 x i8], [0 x i8]* @bar, i32 0, i64 12848)
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32I: {{.*}}
+; RV32ZBA: {{.*}}
+; RV64I: {{.*}}
+; RV64ZBA: {{.*}}
More information about the llvm-commits
mailing list