[llvm] da5b1bf - [RISCV] Teach RISCVMergeBaseOffset to merge %lo/%pcrel_lo into load/store after folding arithmetic.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 1 15:35:45 PDT 2022
Author: Craig Topper
Date: 2022-08-01T15:33:21-07:00
New Revision: da5b1bf5bb0f1c3d7553a286ad50245bb11694b9
URL: https://github.com/llvm/llvm-project/commit/da5b1bf5bb0f1c3d7553a286ad50245bb11694b9
DIFF: https://github.com/llvm/llvm-project/commit/da5b1bf5bb0f1c3d7553a286ad50245bb11694b9.diff
LOG: [RISCV] Teach RISCVMergeBaseOffset to merge %lo/%pcrel_lo into load/store after folding arithmetic.
It's possible we have:
  lui  a0, %hi(sym)
  addi a0, a0, %lo(sym)
  addi a0, a0, <offset1>
  lw   a0, <offset2>(a0)
We want to arrive at:
  lui a0, %hi(sym+offset1+offset2)
  lw  a0, %lo(sym+offset1+offset2)(a0)
We currently fail to do this because we only consider loads/stores
when no arithmetic instructions were found.
This patch splits arithmetic folding and load/store folding into
two separate phases. The load/store folding can no longer assume
the offset already folded into hi/lo is 0, so the offsets must be
combined. I've applied the same simm32 limit that we applied in the
arithmetic folding.
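For readers following along, here is a minimal standalone C++ sketch of
the offset-combining rule the new load/store phase applies. The function
combineOffsets and its parameter names are illustrative only, not part of
the patch; the actual pass uses SignExtend64 and isInt<32> from
llvm/Support/MathExtras.h on the MachineOperand offsets:

  #include <cstdint>
  #include <optional>

  // Combine the offset already folded into %hi/%lo (HiLoOffset) with the
  // common offset shared by all memory uses (CommonOffset). Returns
  // std::nullopt when the sum cannot be folded.
  std::optional<int64_t> combineOffsets(int64_t HiLoOffset,
                                        int64_t CommonOffset,
                                        bool Is64Bit) {
    int64_t NewOffset = HiLoOffset + CommonOffset;
    // RV32 ignores the upper 32 bits, so keep only the sign-extended
    // low 32 bits of the sum.
    if (!Is64Bit)
      NewOffset = static_cast<int32_t>(NewOffset);
    // Only simm32 offsets can be folded, matching the limit the
    // arithmetic folding already applies.
    if (NewOffset < INT32_MIN || NewOffset > INT32_MAX)
      return std::nullopt;
    return NewOffset;
  }

  // e.g. combineOffsets(2047, 1164, /*Is64Bit=*/true) yields 3211,
  // matching the store_addi_addi test update below.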
Reviewed By: luismarques
Differential Revision: https://reviews.llvm.org/D130931
Added:
Modified:
llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index 38e583d4cf85..7b2fb9f0d686 100644
--- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -42,6 +42,8 @@ struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
bool foldShiftedOffset(MachineInstr &Hi, MachineInstr &Lo,
MachineInstr &TailShXAdd, Register GSReg);
+ bool foldIntoMemoryOps(MachineInstr &Hi, MachineInstr &Lo);
+
RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}
MachineFunctionProperties getRequiredProperties() const override {
@@ -267,62 +269,67 @@ bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi,
MachineInstr &Lo) {
Register DestReg = Lo.getOperand(0).getReg();
- // First, look for arithmetic instructions we can get an offset from.
+ // Look for arithmetic instructions we can get an offset from.
// We might be able to remove the arithmetic instructions by folding the
// offset into the LUI+ADDI.
- if (MRI->hasOneUse(DestReg)) {
- // Lo has only one use.
- MachineInstr &Tail = *MRI->use_instr_begin(DestReg);
- switch (Tail.getOpcode()) {
- default:
- LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
- << Tail);
- break;
- case RISCV::ADDI: {
- // Offset is simply an immediate operand.
- int64_t Offset = Tail.getOperand(2).getImm();
-
- // We might have two ADDIs in a row.
- Register TailDestReg = Tail.getOperand(0).getReg();
- if (MRI->hasOneUse(TailDestReg)) {
- MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg);
- if (TailTail.getOpcode() == RISCV::ADDI) {
- Offset += TailTail.getOperand(2).getImm();
- LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail << TailTail);
- foldOffset(Hi, Lo, TailTail, Offset);
- Tail.eraseFromParent();
- return true;
- }
- }
+ if (!MRI->hasOneUse(DestReg))
+ return false;
- LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail);
- foldOffset(Hi, Lo, Tail, Offset);
- return true;
- }
- case RISCV::ADD: {
- // The offset is too large to fit in the immediate field of ADDI.
- // This can be in two forms:
- // 1) LUI hi_Offset followed by:
- // ADDI lo_offset
- // This happens in case the offset has non zero bits in
- // both hi 20 and lo 12 bits.
- // 2) LUI (offset20)
- // This happens in case the lower 12 bits of the offset are zeros.
- return foldLargeOffset(Hi, Lo, Tail, DestReg);
- }
- case RISCV::SH1ADD:
- case RISCV::SH2ADD:
- case RISCV::SH3ADD: {
- // The offset is too large to fit in the immediate field of ADDI.
- // It may be encoded as (SH2ADD (ADDI X0, C), DestReg) or
- // (SH3ADD (ADDI X0, C), DestReg).
- return foldShiftedOffset(Hi, Lo, Tail, DestReg);
- }
+ // Lo has only one use.
+ MachineInstr &Tail = *MRI->use_instr_begin(DestReg);
+ switch (Tail.getOpcode()) {
+ default:
+ LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
+ << Tail);
+ break;
+ case RISCV::ADDI: {
+ // Offset is simply an immediate operand.
+ int64_t Offset = Tail.getOperand(2).getImm();
+
+ // We might have two ADDIs in a row.
+ Register TailDestReg = Tail.getOperand(0).getReg();
+ if (MRI->hasOneUse(TailDestReg)) {
+ MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg);
+ if (TailTail.getOpcode() == RISCV::ADDI) {
+ Offset += TailTail.getOperand(2).getImm();
+ LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail << TailTail);
+ foldOffset(Hi, Lo, TailTail, Offset);
+ Tail.eraseFromParent();
+ return true;
+ }
}
+
+ LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail);
+ foldOffset(Hi, Lo, Tail, Offset);
+ return true;
+ }
+ case RISCV::ADD:
+ // The offset is too large to fit in the immediate field of ADDI.
+ // This can be in two forms:
+ // 1) LUI hi_Offset followed by:
+ // ADDI lo_offset
+ // This happens in case the offset has non zero bits in
+ // both hi 20 and lo 12 bits.
+ // 2) LUI (offset20)
+ // This happens in case the lower 12 bits of the offset are zeros.
+ return foldLargeOffset(Hi, Lo, Tail, DestReg);
+ case RISCV::SH1ADD:
+ case RISCV::SH2ADD:
+ case RISCV::SH3ADD:
+ // The offset is too large to fit in the immediate field of ADDI.
+ // It may be encoded as (SH2ADD (ADDI X0, C), DestReg) or
+ // (SH3ADD (ADDI X0, C), DestReg).
+ return foldShiftedOffset(Hi, Lo, Tail, DestReg);
}
- // We didn't find an arithmetic instruction. If all the uses are memory ops
- // with the same offset, we can transform:
+ return false;
+}
+
+bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
+ MachineInstr &Lo) {
+ Register DestReg = Lo.getOperand(0).getReg();
+
+ // If all the uses are memory ops with the same offset, we can transform:
//
// 1. (medlow pattern):
// Hi: lui vreg1, %hi(foo) ---> lui vreg1, %hi(foo+8)
@@ -375,10 +382,20 @@ bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi,
// We found a common offset.
// Update the offsets in global address lowering.
- Hi.getOperand(1).setOffset(*CommonOffset);
+ // We may have already folded some arithmetic so we need to add to any
+ // existing offset.
+ int64_t NewOffset = Hi.getOperand(1).getOffset() + *CommonOffset;
+ // RV32 ignores the upper 32 bits.
+ if (!ST->is64Bit())
+ NewOffset = SignExtend64<32>(NewOffset);
+ // We can only fold simm32 offsets.
+ if (!isInt<32>(NewOffset))
+ return false;
+
+ Hi.getOperand(1).setOffset(NewOffset);
MachineOperand &ImmOp = Lo.getOperand(2);
if (Hi.getOpcode() != RISCV::AUIPC)
- ImmOp.setOffset(*CommonOffset);
+ ImmOp.setOffset(NewOffset);
// Update the immediate in the load/store instructions to add the offset.
for (MachineInstr &UseMI :
@@ -411,6 +428,7 @@ bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
LLVM_DEBUG(dbgs() << " Found lowered global address: "
<< *Hi.getOperand(2).getGlobal() << "\n");
MadeChange |= detectAndFoldOffset(Hi, *Lo);
+ MadeChange |= foldIntoMemoryOps(Hi, *Lo);
}
}
diff --git a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
index 721fb87d5a81..d6614fa78411 100644
--- a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
+++ b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
@@ -335,10 +335,9 @@ define void @self_store() {
define void @store_addi_addi() {
; CHECK-LABEL: store_addi_addi:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(bar+2047)
-; CHECK-NEXT: addi a0, a0, %lo(bar+2047)
+; CHECK-NEXT: lui a0, %hi(bar+3211)
; CHECK-NEXT: li a1, 10
-; CHECK-NEXT: sb a1, 1164(a0)
+; CHECK-NEXT: sb a1, %lo(bar+3211)(a0)
; CHECK-NEXT: ret
store i8 10, i8* getelementptr inbounds ([0 x i8], [0 x i8]* @bar, i32 0, i64 3211)
ret void
@@ -347,10 +346,9 @@ define void @store_addi_addi() {
define void @store_addi_addi_neg() {
; CHECK-LABEL: store_addi_addi_neg:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(bar-2048)
-; CHECK-NEXT: addi a0, a0, %lo(bar-2048)
+; CHECK-NEXT: lui a0, %hi(bar-4000)
; CHECK-NEXT: li a1, 10
-; CHECK-NEXT: sb a1, -1952(a0)
+; CHECK-NEXT: sb a1, %lo(bar-4000)(a0)
; CHECK-NEXT: ret
store i8 10, i8* getelementptr inbounds ([0 x i8], [0 x i8]* @bar, i32 0, i64 -4000)
ret void
@@ -360,10 +358,9 @@ define void @store_addi_addi_neg() {
define void @store_sh2add() {
; CHECK-LABEL: store_sh2add:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(bar+8192)
-; CHECK-NEXT: addi a0, a0, %lo(bar+8192)
+; CHECK-NEXT: lui a0, %hi(bar+6424)
; CHECK-NEXT: li a1, 10
-; CHECK-NEXT: sb a1, -1768(a0)
+; CHECK-NEXT: sb a1, %lo(bar+6424)(a0)
; CHECK-NEXT: ret
store i8 10, i8* getelementptr inbounds ([0 x i8], [0 x i8]* @bar, i32 0, i64 6424)
ret void
@@ -373,15 +370,38 @@ define void @store_sh2add() {
define void @store_sh3add() {
; CHECK-LABEL: store_sh3add:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(bar+12288)
-; CHECK-NEXT: addi a0, a0, %lo(bar+12288)
+; CHECK-NEXT: lui a0, %hi(bar+12848)
; CHECK-NEXT: li a1, 10
-; CHECK-NEXT: sb a1, 560(a0)
+; CHECK-NEXT: sb a1, %lo(bar+12848)(a0)
; CHECK-NEXT: ret
store i8 10, i8* getelementptr inbounds ([0 x i8], [0 x i8]* @bar, i32 0, i64 12848)
ret void
}
+define dso_local void @rmw_addi_addi() nounwind {
+; CHECK-LABEL: rmw_addi_addi:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a0, %hi(bar+3211)
+; CHECK-NEXT: lb a1, %lo(bar+3211)(a0)
+; CHECK-NEXT: blez a1, .LBB23_2
+; CHECK-NEXT: # %bb.1: # %if.then
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: sb a1, %lo(bar+3211)(a0)
+; CHECK-NEXT: .LBB23_2: # %if.end
+; CHECK-NEXT: ret
+entry:
+ %0 = load i8, i8* getelementptr inbounds ([0 x i8], [0 x i8]* @bar, i32 0, i64 3211)
+ %cmp = icmp sgt i8 %0, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ store i8 10, i8* getelementptr inbounds ([0 x i8], [0 x i8]* @bar, i32 0, i64 3211)
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32I: {{.*}}
; RV32ZBA: {{.*}}