[llvm] 02c8453 - [RISCV] Teach RISCVMergeBaseOffset to handle read-modify-write of a global.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 28 11:47:30 PDT 2022
Author: Craig Topper
Date: 2022-06-28T11:46:24-07:00
New Revision: 02c8453e64565c2e22879d1a52304a8fb27751e6
URL: https://github.com/llvm/llvm-project/commit/02c8453e64565c2e22879d1a52304a8fb27751e6
DIFF: https://github.com/llvm/llvm-project/commit/02c8453e64565c2e22879d1a52304a8fb27751e6.diff
LOG: [RISCV] Teach RISCVMergeBaseOffset to handle read-modify-write of a global.
The pass was previously limited to LUI+ADDI being used by a single
instruction.
This patch allows the pass to optimize multiple memory operations
that use the same offset. Each of them will receive a separate %lo
relocation. My main motivation is to handle a read-modify-write
where we have a load and store to the same address, but I didn't
restrict it to that case.
Reviewed By: asb
Differential Revision: https://reviews.llvm.org/D128599
Added:
Modified:
llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index 20bf870c697d..b060a73846c4 100644
--- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -102,8 +102,7 @@ bool RISCVMergeBaseOffsetOpt::detectLuiAddiGlobal(MachineInstr &HiLUI,
if (LoADDI->getOpcode() != RISCV::ADDI ||
LoADDI->getOperand(2).getTargetFlags() != RISCVII::MO_LO ||
!LoADDI->getOperand(2).isGlobal() ||
- LoADDI->getOperand(2).getOffset() != 0 ||
- !MRI->hasOneUse(LoADDI->getOperand(0).getReg()))
+ LoADDI->getOperand(2).getOffset() != 0)
return false;
return true;
}
@@ -252,108 +251,133 @@ bool RISCVMergeBaseOffsetOpt::matchShiftedOffset(MachineInstr &TailShXAdd,
bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI,
MachineInstr &LoADDI) {
Register DestReg = LoADDI.getOperand(0).getReg();
- assert(MRI->hasOneUse(DestReg) && "expected one use for LoADDI");
- // LoADDI has only one use.
- MachineInstr &Tail = *MRI->use_instr_begin(DestReg);
- switch (Tail.getOpcode()) {
- default:
- LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
- << Tail);
- return false;
- case RISCV::ADDI: {
- // Offset is simply an immediate operand.
- int64_t Offset = Tail.getOperand(2).getImm();
-
- // We might have two ADDIs in a row.
- Register TailDestReg = Tail.getOperand(0).getReg();
- if (MRI->hasOneUse(TailDestReg)) {
- MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg);
- if (TailTail.getOpcode() == RISCV::ADDI) {
- Offset += TailTail.getOperand(2).getImm();
- LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail << TailTail);
- DeadInstrs.insert(&Tail);
- foldOffset(HiLUI, LoADDI, TailTail, Offset);
- return true;
+
+ // First, look for arithmetic instructions we can get an offset from.
+ // We might be able to remove the arithmetic instructions by folding the
+ // offset into the LUI+ADDI.
+ if (MRI->hasOneUse(DestReg)) {
+ // LoADDI has only one use.
+ MachineInstr &Tail = *MRI->use_instr_begin(DestReg);
+ switch (Tail.getOpcode()) {
+ default:
+ LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
+ << Tail);
+ break;
+ case RISCV::ADDI: {
+ // Offset is simply an immediate operand.
+ int64_t Offset = Tail.getOperand(2).getImm();
+
+ // We might have two ADDIs in a row.
+ Register TailDestReg = Tail.getOperand(0).getReg();
+ if (MRI->hasOneUse(TailDestReg)) {
+ MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg);
+ if (TailTail.getOpcode() == RISCV::ADDI) {
+ Offset += TailTail.getOperand(2).getImm();
+ LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail << TailTail);
+ DeadInstrs.insert(&Tail);
+ foldOffset(HiLUI, LoADDI, TailTail, Offset);
+ return true;
+ }
}
- }
- LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail);
- foldOffset(HiLUI, LoADDI, Tail, Offset);
- return true;
- }
- case RISCV::ADD: {
- // The offset is too large to fit in the immediate field of ADDI.
- // This can be in two forms:
- // 1) LUI hi_Offset followed by:
- // ADDI lo_offset
- // This happens in case the offset has non zero bits in
- // both hi 20 and lo 12 bits.
- // 2) LUI (offset20)
- // This happens in case the lower 12 bits of the offset are zeros.
- int64_t Offset;
- if (!matchLargeOffset(Tail, DestReg, Offset))
- return false;
- foldOffset(HiLUI, LoADDI, Tail, Offset);
- return true;
+ LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail);
+ foldOffset(HiLUI, LoADDI, Tail, Offset);
+ return true;
+ }
+ case RISCV::ADD: {
+ // The offset is too large to fit in the immediate field of ADDI.
+ // This can be in two forms:
+ // 1) LUI hi_Offset followed by:
+ // ADDI lo_offset
+ // This happens in case the offset has non zero bits in
+ // both hi 20 and lo 12 bits.
+ // 2) LUI (offset20)
+ // This happens in case the lower 12 bits of the offset are zeros.
+ int64_t Offset;
+ if (!matchLargeOffset(Tail, DestReg, Offset))
+ return false;
+ foldOffset(HiLUI, LoADDI, Tail, Offset);
+ return true;
+ }
+ case RISCV::SH1ADD:
+ case RISCV::SH2ADD:
+ case RISCV::SH3ADD: {
+ // The offset is too large to fit in the immediate field of ADDI.
+ // It may be encoded as (SH2ADD (ADDI X0, C), DestReg) or
+ // (SH3ADD (ADDI X0, C), DestReg).
+ int64_t Offset;
+ if (!matchShiftedOffset(Tail, DestReg, Offset))
+ return false;
+ foldOffset(HiLUI, LoADDI, Tail, Offset);
+ return true;
+ }
+ }
}
- case RISCV::SH1ADD:
- case RISCV::SH2ADD:
- case RISCV::SH3ADD: {
- // The offset is too large to fit in the immediate field of ADDI.
- // It may be encoded as (SH2ADD (ADDI X0, C), DestReg) or
- // (SH3ADD (ADDI X0, C), DestReg).
- int64_t Offset;
- if (!matchShiftedOffset(Tail, DestReg, Offset))
+
+ // We didn't find an arithmetic instruction. If all the uses are memory ops
+ // with the same offset, we can transform
+ // HiLUI: lui vreg1, %hi(foo) ---> lui vreg1, %hi(foo+8)
+ // LoADDI: addi vreg2, vreg1, %lo(foo) ---> lw vreg3, lo(foo+8)(vreg1)
+ // Tail: lw vreg3, 8(vreg2)
+
+ Optional<int64_t> CommonOffset;
+ for (const MachineInstr &UseMI : MRI->use_instructions(DestReg)) {
+ switch (UseMI.getOpcode()) {
+ default:
+ LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI);
return false;
- foldOffset(HiLUI, LoADDI, Tail, Offset);
- return true;
+ case RISCV::LB:
+ case RISCV::LH:
+ case RISCV::LW:
+ case RISCV::LBU:
+ case RISCV::LHU:
+ case RISCV::LWU:
+ case RISCV::LD:
+ case RISCV::FLH:
+ case RISCV::FLW:
+ case RISCV::FLD:
+ case RISCV::SB:
+ case RISCV::SH:
+ case RISCV::SW:
+ case RISCV::SD:
+ case RISCV::FSH:
+ case RISCV::FSW:
+ case RISCV::FSD: {
+ if (UseMI.getOperand(1).isFI())
+ return false;
+ // Register defined by LoADDI should not be the value register.
+ if (DestReg == UseMI.getOperand(0).getReg())
+ return false;
+ assert(DestReg == UseMI.getOperand(1).getReg() &&
+ "Expected base address use");
+ // All load/store instructions must use the same offset.
+ int64_t Offset = UseMI.getOperand(2).getImm();
+ if (CommonOffset && Offset != CommonOffset)
+ return false;
+ CommonOffset = Offset;
+ }
+ }
}
- case RISCV::LB:
- case RISCV::LH:
- case RISCV::LW:
- case RISCV::LBU:
- case RISCV::LHU:
- case RISCV::LWU:
- case RISCV::LD:
- case RISCV::FLH:
- case RISCV::FLW:
- case RISCV::FLD:
- case RISCV::SB:
- case RISCV::SH:
- case RISCV::SW:
- case RISCV::SD:
- case RISCV::FSH:
- case RISCV::FSW:
- case RISCV::FSD: {
- // Transforms the sequence: Into:
- // HiLUI: lui vreg1, %hi(foo) ---> lui vreg1, %hi(foo+8)
- // LoADDI: addi vreg2, vreg1, %lo(foo) ---> lw vreg3, lo(foo+8)(vreg1)
- // Tail: lw vreg3, 8(vreg2)
- if (Tail.getOperand(1).isFI())
- return false;
- // Register defined by LoADDI should be used in the base part of the
- // load\store instruction. Otherwise, no folding possible.
- Register BaseAddrReg = Tail.getOperand(1).getReg();
- if (DestReg != BaseAddrReg)
- return false;
- MachineOperand &TailImmOp = Tail.getOperand(2);
- int64_t Offset = TailImmOp.getImm();
- // Update the offsets in global address lowering.
- HiLUI.getOperand(1).setOffset(Offset);
- // Update the immediate in the Tail instruction to add the offset.
- Tail.removeOperand(2);
- MachineOperand &ImmOp = LoADDI.getOperand(2);
- ImmOp.setOffset(Offset);
- Tail.addOperand(ImmOp);
+
+ // We found a common offset.
+ // Update the offsets in global address lowering.
+ HiLUI.getOperand(1).setOffset(*CommonOffset);
+ MachineOperand &ImmOp = LoADDI.getOperand(2);
+ ImmOp.setOffset(*CommonOffset);
+
+ // Update the immediate in the load/store instructions to add the offset.
+ for (MachineInstr &UseMI :
+ llvm::make_early_inc_range(MRI->use_instructions(DestReg))) {
+ UseMI.removeOperand(2);
+ UseMI.addOperand(ImmOp);
// Update the base reg in the Tail instruction to feed from LUI.
// Output of HiLUI is only used in LoADDI, no need to use
// MRI->replaceRegWith().
- Tail.getOperand(1).setReg(HiLUI.getOperand(0).getReg());
- DeadInstrs.insert(&LoADDI);
- return true;
- }
+ UseMI.getOperand(1).setReg(HiLUI.getOperand(0).getReg());
}
- return false;
+
+ DeadInstrs.insert(&LoADDI);
+ return true;
}
bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
@@ -371,9 +395,8 @@ bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
MachineInstr *LoADDI = nullptr;
if (!detectLuiAddiGlobal(HiLUI, LoADDI))
continue;
- LLVM_DEBUG(dbgs() << " Found lowered global address with one use: "
+ LLVM_DEBUG(dbgs() << " Found lowered global address: "
<< *LoADDI->getOperand(2).getGlobal() << "\n");
- // If the use count is only one, merge the offset
MadeChange |= detectAndFoldOffset(HiLUI, *LoADDI);
}
}
diff --git a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
index 872d1f72bf6f..5f32533f7620 100644
--- a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
+++ b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
@@ -261,6 +261,77 @@ define i8* @offset_sh3add() {
; CHECK-NEXT: ret
ret i8* getelementptr inbounds ([0 x i8], [0 x i8]* @bar, i32 0, i64 12848)
}
+
+define dso_local void @read_modify_write() local_unnamed_addr nounwind {
+; RV32-LABEL: read_modify_write:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: lui a0, %hi(s+160)
+; RV32-NEXT: lw a1, %lo(s+160)(a0)
+; RV32-NEXT: addi a1, a1, 10
+; RV32-NEXT: sw a1, %lo(s+160)(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: read_modify_write:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: lui a0, %hi(s+160)
+; RV64-NEXT: lw a1, %lo(s+160)(a0)
+; RV64-NEXT: addiw a1, a1, 10
+; RV64-NEXT: sw a1, %lo(s+160)(a0)
+; RV64-NEXT: ret
+entry:
+ %x = load i32, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 1), align 4
+ %y = add i32 %x, 10
+ store i32 %y, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 1), align 4
+ ret void
+}
+
+define dso_local void @rmw_with_control_flow() nounwind {
+; CHECK-LABEL: rmw_with_control_flow:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a0, %hi(s+164)
+; CHECK-NEXT: lw a1, %lo(s+164)(a0)
+; CHECK-NEXT: blez a1, .LBB17_2
+; CHECK-NEXT: # %bb.1: # %if.then
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: sw a1, %lo(s+164)(a0)
+; CHECK-NEXT: .LBB17_2: # %if.end
+; CHECK-NEXT: ret
+entry:
+ %0 = load i32, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 2), align 4
+ %cmp = icmp sgt i32 %0, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ store i32 10, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 2), align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+%struct.foo = type { i32, %struct.foo* }
+
+@f = global %struct.foo zeroinitializer, align 8
+
+; Test the case where the store value and base pointer are the same register.
+define void @self_store() {
+; RV32-LABEL: self_store:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a0, %hi(f)
+; RV32-NEXT: addi a1, a0, %lo(f)
+; RV32-NEXT: sw a1, %lo(f+4)(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: self_store:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, %hi(f)
+; RV64-NEXT: addi a0, a0, %lo(f)
+; RV64-NEXT: sd a0, 8(a0)
+; RV64-NEXT: ret
+ store %struct.foo* @f, %struct.foo** getelementptr inbounds (%struct.foo, %struct.foo* @f, i64 0, i32 1), align 8
+ ret void
+}
+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32I: {{.*}}
; RV32ZBA: {{.*}}
More information about the llvm-commits
mailing list