[llvm] b225b15 - [LoongArch] Merge base and offset for large offsets (#113277)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 23 04:43:27 PDT 2024
Author: hev
Date: 2024-10-23T19:43:23+08:00
New Revision: b225b15a3d5ff28e4848369ef14ea63f9e418a9d
URL: https://github.com/llvm/llvm-project/commit/b225b15a3d5ff28e4848369ef14ea63f9e418a9d
DIFF: https://github.com/llvm/llvm-project/commit/b225b15a3d5ff28e4848369ef14ea63f9e418a9d.diff
LOG: [LoongArch] Merge base and offset for large offsets (#113277)
This PR merges large offsets into the base address loading.
Added:
Modified:
llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
llvm/test/CodeGen/LoongArch/merge-base-offset.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp b/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
index 47071d29c2cd44..e9455fdd23ba54 100644
--- a/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
@@ -183,7 +183,6 @@ void LoongArchMergeBaseOffsetOpt::foldOffset(
MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
int64_t Offset) {
- assert(isInt<32>(Offset) && "Unexpected offset");
// Put the offset back in Hi and the Lo
Hi20.getOperand(1).setOffset(Offset);
Lo12.getOperand(2).setOffset(Offset);
@@ -209,22 +208,35 @@ void LoongArchMergeBaseOffsetOpt::foldOffset(
// instructions and deletes TailAdd and the instructions that produced the
// offset.
//
-// Base address lowering is of the form:
-// Hi20: pcalau12i vreg1, %pc_hi20(s)
-// Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
-// / \
-// / \
-// / \
-// / The large offset can be of two forms: \
-// 1) Offset that has non zero bits in lower 2) Offset that has non zero
-// 12 bits and upper 20 bits bits in upper 20 bits only
-// OffsetHi: lu12i.w vreg3, 4
-// OffsetLo: ori voff, vreg3, 188 OffsetHi: lu12i.w voff, 128
-// \ /
-// \ /
-// \ /
-// \ /
-// TailAdd: add.d vreg4, vreg2, voff
+// (The instructions marked with "!" are not necessarily present)
+//
+// Base address lowering is of the form:
+// Hi20: pcalau12i vreg1, %pc_hi20(s)
+// +- Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
+// | Lo20: lu32i.d vreg2, %pc64_lo20(s) !
+// +- Hi12: lu52i.d vreg2, vreg2, %pc64_hi12(s) !
+// |
+// | The large offset can be one of the forms:
+// |
+// +-> 1) Offset that has non zero bits in Hi20 and Lo12 bits:
+// | OffsetHi20: lu12i.w vreg3, 4
+// | OffsetLo12: ori voff, vreg3, 188 ------------------+
+// | |
+// +-> 2) Offset that has non zero bits in Hi20 bits only: |
+// | OffsetHi20: lu12i.w voff, 128 ------------------+
+// | |
+// +-> 3) Offset that has non zero bits in Lo20 bits: |
+// | OffsetHi20: lu12i.w vreg3, 121 ! |
+// | OffsetLo12: ori voff, vreg3, 122 ! |
+// | OffsetLo20: lu32i.d voff, 123 ------------------+
+// +-> 4) Offset that has non zero bits in Hi12 bits: |
+// OffsetHi20: lu12i.w vreg3, 121 ! |
+// OffsetLo12: ori voff, vreg3, 122 ! |
+// OffsetLo20: lu32i.d vreg3, 123 ! |
+// OffsetHi12: lu52i.d voff, vreg3, 124 ------------------+
+// |
+// TailAdd: add.d vreg4, vreg2, voff <------------------+
+//
bool LoongArchMergeBaseOffsetOpt::foldLargeOffset(
MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &TailAdd,
@@ -235,55 +247,81 @@ bool LoongArchMergeBaseOffsetOpt::foldLargeOffset(
Register Rs = TailAdd.getOperand(1).getReg();
Register Rt = TailAdd.getOperand(2).getReg();
Register Reg = Rs == GAReg ? Rt : Rs;
+ SmallVector<MachineInstr *, 4> Instrs;
+ int64_t Offset = 0;
+ int64_t Mask = -1;
+
+ // This can point to one of [ORI, LU12I.W, LU32I.D, LU52I.D]:
+ for (int i = 0; i < 4; i++) {
+ // Handle the case where Reg is R0.
+ if (Reg == LoongArch::R0)
+ break;
- // Can't fold if the register has more than one use.
- if (!Reg.isVirtual() || !MRI->hasOneUse(Reg))
- return false;
- // This can point to an ORI or a LU12I.W:
- MachineInstr &OffsetTail = *MRI->getVRegDef(Reg);
- if (OffsetTail.getOpcode() == LoongArch::ORI) {
- // The offset value has non zero bits in both %hi and %lo parts.
- // Detect an ORI that feeds from a LU12I.W instruction.
- MachineOperand &OriImmOp = OffsetTail.getOperand(2);
- if (OriImmOp.getTargetFlags() != LoongArchII::MO_None)
+ // Can't fold if the register has more than one use.
+ if (!Reg.isVirtual() || !MRI->hasOneUse(Reg))
return false;
- Register OriReg = OffsetTail.getOperand(1).getReg();
- int64_t OffLo = OriImmOp.getImm();
-
- // Handle rs1 of ORI is R0.
- if (OriReg == LoongArch::R0) {
- LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail);
- foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, OffLo);
- OffsetTail.eraseFromParent();
- return true;
- }
- MachineInstr &OffsetLu12i = *MRI->getVRegDef(OriReg);
- MachineOperand &Lu12iImmOp = OffsetLu12i.getOperand(1);
- if (OffsetLu12i.getOpcode() != LoongArch::LU12I_W ||
- Lu12iImmOp.getTargetFlags() != LoongArchII::MO_None ||
- !MRI->hasOneUse(OffsetLu12i.getOperand(0).getReg()))
+ MachineInstr *Curr = MRI->getVRegDef(Reg);
+ if (!Curr)
+ break;
+
+ switch (Curr->getOpcode()) {
+ default:
+ // Can't fold if the instruction opcode is unexpected.
return false;
- int64_t Offset = SignExtend64<32>(Lu12iImmOp.getImm() << 12);
- Offset += OffLo;
- // LU12I.W+ORI sign extends the result.
- Offset = SignExtend64<32>(Offset);
- LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail
- << " " << OffsetLu12i);
- foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
- OffsetTail.eraseFromParent();
- OffsetLu12i.eraseFromParent();
- return true;
- } else if (OffsetTail.getOpcode() == LoongArch::LU12I_W) {
- // The offset value has all zero bits in the lower 12 bits. Only LU12I.W
- // exists.
- LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
- int64_t Offset = SignExtend64<32>(OffsetTail.getOperand(1).getImm() << 12);
- foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
- OffsetTail.eraseFromParent();
- return true;
+ case LoongArch::ORI: {
+ MachineOperand ImmOp = Curr->getOperand(2);
+ if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
+ return false;
+ Offset += ImmOp.getImm();
+ Reg = Curr->getOperand(1).getReg();
+ Instrs.push_back(Curr);
+ break;
+ }
+ case LoongArch::LU12I_W: {
+ MachineOperand ImmOp = Curr->getOperand(1);
+ if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
+ return false;
+ Offset += SignExtend64<32>(ImmOp.getImm() << 12) & Mask;
+ Reg = LoongArch::R0;
+ Instrs.push_back(Curr);
+ break;
+ }
+ case LoongArch::LU32I_D: {
+ MachineOperand ImmOp = Curr->getOperand(2);
+ if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Lo20)
+ return false;
+ Offset += SignExtend64<52>(ImmOp.getImm() << 32) & Mask;
+ Mask ^= 0x000FFFFF00000000ULL;
+ Reg = Curr->getOperand(1).getReg();
+ Instrs.push_back(Curr);
+ break;
+ }
+ case LoongArch::LU52I_D: {
+ MachineOperand ImmOp = Curr->getOperand(2);
+ if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Hi12)
+ return false;
+ Offset += ImmOp.getImm() << 52;
+ Mask ^= 0xFFF0000000000000ULL;
+ Reg = Curr->getOperand(1).getReg();
+ Instrs.push_back(Curr);
+ break;
+ }
+ }
}
- return false;
+
+ // Can't fold if the offset is not extracted.
+ if (!Offset)
+ return false;
+
+ foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
+ LLVM_DEBUG(dbgs() << " Offset Instrs:\n");
+ for (auto I : Instrs) {
+ LLVM_DEBUG(dbgs() << " " << *I);
+ I->eraseFromParent();
+ }
+
+ return true;
}
bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
@@ -344,13 +382,6 @@ bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
[[fallthrough]];
case LoongArch::ADD_D:
// The offset is too large to fit in the immediate field of ADDI.
- // This can be in two forms:
- // 1) LU12I.W hi_offset followed by:
- // ORI lo_offset
- // This happens in case the offset has non zero bits in
- // both hi 20 and lo 12 bits.
- // 2) LU12I.W (offset20)
- // This happens in case the lower 12 bits of the offset are zeros.
return foldLargeOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, DestReg);
break;
}
diff --git a/llvm/test/CodeGen/LoongArch/merge-base-offset.ll b/llvm/test/CodeGen/LoongArch/merge-base-offset.ll
index b53f94303b6ea4..9df5532d51179e 100644
--- a/llvm/test/CodeGen/LoongArch/merge-base-offset.ll
+++ b/llvm/test/CodeGen/LoongArch/merge-base-offset.ll
@@ -1100,14 +1100,11 @@ define dso_local ptr @load_addr_offset_281474439839744() nounwind {
;
; LA64-LARGE-LABEL: load_addr_offset_281474439839744:
; LA64-LARGE: # %bb.0: # %entry
-; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64)
-; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64)
-; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64)
-; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64)
+; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64+2251795518717952)
+; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64+2251795518717952)
+; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+2251795518717952)
+; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+2251795518717952)
; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT: ori $a1, $zero, 0
-; LA64-LARGE-NEXT: lu32i.d $a1, 524287
-; LA64-LARGE-NEXT: add.d $a0, $a0, $a1
; LA64-LARGE-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 281474439839744)
@@ -1131,14 +1128,11 @@ define dso_local ptr @load_addr_offset_248792680471040() nounwind {
;
; LA64-LARGE-LABEL: load_addr_offset_248792680471040:
; LA64-LARGE: # %bb.0: # %entry
-; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64)
-; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64)
-; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64)
-; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64)
+; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64+1990341443768320)
+; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64+1990341443768320)
+; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+1990341443768320)
+; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+1990341443768320)
; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT: lu12i.w $a1, 502733
-; LA64-LARGE-NEXT: lu32i.d $a1, 463412
-; LA64-LARGE-NEXT: add.d $a0, $a0, $a1
; LA64-LARGE-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 248792680471040)
@@ -1163,15 +1157,11 @@ define dso_local ptr @load_addr_offset_9380351707272() nounwind {
;
; LA64-LARGE-LABEL: load_addr_offset_9380351707272:
; LA64-LARGE: # %bb.0: # %entry
-; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64)
-; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64)
-; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64)
-; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64)
+; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64+75042813658176)
+; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64+75042813658176)
+; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+75042813658176)
+; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+75042813658176)
; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT: lu12i.w $a1, 279556
-; LA64-LARGE-NEXT: ori $a1, $a1, 1088
-; LA64-LARGE-NEXT: lu32i.d $a1, 17472
-; LA64-LARGE-NEXT: add.d $a0, $a0, $a1
; LA64-LARGE-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 9380351707272)
@@ -1194,13 +1184,11 @@ define dso_local ptr @load_addr_offset_562949953421312() nounwind {
;
; LA64-LARGE-LABEL: load_addr_offset_562949953421312:
; LA64-LARGE: # %bb.0: # %entry
-; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64)
-; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64)
-; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64)
-; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64)
+; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64+4503599627370496)
+; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64+4503599627370496)
+; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+4503599627370496)
+; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+4503599627370496)
; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT: lu52i.d $a1, $zero, 1
-; LA64-LARGE-NEXT: add.d $a0, $a0, $a1
; LA64-LARGE-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 562949953421312)
@@ -1226,16 +1214,11 @@ define dso_local ptr @load_addr_offset_614749556925924693() nounwind {
;
; LA64-LARGE-LABEL: load_addr_offset_614749556925924693:
; LA64-LARGE: # %bb.0: # %entry
-; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64)
-; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64)
-; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64)
-; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64)
+; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64+4917996455407397544)
+; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64+4917996455407397544)
+; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+4917996455407397544)
+; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+4917996455407397544)
; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT: lu12i.w $a1, 209666
-; LA64-LARGE-NEXT: ori $a1, $a1, 2728
-; LA64-LARGE-NEXT: lu32i.d $a1, 15288
-; LA64-LARGE-NEXT: lu52i.d $a1, $a1, 1092
-; LA64-LARGE-NEXT: add.d $a0, $a0, $a1
; LA64-LARGE-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 614749556925924693)
More information about the llvm-commits
mailing list