[llvm] b225b15 - [LoongArch] Merge base and offset for large offsets (#113277)

via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 23 04:43:27 PDT 2024


Author: hev
Date: 2024-10-23T19:43:23+08:00
New Revision: b225b15a3d5ff28e4848369ef14ea63f9e418a9d

URL: https://github.com/llvm/llvm-project/commit/b225b15a3d5ff28e4848369ef14ea63f9e418a9d
DIFF: https://github.com/llvm/llvm-project/commit/b225b15a3d5ff28e4848369ef14ea63f9e418a9d.diff

LOG: [LoongArch] Merge base and offset for large offsets (#113277)

This PR merges large offsets into the base address loading.

Added: 
    

Modified: 
    llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
    llvm/test/CodeGen/LoongArch/merge-base-offset.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp b/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
index 47071d29c2cd44..e9455fdd23ba54 100644
--- a/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
@@ -183,7 +183,6 @@ void LoongArchMergeBaseOffsetOpt::foldOffset(
     MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
     MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
     int64_t Offset) {
-  assert(isInt<32>(Offset) && "Unexpected offset");
   // Put the offset back in Hi and the Lo
   Hi20.getOperand(1).setOffset(Offset);
   Lo12.getOperand(2).setOffset(Offset);
@@ -209,22 +208,35 @@ void LoongArchMergeBaseOffsetOpt::foldOffset(
 // instructions and deletes TailAdd and the instructions that produced the
 // offset.
 //
-//                     Base address lowering is of the form:
-//                       Hi20:  pcalau12i vreg1, %pc_hi20(s)
-//                       Lo12:  addi.d vreg2, vreg1, %pc_lo12(s)
-//                       /                                  \
-//                      /                                    \
-//                     /                                      \
-//                    /  The large offset can be of two forms: \
-//  1) Offset that has non zero bits in lower      2) Offset that has non zero
-//     12 bits and upper 20 bits                      bits in upper 20 bits only
-//   OffsetHi: lu12i.w vreg3, 4
-//   OffsetLo: ori voff, vreg3, 188                 OffsetHi: lu12i.w voff, 128
-//                    \                                        /
-//                     \                                      /
-//                      \                                    /
-//                       \                                  /
-//                        TailAdd: add.d  vreg4, vreg2, voff
+//   (The instructions marked with "!" are not necessarily present)
+//
+//        Base address lowering is of the form:
+//           Hi20:  pcalau12i vreg1, %pc_hi20(s)
+//        +- Lo12:  addi.d vreg2, vreg1, %pc_lo12(s)
+//        |  Lo20:  lu32i.d vreg2, %pc64_lo20(s) !
+//        +- Hi12:  lu52i.d vreg2, vreg2, %pc64_hi12(s) !
+//        |
+//        | The large offset can be one of the forms:
+//        |
+//        +-> 1) Offset that has non zero bits in Hi20 and Lo12 bits:
+//        |     OffsetHi20: lu12i.w vreg3, 4
+//        |     OffsetLo12: ori voff, vreg3, 188    ------------------+
+//        |                                                           |
+//        +-> 2) Offset that has non zero bits in Hi20 bits only:     |
+//        |     OffsetHi20: lu12i.w voff, 128       ------------------+
+//        |                                                           |
+//        +-> 3) Offset that has non zero bits in Lo20 bits:          |
+//        |     OffsetHi20: lu12i.w vreg3, 121 !                      |
+//        |     OffsetLo12: ori voff, vreg3, 122 !                    |
+//        |     OffsetLo20: lu32i.d voff, 123       ------------------+
+//        +-> 4) Offset that has non zero bits in Hi12 bits:          |
+//              OffsetHi20: lu12i.w vreg3, 121 !                      |
+//              OffsetLo12: ori voff, vreg3, 122 !                    |
+//              OffsetLo20: lu32i.d vreg3, 123 !                      |
+//              OffsetHi12: lu52i.d voff, vreg3, 124 -----------------+
+//                                                                    |
+//        TailAdd: add.d  vreg4, vreg2, voff       <------------------+
+//
 bool LoongArchMergeBaseOffsetOpt::foldLargeOffset(
     MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
     MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &TailAdd,
@@ -235,55 +247,81 @@ bool LoongArchMergeBaseOffsetOpt::foldLargeOffset(
   Register Rs = TailAdd.getOperand(1).getReg();
   Register Rt = TailAdd.getOperand(2).getReg();
   Register Reg = Rs == GAReg ? Rt : Rs;
+  SmallVector<MachineInstr *, 4> Instrs;
+  int64_t Offset = 0;
+  int64_t Mask = -1;
+
+  // This can point to one of [ORI, LU12I.W, LU32I.D, LU52I.D]:
+  for (int i = 0; i < 4; i++) {
+    // Stop walking the def chain once Reg is R0.
+    if (Reg == LoongArch::R0)
+      break;
 
-  // Can't fold if the register has more than one use.
-  if (!Reg.isVirtual() || !MRI->hasOneUse(Reg))
-    return false;
-  // This can point to an ORI or a LU12I.W:
-  MachineInstr &OffsetTail = *MRI->getVRegDef(Reg);
-  if (OffsetTail.getOpcode() == LoongArch::ORI) {
-    // The offset value has non zero bits in both %hi and %lo parts.
-    // Detect an ORI that feeds from a LU12I.W instruction.
-    MachineOperand &OriImmOp = OffsetTail.getOperand(2);
-    if (OriImmOp.getTargetFlags() != LoongArchII::MO_None)
+    // Can't fold if the register has more than one use.
+    if (!Reg.isVirtual() || !MRI->hasOneUse(Reg))
       return false;
-    Register OriReg = OffsetTail.getOperand(1).getReg();
-    int64_t OffLo = OriImmOp.getImm();
-
-    // Handle rs1 of ORI is R0.
-    if (OriReg == LoongArch::R0) {
-      LLVM_DEBUG(dbgs() << "  Offset Instrs: " << OffsetTail);
-      foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, OffLo);
-      OffsetTail.eraseFromParent();
-      return true;
-    }
 
-    MachineInstr &OffsetLu12i = *MRI->getVRegDef(OriReg);
-    MachineOperand &Lu12iImmOp = OffsetLu12i.getOperand(1);
-    if (OffsetLu12i.getOpcode() != LoongArch::LU12I_W ||
-        Lu12iImmOp.getTargetFlags() != LoongArchII::MO_None ||
-        !MRI->hasOneUse(OffsetLu12i.getOperand(0).getReg()))
+    MachineInstr *Curr = MRI->getVRegDef(Reg);
+    if (!Curr)
+      break;
+
+    switch (Curr->getOpcode()) {
+    default:
+      // Can't fold if the instruction opcode is unexpected.
       return false;
-    int64_t Offset = SignExtend64<32>(Lu12iImmOp.getImm() << 12);
-    Offset += OffLo;
-    // LU12I.W+ORI sign extends the result.
-    Offset = SignExtend64<32>(Offset);
-    LLVM_DEBUG(dbgs() << "  Offset Instrs: " << OffsetTail
-                      << "                 " << OffsetLu12i);
-    foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
-    OffsetTail.eraseFromParent();
-    OffsetLu12i.eraseFromParent();
-    return true;
-  } else if (OffsetTail.getOpcode() == LoongArch::LU12I_W) {
-    // The offset value has all zero bits in the lower 12 bits. Only LU12I.W
-    // exists.
-    LLVM_DEBUG(dbgs() << "  Offset Instr: " << OffsetTail);
-    int64_t Offset = SignExtend64<32>(OffsetTail.getOperand(1).getImm() << 12);
-    foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
-    OffsetTail.eraseFromParent();
-    return true;
+    case LoongArch::ORI: {
+      MachineOperand ImmOp = Curr->getOperand(2);
+      if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
+        return false;
+      Offset += ImmOp.getImm();
+      Reg = Curr->getOperand(1).getReg();
+      Instrs.push_back(Curr);
+      break;
+    }
+    case LoongArch::LU12I_W: {
+      MachineOperand ImmOp = Curr->getOperand(1);
+      if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
+        return false;
+      Offset += SignExtend64<32>(ImmOp.getImm() << 12) & Mask;
+      Reg = LoongArch::R0;
+      Instrs.push_back(Curr);
+      break;
+    }
+    case LoongArch::LU32I_D: {
+      MachineOperand ImmOp = Curr->getOperand(2);
+      if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Lo20)
+        return false;
+      Offset += SignExtend64<52>(ImmOp.getImm() << 32) & Mask;
+      Mask ^= 0x000FFFFF00000000ULL;
+      Reg = Curr->getOperand(1).getReg();
+      Instrs.push_back(Curr);
+      break;
+    }
+    case LoongArch::LU52I_D: {
+      MachineOperand ImmOp = Curr->getOperand(2);
+      if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Hi12)
+        return false;
+      Offset += ImmOp.getImm() << 52;
+      Mask ^= 0xFFF0000000000000ULL;
+      Reg = Curr->getOperand(1).getReg();
+      Instrs.push_back(Curr);
+      break;
+    }
+    }
   }
-  return false;
+
+  // Can't fold if the offset is not extracted.
+  if (!Offset)
+    return false;
+
+  foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
+  LLVM_DEBUG(dbgs() << "  Offset Instrs:\n");
+  for (auto I : Instrs) {
+    LLVM_DEBUG(dbgs() << "                 " << *I);
+    I->eraseFromParent();
+  }
+
+  return true;
 }
 
 bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
@@ -344,13 +382,6 @@ bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
     [[fallthrough]];
   case LoongArch::ADD_D:
     // The offset is too large to fit in the immediate field of ADDI.
-    // This can be in two forms:
-    // 1) LU12I.W hi_offset followed by:
-    //    ORI lo_offset
-    //    This happens in case the offset has non zero bits in
-    //    both hi 20 and lo 12 bits.
-    // 2) LU12I.W (offset20)
-    //    This happens in case the lower 12 bits of the offset are zeros.
     return foldLargeOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, DestReg);
     break;
   }

diff  --git a/llvm/test/CodeGen/LoongArch/merge-base-offset.ll b/llvm/test/CodeGen/LoongArch/merge-base-offset.ll
index b53f94303b6ea4..9df5532d51179e 100644
--- a/llvm/test/CodeGen/LoongArch/merge-base-offset.ll
+++ b/llvm/test/CodeGen/LoongArch/merge-base-offset.ll
@@ -1100,14 +1100,11 @@ define dso_local ptr @load_addr_offset_281474439839744() nounwind {
 ;
 ; LA64-LARGE-LABEL: load_addr_offset_281474439839744:
 ; LA64-LARGE:       # %bb.0: # %entry
-; LA64-LARGE-NEXT:    pcalau12i $a0, %pc_hi20(g_a64)
-; LA64-LARGE-NEXT:    addi.d $a1, $zero, %pc_lo12(g_a64)
-; LA64-LARGE-NEXT:    lu32i.d $a1, %pc64_lo20(g_a64)
-; LA64-LARGE-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(g_a64)
+; LA64-LARGE-NEXT:    pcalau12i $a0, %pc_hi20(g_a64+2251795518717952)
+; LA64-LARGE-NEXT:    addi.d $a1, $zero, %pc_lo12(g_a64+2251795518717952)
+; LA64-LARGE-NEXT:    lu32i.d $a1, %pc64_lo20(g_a64+2251795518717952)
+; LA64-LARGE-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(g_a64+2251795518717952)
 ; LA64-LARGE-NEXT:    add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT:    ori $a1, $zero, 0
-; LA64-LARGE-NEXT:    lu32i.d $a1, 524287
-; LA64-LARGE-NEXT:    add.d $a0, $a0, $a1
 ; LA64-LARGE-NEXT:    ret
 entry:
   ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 281474439839744)
@@ -1131,14 +1128,11 @@ define dso_local ptr @load_addr_offset_248792680471040() nounwind {
 ;
 ; LA64-LARGE-LABEL: load_addr_offset_248792680471040:
 ; LA64-LARGE:       # %bb.0: # %entry
-; LA64-LARGE-NEXT:    pcalau12i $a0, %pc_hi20(g_a64)
-; LA64-LARGE-NEXT:    addi.d $a1, $zero, %pc_lo12(g_a64)
-; LA64-LARGE-NEXT:    lu32i.d $a1, %pc64_lo20(g_a64)
-; LA64-LARGE-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(g_a64)
+; LA64-LARGE-NEXT:    pcalau12i $a0, %pc_hi20(g_a64+1990341443768320)
+; LA64-LARGE-NEXT:    addi.d $a1, $zero, %pc_lo12(g_a64+1990341443768320)
+; LA64-LARGE-NEXT:    lu32i.d $a1, %pc64_lo20(g_a64+1990341443768320)
+; LA64-LARGE-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(g_a64+1990341443768320)
 ; LA64-LARGE-NEXT:    add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT:    lu12i.w $a1, 502733
-; LA64-LARGE-NEXT:    lu32i.d $a1, 463412
-; LA64-LARGE-NEXT:    add.d $a0, $a0, $a1
 ; LA64-LARGE-NEXT:    ret
 entry:
   ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 248792680471040)
@@ -1163,15 +1157,11 @@ define dso_local ptr @load_addr_offset_9380351707272() nounwind {
 ;
 ; LA64-LARGE-LABEL: load_addr_offset_9380351707272:
 ; LA64-LARGE:       # %bb.0: # %entry
-; LA64-LARGE-NEXT:    pcalau12i $a0, %pc_hi20(g_a64)
-; LA64-LARGE-NEXT:    addi.d $a1, $zero, %pc_lo12(g_a64)
-; LA64-LARGE-NEXT:    lu32i.d $a1, %pc64_lo20(g_a64)
-; LA64-LARGE-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(g_a64)
+; LA64-LARGE-NEXT:    pcalau12i $a0, %pc_hi20(g_a64+75042813658176)
+; LA64-LARGE-NEXT:    addi.d $a1, $zero, %pc_lo12(g_a64+75042813658176)
+; LA64-LARGE-NEXT:    lu32i.d $a1, %pc64_lo20(g_a64+75042813658176)
+; LA64-LARGE-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(g_a64+75042813658176)
 ; LA64-LARGE-NEXT:    add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT:    lu12i.w $a1, 279556
-; LA64-LARGE-NEXT:    ori $a1, $a1, 1088
-; LA64-LARGE-NEXT:    lu32i.d $a1, 17472
-; LA64-LARGE-NEXT:    add.d $a0, $a0, $a1
 ; LA64-LARGE-NEXT:    ret
 entry:
   ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 9380351707272)
@@ -1194,13 +1184,11 @@ define dso_local ptr @load_addr_offset_562949953421312() nounwind {
 ;
 ; LA64-LARGE-LABEL: load_addr_offset_562949953421312:
 ; LA64-LARGE:       # %bb.0: # %entry
-; LA64-LARGE-NEXT:    pcalau12i $a0, %pc_hi20(g_a64)
-; LA64-LARGE-NEXT:    addi.d $a1, $zero, %pc_lo12(g_a64)
-; LA64-LARGE-NEXT:    lu32i.d $a1, %pc64_lo20(g_a64)
-; LA64-LARGE-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(g_a64)
+; LA64-LARGE-NEXT:    pcalau12i $a0, %pc_hi20(g_a64+4503599627370496)
+; LA64-LARGE-NEXT:    addi.d $a1, $zero, %pc_lo12(g_a64+4503599627370496)
+; LA64-LARGE-NEXT:    lu32i.d $a1, %pc64_lo20(g_a64+4503599627370496)
+; LA64-LARGE-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(g_a64+4503599627370496)
 ; LA64-LARGE-NEXT:    add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT:    lu52i.d $a1, $zero, 1
-; LA64-LARGE-NEXT:    add.d $a0, $a0, $a1
 ; LA64-LARGE-NEXT:    ret
 entry:
   ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 562949953421312)
@@ -1226,16 +1214,11 @@ define dso_local ptr @load_addr_offset_614749556925924693() nounwind {
 ;
 ; LA64-LARGE-LABEL: load_addr_offset_614749556925924693:
 ; LA64-LARGE:       # %bb.0: # %entry
-; LA64-LARGE-NEXT:    pcalau12i $a0, %pc_hi20(g_a64)
-; LA64-LARGE-NEXT:    addi.d $a1, $zero, %pc_lo12(g_a64)
-; LA64-LARGE-NEXT:    lu32i.d $a1, %pc64_lo20(g_a64)
-; LA64-LARGE-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(g_a64)
+; LA64-LARGE-NEXT:    pcalau12i $a0, %pc_hi20(g_a64+4917996455407397544)
+; LA64-LARGE-NEXT:    addi.d $a1, $zero, %pc_lo12(g_a64+4917996455407397544)
+; LA64-LARGE-NEXT:    lu32i.d $a1, %pc64_lo20(g_a64+4917996455407397544)
+; LA64-LARGE-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(g_a64+4917996455407397544)
 ; LA64-LARGE-NEXT:    add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT:    lu12i.w $a1, 209666
-; LA64-LARGE-NEXT:    ori $a1, $a1, 2728
-; LA64-LARGE-NEXT:    lu32i.d $a1, 15288
-; LA64-LARGE-NEXT:    lu52i.d $a1, $a1, 1092
-; LA64-LARGE-NEXT:    add.d $a0, $a0, $a1
 ; LA64-LARGE-NEXT:    ret
 entry:
   ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 614749556925924693)


        


More information about the llvm-commits mailing list