[llvm] [PowerPC] Peephole address calculation in TOC memops (PR #76488)

via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 21 01:25:54 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-powerpc

Author: Qiu Chaofan (ecnelises)

<details>
<summary>Changes</summary>

In the peephole pass that runs after instruction selection (isel), optimize `addi C, A, B; memop X, 0(C)` into `memop X, A(B)`.

This relands commit 5e28d30f1fb10faf2db2f8bf0502e7fd72e6ac2e.

---

Patch is 29.35 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/76488.diff


5 Files Affected:

- (modified) llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp (+184-213) 
- (modified) llvm/test/CodeGen/PowerPC/toc-data-common.ll (+101-49) 
- (modified) llvm/test/CodeGen/PowerPC/toc-data-const.ll (+26-19) 
- (added) llvm/test/CodeGen/PowerPC/toc-data-peephole-aligment.ll (+16) 
- (modified) llvm/test/CodeGen/PowerPC/toc-data.ll (+11-11) 


``````````diff
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 9e5f0b36616d1b..34e9dd6608cf51 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -7662,241 +7662,212 @@ static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
     DAG->RemoveDeadNode(InitialADDI.getNode());
 }
 
-void PPCDAGToDAGISel::PeepholePPC64() {
-  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
-  bool HasAIXSmallLocalExecTLS = Subtarget->hasAIXSmallLocalExecTLS();
-
-  while (Position != CurDAG->allnodes_begin()) {
-    SDNode *N = &*--Position;
-    // Skip dead nodes and any non-machine opcodes.
-    if (N->use_empty() || !N->isMachineOpcode())
-      continue;
-
-    if (isVSXSwap(SDValue(N, 0)))
-      reduceVSXSwap(N, CurDAG);
-
-    // This optimization is performed for non-TOC-based local-exec accesses.
-    if (HasAIXSmallLocalExecTLS)
-      foldADDIForLocalExecAccesses(N, CurDAG);
+static bool isValidOffsetMemOp(SDNode *N, bool &IsLoad,
+                               MaybeAlign &ExtraAlign) {
+  switch (N->getMachineOpcode()) {
+  default:
+    return false;
+  // Global must be word-aligned for LD, STD, LWA.
+  case PPC::LWA:
+  case PPC::LD:
+  case PPC::DFLOADf64:
+  case PPC::DFLOADf32:
+    ExtraAlign = Align(4);
+    [[fallthrough]];
+  case PPC::LBZ:
+  case PPC::LBZ8:
+  case PPC::LFD:
+  case PPC::LFS:
+  case PPC::LHA:
+  case PPC::LHA8:
+  case PPC::LHZ:
+  case PPC::LHZ8:
+  case PPC::LWZ:
+  case PPC::LWZ8:
+    IsLoad = true;
+    break;
+  case PPC::STD:
+  case PPC::DFSTOREf64:
+  case PPC::DFSTOREf32:
+    ExtraAlign = Align(4);
+    [[fallthrough]];
+  case PPC::STB:
+  case PPC::STB8:
+  case PPC::STFD:
+  case PPC::STFS:
+  case PPC::STH:
+  case PPC::STH8:
+  case PPC::STW:
+  case PPC::STW8:
+    break;
+  }
+  SDValue Base = N->getOperand(IsLoad ? 1 : 2);
+  if (!Base.isMachineOpcode())
+    return false;
+  switch (Base.getMachineOpcode()) {
+  default:
+    return false;
+  case PPC::ADDI8:
+  case PPC::ADDI:
+  case PPC::ADDIdtprelL:
+  case PPC::ADDItlsldL:
+  case PPC::ADDItocL:
+  case PPC::ADDItoc:
+  case PPC::ADDItoc8:
+    break;
+  }
+  return true;
+}
 
-    unsigned FirstOp;
-    unsigned StorageOpcode = N->getMachineOpcode();
-    bool RequiresMod4Offset = false;
+static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
+                              const PPCSubtarget *Subtarget) {
+  // TODO: Enable for AIX 32-bit.
+  if (!Subtarget->isPPC64())
+    return;
 
-    switch (StorageOpcode) {
-    default: continue;
+  bool IsLoad = false;
+  MaybeAlign ExtraAlign;
+  if (!isValidOffsetMemOp(N, IsLoad, ExtraAlign))
+    return;
 
-    case PPC::LWA:
-    case PPC::LD:
-    case PPC::DFLOADf64:
-    case PPC::DFLOADf32:
-      RequiresMod4Offset = true;
-      [[fallthrough]];
-    case PPC::LBZ:
-    case PPC::LBZ8:
-    case PPC::LFD:
-    case PPC::LFS:
-    case PPC::LHA:
-    case PPC::LHA8:
-    case PPC::LHZ:
-    case PPC::LHZ8:
-    case PPC::LWZ:
-    case PPC::LWZ8:
-      FirstOp = 0;
-      break;
+  SDValue MemBase = N->getOperand(IsLoad ? 1 : 2);
+  unsigned BaseOpc = MemBase.getMachineOpcode();
+  auto *MemOffset = dyn_cast<ConstantSDNode>(N->getOperand(IsLoad ? 0 : 1));
 
-    case PPC::STD:
-    case PPC::DFSTOREf64:
-    case PPC::DFSTOREf32:
-      RequiresMod4Offset = true;
-      [[fallthrough]];
-    case PPC::STB:
-    case PPC::STB8:
-    case PPC::STFD:
-    case PPC::STFS:
-    case PPC::STH:
-    case PPC::STH8:
-    case PPC::STW:
-    case PPC::STW8:
-      FirstOp = 1;
-      break;
-    }
+  // Only additions with constant offsets will be folded.
+  if (!MemOffset)
+    return;
+  assert(MemBase.getNumOperands() == 2 && "Invalid base of memop with offset!");
 
-    // If this is a load or store with a zero offset, or within the alignment,
-    // we may be able to fold an add-immediate into the memory operation.
-    // The check against alignment is below, as it can't occur until we check
-    // the arguments to N
-    if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
-      continue;
+  SDValue ImmOp, RegOp;
+  // ADDItoc and ADDItoc8 ('la') puts the register at the second operand.
+  if (BaseOpc == PPC::ADDItoc || BaseOpc == PPC::ADDItoc8) {
+    ImmOp = MemBase.getOperand(0);
+    RegOp = MemBase.getOperand(1);
+  } else {
+    ImmOp = MemBase.getOperand(1);
+    RegOp = MemBase.getOperand(0);
+  }
 
-    SDValue Base = N->getOperand(FirstOp + 1);
-    if (!Base.isMachineOpcode())
-      continue;
+  MaybeAlign ImmAlign;
+  if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOp))
+    ImmAlign = GA->getGlobal()->getPointerAlignment(DAG->getDataLayout());
+  else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOp))
+    ImmAlign = CP->getAlign();
 
-    unsigned Flags = 0;
-    bool ReplaceFlags = true;
+  if (ImmAlign && ExtraAlign && ImmAlign.value() < ExtraAlign.value())
+    return;
 
-    // When the feeding operation is an add-immediate of some sort,
-    // determine whether we need to add relocation information to the
-    // target flags on the immediate operand when we fold it into the
-    // load instruction.
-    //
-    // For something like ADDItocL, the relocation information is
-    // inferred from the opcode; when we process it in the AsmPrinter,
-    // we add the necessary relocation there.  A load, though, can receive
-    // relocation from various flavors of ADDIxxx, so we need to carry
-    // the relocation information in the target flags.
-    switch (Base.getMachineOpcode()) {
-    default: continue;
-
-    case PPC::ADDI8:
-    case PPC::ADDI:
-      // In some cases (such as TLS) the relocation information
-      // is already in place on the operand, so copying the operand
-      // is sufficient.
-      ReplaceFlags = false;
-      break;
-    case PPC::ADDIdtprelL:
-      Flags = PPCII::MO_DTPREL_LO;
-      break;
-    case PPC::ADDItlsldL:
-      Flags = PPCII::MO_TLSLD_LO;
-      break;
-    case PPC::ADDItocL:
-      Flags = PPCII::MO_TOC_LO;
-      break;
+  // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
+  // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
+  // we might have needed different @ha relocation values for the offset
+  // pointers).
+  int MaxDisplacement = 7;
+  if (ImmAlign && ImmAlign.value().value() < 8)
+    MaxDisplacement = (int)ImmAlign.value().value() - 1;
+
+  // If addis also contributes to TOC relocation, it also needs to be updated.
+  bool UpdateHaBase = false;
+  SDValue HaBase = MemBase.getOperand(0);
+  int64_t Offset = MemOffset->getSExtValue();
+
+  // Some flags in addition needs to be carried to new memop.
+  PPCII::TOF NewOpFlags = PPCII::MO_NO_FLAG;
+  if (BaseOpc == PPC::ADDIdtprelL)
+    NewOpFlags = PPCII::MO_DTPREL_LO;
+  else if (BaseOpc == PPC::ADDItlsldL)
+    NewOpFlags = PPCII::MO_TLSLD_LO;
+  else if (BaseOpc == PPC::ADDItocL)
+    NewOpFlags = PPCII::MO_TOC_LO;
+
+  if (NewOpFlags) {
+    if (Offset < 0 || Offset > MaxDisplacement) {
+      // Check base opcode and its uses, quit if it has multiple uses.
+      if (MemBase.getMachineOpcode() != PPC::ADDItocL || !MemBase.hasOneUse() ||
+          !HaBase.isMachineOpcode() || !HaBase.hasOneUse() ||
+          HaBase.getMachineOpcode() != PPC::ADDIStocHA8 ||
+          HaBase.getOperand(1) != ImmOp)
+        return;
+      UpdateHaBase = true;
     }
 
-    SDValue ImmOpnd = Base.getOperand(1);
-
-    // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
-    // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
-    // we might have needed different @ha relocation values for the offset
-    // pointers).
-    int MaxDisplacement = 7;
-    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
-      const GlobalValue *GV = GA->getGlobal();
-      Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
-      MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement);
+    if (const auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOp)) {
+      // We can't perform this optimization for data whose alignment is
+      // insufficient for the instruction encoding.
+      if (ImmAlign && ImmAlign.value() < Align(4) &&
+          (ExtraAlign || (Offset % 4) != 0))
+        return;
+      ImmOp = DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(ImmOp),
+                                          MVT::i64, Offset, NewOpFlags);
+    } else if (const auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOp)) {
+      ImmOp = DAG->getTargetConstantPool(CP->getConstVal(), MVT::i64,
+                                         CP->getAlign(), Offset, NewOpFlags);
     }
+  } else {
+    if (ImmAlign && ExtraAlign && ImmAlign.value() < ExtraAlign.value())
+      return;
+    if (auto *C = dyn_cast<ConstantSDNode>(ImmOp)) {
+      Offset += C->getSExtValue();
 
-    bool UpdateHBase = false;
-    SDValue HBase = Base.getOperand(0);
-
-    int Offset = N->getConstantOperandVal(FirstOp);
-    if (ReplaceFlags) {
-      if (Offset < 0 || Offset > MaxDisplacement) {
-        // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
-        // one use, then we can do this for any offset, we just need to also
-        // update the offset (i.e. the symbol addend) on the addis also.
-        if (Base.getMachineOpcode() != PPC::ADDItocL)
-          continue;
-
-        if (!HBase.isMachineOpcode() ||
-            HBase.getMachineOpcode() != PPC::ADDIStocHA8)
-          continue;
-
-        if (!Base.hasOneUse() || !HBase.hasOneUse())
-          continue;
-
-        SDValue HImmOpnd = HBase.getOperand(1);
-        if (HImmOpnd != ImmOpnd)
-          continue;
-
-        UpdateHBase = true;
-      }
-    } else {
-      // Global addresses can be folded, but only if they are sufficiently
-      // aligned.
-      if (RequiresMod4Offset) {
-        if (GlobalAddressSDNode *GA =
-                dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
-          const GlobalValue *GV = GA->getGlobal();
-          Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
-          if (Alignment < 4)
-            continue;
-        }
+      if ((Offset % ExtraAlign.valueOrOne().value()) != 0 || !isInt<16>(Offset))
+        return;
+      ImmOp =
+          DAG->getTargetConstant(Offset, SDLoc(ImmOp), ImmOp.getValueType());
+    } else if (Offset != 0) {
+      // This optimization is performed for non-TOC-based local-exec accesses.
+      if (Subtarget->hasAIXSmallLocalExecTLS() &&
+          isEligibleToFoldADDIForLocalExecAccesses(DAG, MemBase)) {
+        // Add the non-zero offset information into the load or store
+        // instruction to be used for non-TOC-based local-exec accesses.
+        GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOp);
+        assert(GA && "Expecting a valid GlobalAddressSDNode when folding "
+                     "addi into local-exec accesses!");
+        ImmOp = DAG->getTargetGlobalAddress(
+            GA->getGlobal(), SDLoc(GA), MVT::i64, Offset, GA->getTargetFlags());
+      } else {
+        return;
       }
+    }
+  }
 
-      // If we're directly folding the addend from an addi instruction, then:
-      //  1. In general, the offset on the memory access must be zero.
-      //  2. If the addend is a constant, then it can be combined with a
-      //     non-zero offset, but only if the result meets the encoding
-      //     requirements.
-      if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
-        Offset += C->getSExtValue();
+  LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase:    ");
+  LLVM_DEBUG(MemBase->dump(DAG));
+  LLVM_DEBUG(dbgs() << "\nN: ");
+  LLVM_DEBUG(N->dump(DAG));
+  LLVM_DEBUG(dbgs() << "\n");
 
-        if (RequiresMod4Offset && (Offset % 4) != 0)
-          continue;
+  if (IsLoad)
+    (void)DAG->UpdateNodeOperands(N, ImmOp, RegOp, N->getOperand(2));
+  else
+    (void)DAG->UpdateNodeOperands(N, N->getOperand(0), ImmOp, RegOp,
+                                  N->getOperand(3));
 
-        if (!isInt<16>(Offset))
-          continue;
+  if (UpdateHaBase)
+    (void)DAG->UpdateNodeOperands(HaBase.getNode(), HaBase.getOperand(0),
+                                  ImmOp);
 
-        ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
-                                            ImmOpnd.getValueType());
-      } else if (Offset != 0) {
-        // This optimization is performed for non-TOC-based local-exec accesses.
-        if (HasAIXSmallLocalExecTLS &&
-            isEligibleToFoldADDIForLocalExecAccesses(CurDAG, Base)) {
-          // Add the non-zero offset information into the load or store
-          // instruction to be used for non-TOC-based local-exec accesses.
-          GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd);
-          assert(GA && "Expecting a valid GlobalAddressSDNode when folding "
-                       "addi into local-exec accesses!");
-          ImmOpnd = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
-                                                   MVT::i64, Offset,
-                                                   GA->getTargetFlags());
-        } else
-          continue;
-      }
-    }
+  if (MemBase.getNode()->use_empty())
+    DAG->RemoveDeadNode(MemBase.getNode());
+}
 
-    // We found an opportunity.  Reverse the operands from the add
-    // immediate and substitute them into the load or store.  If
-    // needed, update the target flags for the immediate operand to
-    // reflect the necessary relocation information.
-    LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase:    ");
-    LLVM_DEBUG(Base->dump(CurDAG));
-    LLVM_DEBUG(dbgs() << "\nN: ");
-    LLVM_DEBUG(N->dump(CurDAG));
-    LLVM_DEBUG(dbgs() << "\n");
+void PPCDAGToDAGISel::PeepholePPC64() {
+  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
+  bool HasAIXSmallLocalExecTLS = Subtarget->hasAIXSmallLocalExecTLS();
 
-    // If the relocation information isn't already present on the
-    // immediate operand, add it now.
-    if (ReplaceFlags) {
-      if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
-        SDLoc dl(GA);
-        const GlobalValue *GV = GA->getGlobal();
-        Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
-        // We can't perform this optimization for data whose alignment
-        // is insufficient for the instruction encoding.
-        if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) {
-          LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
-          continue;
-        }
-        ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
-      } else if (ConstantPoolSDNode *CP =
-                 dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
-        const Constant *C = CP->getConstVal();
-        ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(),
-                                                Offset, Flags);
-      }
-    }
+  while (Position != CurDAG->allnodes_begin()) {
+    SDNode *N = &*--Position;
+    // Skip dead nodes and any non-machine opcodes.
+    if (N->use_empty() || !N->isMachineOpcode())
+      continue;
 
-    if (FirstOp == 1) // Store
-      (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
-                                       Base.getOperand(0), N->getOperand(3));
-    else // Load
-      (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
-                                       N->getOperand(2));
+    if (isVSXSwap(SDValue(N, 0)))
+      reduceVSXSwap(N, CurDAG);
 
-    if (UpdateHBase)
-      (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
-                                       ImmOpnd);
+    // This optimization is performed for non-TOC-based local-exec accesses.
+    if (HasAIXSmallLocalExecTLS)
+      foldADDIForLocalExecAccesses(N, CurDAG);
 
-    // The add-immediate may now be dead, in which case remove it.
-    if (Base.getNode()->use_empty())
-      CurDAG->RemoveDeadNode(Base.getNode());
+    peepholeMemOffset(N, CurDAG, Subtarget);
   }
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/toc-data-common.ll b/llvm/test/CodeGen/PowerPC/toc-data-common.ll
index 7747f2eecc935e..aa9db48c9937f4 100644
--- a/llvm/test/CodeGen/PowerPC/toc-data-common.ll
+++ b/llvm/test/CodeGen/PowerPC/toc-data-common.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s -DINSTR=lwz --check-prefix=CHECK
-; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s -DINSTR=ld --check-prefix=CHECK
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s -DINSTR=lwz --check-prefixes=CHECK32,CHECK
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s -DINSTR=ld --check-prefixes=CHECK64,CHECK
 
 ; RUN: llc -filetype=obj -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s -o %t32.o
 ; RUN: llvm-objdump -t --symbol-description %t32.o | FileCheck %s --check-prefix=OBJ32
@@ -14,17 +14,27 @@
 @a4 = global i32 0, align 4
 
 define void @set(i32 noundef %_a) {
-; CHECK-LABEL: set:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: la 4, a2[TD](2)
-; CHECK-NEXT: la 5, a1[TD](2)
-; CHECK-NEXT: stw 3, 0(4)
-; CHECK-NEXT: [[INSTR]] 4, L..C0(2) # @a4
-; CHECK-NEXT: stw 3, 0(5)
-; CHECK-NEXT: [[INSTR]] 5, L..C1(2) # @a3
-; CHECK-NEXT: stw 3, 0(4)
-; CHECK-NEXT: stw 3, 0(5)
-; CHECK-NEXT: blr
+; CHECK32-LABEL: set:
+; CHECK32:       # %bb.0: # %entry
+; CHECK32-NEXT:    la 4, a2[TD](2)
+; CHECK32-NEXT:    la 5, a1[TD](2)
+; CHECK32-NEXT:    stw 3, 0(4)
+; CHECK32-NEXT:    lwz 4, L..C0(2) # @a4
+; CHECK32-NEXT:    stw 3, 0(5)
+; CHECK32-NEXT:    lwz 5, L..C1(2) # @a3
+; CHECK32-NEXT:    stw 3, 0(4)
+; CHECK32-NEXT:    stw 3, 0(5)
+; CHECK32-NEXT:    blr
+;
+; CHECK64-LABEL: set:
+; CHECK64:       # %bb.0: # %entry
+; CHECK64-NEXT:    ld 4, L..C0(2) # @a4
+; CHECK64-NEXT:    stw 3, a2[TD](2)
+; CHECK64-NEXT:    stw 3, a1[TD](2)
+; CHECK64-NEXT:    stw 3, 0(4)
+; CHECK64-NEXT:    ld 4, L..C1(2) # @a3
+; CHECK64-NEXT:    stw 3, 0(4)
+; CHECK64-NEXT:    blr
 entry:
 store i32 %_a, ptr @a2, align 4
 store i32 %_a, ptr @a1, align 4
@@ -34,81 +44,123 @@ ret void
 }
 
 define i32 @get1() {
-; CHECK-LABEL: get1:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: la 3, a2[TD](2)
-; CHECK-NEXT: lwz 3, 0(3)
-; CHECK-NEXT: blr
+; CHECK32-LABEL: get1:
+; CHECK32:       # %bb.0: # %entry
+; CHECK32-NEXT:    la 3, a2[TD](2)
+; CHECK32-NEXT:    lwz 3, 0(3)
+; CHECK32-NEXT:    blr
+;
+; CHECK64-LABEL: get1:
+; CHECK64:       # %bb.0: # %entry
+; CHECK64-NEXT:    lwz 3, a2[TD](2)
+; CHECK64-NEXT:    blr
 entry:
 %0 = load i32, ptr @a2, align 4
 ret i32 %0
 }
 
 define i32 @get2() {
-; CHECK-LABEL: get2:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: la 3, a1[TD](2)
-; CHECK-NEXT: lwz 3, 0(3)
-; CHECK-NEXT: blr
+; CHECK32-LABEL: get2:
+; CHECK32:       # %bb.0: # %entry
+; CHECK32-NEXT:    la 3, a1[TD](2)
+; CHECK32-NEXT:    lwz 3, 0(3)
+; CHECK32-NEXT:    blr
+;
+; CHECK64-LABEL: get2:
+; CHECK64:       # %bb.0: # %entry
+; CHECK64-NEXT:    lwz 3, a1[TD](2)
+; CHECK64-NEXT:    blr
 entry:
 %0 = load i32, ptr @a1, align 4
 ret i32 %0
 }
 
 define i32 @get3() {
-; CHECK-LABEL: get3:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: [[INSTR]] 3, L..C0(2) # @a4
-; CHECK-NEXT: lwz 3, 0(3)
-; CHECK-NEXT: blr
+; CHECK32-LABEL: get3:
+; CHECK32:       # %bb.0: # %entry
+; CHECK32-NEXT:    lwz 3, L..C0(2) # @a4
+; CHECK32-NEXT:    lwz 3, 0(3)
+; CHECK32-NEXT:    blr
+;
+; CHECK64-LABEL: get3:
+; CHECK64:       # %bb.0: # %entry
+; CHECK64-NEXT:    ld 3, L..C0(2) # @a4
+; CHECK64-NEXT:    lwz 3, 0(3)
+; CHECK64-NEXT:    blr
 entry:
 %0 = load i32, ptr @a4, align 4
 ret i32 %0
 }
 
 define i32 @get4() {
-; CHECK-LABEL: get4:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: [[INSTR]] 3, L..C1(2) # @a3
-; CHECK-NEXT: lwz 3, 0(3)
-; CHECK-NEXT: blr
+; CHECK32-LABEL: get4:
+; CHECK32:       # %bb.0: # %entry
+; CHECK32-NEXT:    lwz 3, L..C1(2) # @a3
+; CHECK32-NEXT:    lwz 3, 0(3)
+; CHECK32-NEXT:    blr
+;
+; CHECK64-LABEL: get4:
+; CHECK64:       # %bb.0: # %entry
+; CHECK64-NEXT:    ld 3, L..C1(2) # @a3
+; CHECK64-NEXT:    lwz 3, 0(3)
+; CHECK64-NEXT:    blr
 entry:
 %0 = ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/76488


More information about the llvm-commits mailing list