[llvm] [PowerPC] Peephole address calculation in TOC memops (PR #76488)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 21 01:25:54 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-powerpc
Author: Qiu Chaofan (ecnelises)
<details>
<summary>Changes</summary>
In the peephole after isel, optimize `addi C, A, B; memop X, 0(C)` into `memop X, B(A)`.
This relands commit 5e28d30f1fb10faf2db2f8bf0502e7fd72e6ac2e.
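To make the transformation easier to follow outside of the SelectionDAG machinery, here is a minimal standalone C++ sketch of the folding rule: an `addi` that adds a known immediate to a base register is absorbed into the displacement field of the dependent memory access, provided the combined displacement still fits a signed 16-bit D-form encoding. The `AddImm`/`MemOp`/`tryFoldAddImm` names are hypothetical and only model the idea; the actual patch operates on machine `SDNode`s in `PPCISelDAGToDAG.cpp` and additionally handles relocation flags and alignment, as shown in the diff below.

```cpp
// Minimal model of the peephole: fold "addi C, A, B; memop X, 0(C)" into
// "memop X, B(A)". Illustrative sketch only, not LLVM code.
#include <cstdint>
#include <iostream>
#include <optional>
#include <string>

struct AddImm {
  std::string DefReg;  // C
  std::string BaseReg; // A
  int64_t Imm;         // B
};

struct MemOp {
  std::string ValueReg; // X
  int64_t Disp;         // displacement, 0 before folding
  std::string BaseReg;  // C before folding, A after
};

// Returns the folded memop if the combined displacement is encodable as a
// signed 16-bit D-form displacement; otherwise returns std::nullopt.
std::optional<MemOp> tryFoldAddImm(const AddImm &Add, const MemOp &Mem) {
  if (Mem.BaseReg != Add.DefReg)
    return std::nullopt;
  int64_t NewDisp = Mem.Disp + Add.Imm;
  if (NewDisp < INT16_MIN || NewDisp > INT16_MAX)
    return std::nullopt;
  return MemOp{Mem.ValueReg, NewDisp, Add.BaseReg};
}

int main() {
  AddImm Add{"r5", "r2", 40}; // addi r5, r2, 40
  MemOp Load{"r3", 0, "r5"};  // lwz  r3, 0(r5)
  if (auto Folded = tryFoldAddImm(Add, Load))
    std::cout << "lwz " << Folded->ValueReg << ", " << Folded->Disp << "("
              << Folded->BaseReg << ")\n"; // lwz r3, 40(r2)
}
```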
---
Patch is 29.35 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/76488.diff
5 Files Affected:
- (modified) llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp (+184-213)
- (modified) llvm/test/CodeGen/PowerPC/toc-data-common.ll (+101-49)
- (modified) llvm/test/CodeGen/PowerPC/toc-data-const.ll (+26-19)
- (added) llvm/test/CodeGen/PowerPC/toc-data-peephole-aligment.ll (+16)
- (modified) llvm/test/CodeGen/PowerPC/toc-data.ll (+11-11)
``````````diff
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 9e5f0b36616d1b..34e9dd6608cf51 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -7662,241 +7662,212 @@ static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
DAG->RemoveDeadNode(InitialADDI.getNode());
}
-void PPCDAGToDAGISel::PeepholePPC64() {
- SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
- bool HasAIXSmallLocalExecTLS = Subtarget->hasAIXSmallLocalExecTLS();
-
- while (Position != CurDAG->allnodes_begin()) {
- SDNode *N = &*--Position;
- // Skip dead nodes and any non-machine opcodes.
- if (N->use_empty() || !N->isMachineOpcode())
- continue;
-
- if (isVSXSwap(SDValue(N, 0)))
- reduceVSXSwap(N, CurDAG);
-
- // This optimization is performed for non-TOC-based local-exec accesses.
- if (HasAIXSmallLocalExecTLS)
- foldADDIForLocalExecAccesses(N, CurDAG);
+static bool isValidOffsetMemOp(SDNode *N, bool &IsLoad,
+ MaybeAlign &ExtraAlign) {
+ switch (N->getMachineOpcode()) {
+ default:
+ return false;
+ // Global must be word-aligned for LD, STD, LWA.
+ case PPC::LWA:
+ case PPC::LD:
+ case PPC::DFLOADf64:
+ case PPC::DFLOADf32:
+ ExtraAlign = Align(4);
+ [[fallthrough]];
+ case PPC::LBZ:
+ case PPC::LBZ8:
+ case PPC::LFD:
+ case PPC::LFS:
+ case PPC::LHA:
+ case PPC::LHA8:
+ case PPC::LHZ:
+ case PPC::LHZ8:
+ case PPC::LWZ:
+ case PPC::LWZ8:
+ IsLoad = true;
+ break;
+ case PPC::STD:
+ case PPC::DFSTOREf64:
+ case PPC::DFSTOREf32:
+ ExtraAlign = Align(4);
+ [[fallthrough]];
+ case PPC::STB:
+ case PPC::STB8:
+ case PPC::STFD:
+ case PPC::STFS:
+ case PPC::STH:
+ case PPC::STH8:
+ case PPC::STW:
+ case PPC::STW8:
+ break;
+ }
+ SDValue Base = N->getOperand(IsLoad ? 1 : 2);
+ if (!Base.isMachineOpcode())
+ return false;
+ switch (Base.getMachineOpcode()) {
+ default:
+ return false;
+ case PPC::ADDI8:
+ case PPC::ADDI:
+ case PPC::ADDIdtprelL:
+ case PPC::ADDItlsldL:
+ case PPC::ADDItocL:
+ case PPC::ADDItoc:
+ case PPC::ADDItoc8:
+ break;
+ }
+ return true;
+}
- unsigned FirstOp;
- unsigned StorageOpcode = N->getMachineOpcode();
- bool RequiresMod4Offset = false;
+static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
+ const PPCSubtarget *Subtarget) {
+ // TODO: Enable for AIX 32-bit.
+ if (!Subtarget->isPPC64())
+ return;
- switch (StorageOpcode) {
- default: continue;
+ bool IsLoad = false;
+ MaybeAlign ExtraAlign;
+ if (!isValidOffsetMemOp(N, IsLoad, ExtraAlign))
+ return;
- case PPC::LWA:
- case PPC::LD:
- case PPC::DFLOADf64:
- case PPC::DFLOADf32:
- RequiresMod4Offset = true;
- [[fallthrough]];
- case PPC::LBZ:
- case PPC::LBZ8:
- case PPC::LFD:
- case PPC::LFS:
- case PPC::LHA:
- case PPC::LHA8:
- case PPC::LHZ:
- case PPC::LHZ8:
- case PPC::LWZ:
- case PPC::LWZ8:
- FirstOp = 0;
- break;
+ SDValue MemBase = N->getOperand(IsLoad ? 1 : 2);
+ unsigned BaseOpc = MemBase.getMachineOpcode();
+ auto *MemOffset = dyn_cast<ConstantSDNode>(N->getOperand(IsLoad ? 0 : 1));
- case PPC::STD:
- case PPC::DFSTOREf64:
- case PPC::DFSTOREf32:
- RequiresMod4Offset = true;
- [[fallthrough]];
- case PPC::STB:
- case PPC::STB8:
- case PPC::STFD:
- case PPC::STFS:
- case PPC::STH:
- case PPC::STH8:
- case PPC::STW:
- case PPC::STW8:
- FirstOp = 1;
- break;
- }
+ // Only additions with constant offsets will be folded.
+ if (!MemOffset)
+ return;
+ assert(MemBase.getNumOperands() == 2 && "Invalid base of memop with offset!");
- // If this is a load or store with a zero offset, or within the alignment,
- // we may be able to fold an add-immediate into the memory operation.
- // The check against alignment is below, as it can't occur until we check
- // the arguments to N
- if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
- continue;
+ SDValue ImmOp, RegOp;
+  // ADDItoc and ADDItoc8 ('la') put the register in the second operand.
+ if (BaseOpc == PPC::ADDItoc || BaseOpc == PPC::ADDItoc8) {
+ ImmOp = MemBase.getOperand(0);
+ RegOp = MemBase.getOperand(1);
+ } else {
+ ImmOp = MemBase.getOperand(1);
+ RegOp = MemBase.getOperand(0);
+ }
- SDValue Base = N->getOperand(FirstOp + 1);
- if (!Base.isMachineOpcode())
- continue;
+ MaybeAlign ImmAlign;
+ if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOp))
+ ImmAlign = GA->getGlobal()->getPointerAlignment(DAG->getDataLayout());
+ else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOp))
+ ImmAlign = CP->getAlign();
- unsigned Flags = 0;
- bool ReplaceFlags = true;
+ if (ImmAlign && ExtraAlign && ImmAlign.value() < ExtraAlign.value())
+ return;
- // When the feeding operation is an add-immediate of some sort,
- // determine whether we need to add relocation information to the
- // target flags on the immediate operand when we fold it into the
- // load instruction.
- //
- // For something like ADDItocL, the relocation information is
- // inferred from the opcode; when we process it in the AsmPrinter,
- // we add the necessary relocation there. A load, though, can receive
- // relocation from various flavors of ADDIxxx, so we need to carry
- // the relocation information in the target flags.
- switch (Base.getMachineOpcode()) {
- default: continue;
-
- case PPC::ADDI8:
- case PPC::ADDI:
- // In some cases (such as TLS) the relocation information
- // is already in place on the operand, so copying the operand
- // is sufficient.
- ReplaceFlags = false;
- break;
- case PPC::ADDIdtprelL:
- Flags = PPCII::MO_DTPREL_LO;
- break;
- case PPC::ADDItlsldL:
- Flags = PPCII::MO_TLSLD_LO;
- break;
- case PPC::ADDItocL:
- Flags = PPCII::MO_TOC_LO;
- break;
+ // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
+ // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
+ // we might have needed different @ha relocation values for the offset
+ // pointers).
+ int MaxDisplacement = 7;
+ if (ImmAlign && ImmAlign.value().value() < 8)
+ MaxDisplacement = (int)ImmAlign.value().value() - 1;
+
+ // If addis also contributes to TOC relocation, it also needs to be updated.
+ bool UpdateHaBase = false;
+ SDValue HaBase = MemBase.getOperand(0);
+ int64_t Offset = MemOffset->getSExtValue();
+
+  // Some flags on the addition need to be carried over to the new memop.
+ PPCII::TOF NewOpFlags = PPCII::MO_NO_FLAG;
+ if (BaseOpc == PPC::ADDIdtprelL)
+ NewOpFlags = PPCII::MO_DTPREL_LO;
+ else if (BaseOpc == PPC::ADDItlsldL)
+ NewOpFlags = PPCII::MO_TLSLD_LO;
+ else if (BaseOpc == PPC::ADDItocL)
+ NewOpFlags = PPCII::MO_TOC_LO;
+
+ if (NewOpFlags) {
+ if (Offset < 0 || Offset > MaxDisplacement) {
+ // Check base opcode and its uses, quit if it has multiple uses.
+ if (MemBase.getMachineOpcode() != PPC::ADDItocL || !MemBase.hasOneUse() ||
+ !HaBase.isMachineOpcode() || !HaBase.hasOneUse() ||
+ HaBase.getMachineOpcode() != PPC::ADDIStocHA8 ||
+ HaBase.getOperand(1) != ImmOp)
+ return;
+ UpdateHaBase = true;
}
- SDValue ImmOpnd = Base.getOperand(1);
-
- // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
- // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
- // we might have needed different @ha relocation values for the offset
- // pointers).
- int MaxDisplacement = 7;
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
- const GlobalValue *GV = GA->getGlobal();
- Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
- MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement);
+ if (const auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOp)) {
+ // We can't perform this optimization for data whose alignment is
+ // insufficient for the instruction encoding.
+ if (ImmAlign && ImmAlign.value() < Align(4) &&
+ (ExtraAlign || (Offset % 4) != 0))
+ return;
+ ImmOp = DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(ImmOp),
+ MVT::i64, Offset, NewOpFlags);
+ } else if (const auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOp)) {
+ ImmOp = DAG->getTargetConstantPool(CP->getConstVal(), MVT::i64,
+ CP->getAlign(), Offset, NewOpFlags);
}
+ } else {
+ if (ImmAlign && ExtraAlign && ImmAlign.value() < ExtraAlign.value())
+ return;
+ if (auto *C = dyn_cast<ConstantSDNode>(ImmOp)) {
+ Offset += C->getSExtValue();
- bool UpdateHBase = false;
- SDValue HBase = Base.getOperand(0);
-
- int Offset = N->getConstantOperandVal(FirstOp);
- if (ReplaceFlags) {
- if (Offset < 0 || Offset > MaxDisplacement) {
- // If we have a addi(toc at l)/addis(toc at ha) pair, and the addis has only
- // one use, then we can do this for any offset, we just need to also
- // update the offset (i.e. the symbol addend) on the addis also.
- if (Base.getMachineOpcode() != PPC::ADDItocL)
- continue;
-
- if (!HBase.isMachineOpcode() ||
- HBase.getMachineOpcode() != PPC::ADDIStocHA8)
- continue;
-
- if (!Base.hasOneUse() || !HBase.hasOneUse())
- continue;
-
- SDValue HImmOpnd = HBase.getOperand(1);
- if (HImmOpnd != ImmOpnd)
- continue;
-
- UpdateHBase = true;
- }
- } else {
- // Global addresses can be folded, but only if they are sufficiently
- // aligned.
- if (RequiresMod4Offset) {
- if (GlobalAddressSDNode *GA =
- dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
- const GlobalValue *GV = GA->getGlobal();
- Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
- if (Alignment < 4)
- continue;
- }
+ if ((Offset % ExtraAlign.valueOrOne().value()) != 0 || !isInt<16>(Offset))
+ return;
+ ImmOp =
+ DAG->getTargetConstant(Offset, SDLoc(ImmOp), ImmOp.getValueType());
+ } else if (Offset != 0) {
+ // This optimization is performed for non-TOC-based local-exec accesses.
+ if (Subtarget->hasAIXSmallLocalExecTLS() &&
+ isEligibleToFoldADDIForLocalExecAccesses(DAG, MemBase)) {
+ // Add the non-zero offset information into the load or store
+ // instruction to be used for non-TOC-based local-exec accesses.
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOp);
+ assert(GA && "Expecting a valid GlobalAddressSDNode when folding "
+ "addi into local-exec accesses!");
+ ImmOp = DAG->getTargetGlobalAddress(
+ GA->getGlobal(), SDLoc(GA), MVT::i64, Offset, GA->getTargetFlags());
+ } else {
+ return;
}
+ }
+ }
- // If we're directly folding the addend from an addi instruction, then:
- // 1. In general, the offset on the memory access must be zero.
- // 2. If the addend is a constant, then it can be combined with a
- // non-zero offset, but only if the result meets the encoding
- // requirements.
- if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
- Offset += C->getSExtValue();
+ LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
+ LLVM_DEBUG(MemBase->dump(DAG));
+ LLVM_DEBUG(dbgs() << "\nN: ");
+ LLVM_DEBUG(N->dump(DAG));
+ LLVM_DEBUG(dbgs() << "\n");
- if (RequiresMod4Offset && (Offset % 4) != 0)
- continue;
+ if (IsLoad)
+ (void)DAG->UpdateNodeOperands(N, ImmOp, RegOp, N->getOperand(2));
+ else
+ (void)DAG->UpdateNodeOperands(N, N->getOperand(0), ImmOp, RegOp,
+ N->getOperand(3));
- if (!isInt<16>(Offset))
- continue;
+ if (UpdateHaBase)
+ (void)DAG->UpdateNodeOperands(HaBase.getNode(), HaBase.getOperand(0),
+ ImmOp);
- ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
- ImmOpnd.getValueType());
- } else if (Offset != 0) {
- // This optimization is performed for non-TOC-based local-exec accesses.
- if (HasAIXSmallLocalExecTLS &&
- isEligibleToFoldADDIForLocalExecAccesses(CurDAG, Base)) {
- // Add the non-zero offset information into the load or store
- // instruction to be used for non-TOC-based local-exec accesses.
- GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd);
- assert(GA && "Expecting a valid GlobalAddressSDNode when folding "
- "addi into local-exec accesses!");
- ImmOpnd = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
- MVT::i64, Offset,
- GA->getTargetFlags());
- } else
- continue;
- }
- }
+ if (MemBase.getNode()->use_empty())
+ DAG->RemoveDeadNode(MemBase.getNode());
+}
- // We found an opportunity. Reverse the operands from the add
- // immediate and substitute them into the load or store. If
- // needed, update the target flags for the immediate operand to
- // reflect the necessary relocation information.
- LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
- LLVM_DEBUG(Base->dump(CurDAG));
- LLVM_DEBUG(dbgs() << "\nN: ");
- LLVM_DEBUG(N->dump(CurDAG));
- LLVM_DEBUG(dbgs() << "\n");
+void PPCDAGToDAGISel::PeepholePPC64() {
+ SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
+ bool HasAIXSmallLocalExecTLS = Subtarget->hasAIXSmallLocalExecTLS();
- // If the relocation information isn't already present on the
- // immediate operand, add it now.
- if (ReplaceFlags) {
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
- SDLoc dl(GA);
- const GlobalValue *GV = GA->getGlobal();
- Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
- // We can't perform this optimization for data whose alignment
- // is insufficient for the instruction encoding.
- if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) {
- LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
- continue;
- }
- ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
- } else if (ConstantPoolSDNode *CP =
- dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
- const Constant *C = CP->getConstVal();
- ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(),
- Offset, Flags);
- }
- }
+ while (Position != CurDAG->allnodes_begin()) {
+ SDNode *N = &*--Position;
+ // Skip dead nodes and any non-machine opcodes.
+ if (N->use_empty() || !N->isMachineOpcode())
+ continue;
- if (FirstOp == 1) // Store
- (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
- Base.getOperand(0), N->getOperand(3));
- else // Load
- (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
- N->getOperand(2));
+ if (isVSXSwap(SDValue(N, 0)))
+ reduceVSXSwap(N, CurDAG);
- if (UpdateHBase)
- (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
- ImmOpnd);
+ // This optimization is performed for non-TOC-based local-exec accesses.
+ if (HasAIXSmallLocalExecTLS)
+ foldADDIForLocalExecAccesses(N, CurDAG);
- // The add-immediate may now be dead, in which case remove it.
- if (Base.getNode()->use_empty())
- CurDAG->RemoveDeadNode(Base.getNode());
+ peepholeMemOffset(N, CurDAG, Subtarget);
}
}
diff --git a/llvm/test/CodeGen/PowerPC/toc-data-common.ll b/llvm/test/CodeGen/PowerPC/toc-data-common.ll
index 7747f2eecc935e..aa9db48c9937f4 100644
--- a/llvm/test/CodeGen/PowerPC/toc-data-common.ll
+++ b/llvm/test/CodeGen/PowerPC/toc-data-common.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s -DINSTR=lwz --check-prefix=CHECK
-; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s -DINSTR=ld --check-prefix=CHECK
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s -DINSTR=lwz --check-prefixes=CHECK32,CHECK
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s -DINSTR=ld --check-prefixes=CHECK64,CHECK
; RUN: llc -filetype=obj -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s -o %t32.o
; RUN: llvm-objdump -t --symbol-description %t32.o | FileCheck %s --check-prefix=OBJ32
@@ -14,17 +14,27 @@
@a4 = global i32 0, align 4
define void @set(i32 noundef %_a) {
-; CHECK-LABEL: set:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: la 4, a2[TD](2)
-; CHECK-NEXT: la 5, a1[TD](2)
-; CHECK-NEXT: stw 3, 0(4)
-; CHECK-NEXT: [[INSTR]] 4, L..C0(2) # @a4
-; CHECK-NEXT: stw 3, 0(5)
-; CHECK-NEXT: [[INSTR]] 5, L..C1(2) # @a3
-; CHECK-NEXT: stw 3, 0(4)
-; CHECK-NEXT: stw 3, 0(5)
-; CHECK-NEXT: blr
+; CHECK32-LABEL: set:
+; CHECK32: # %bb.0: # %entry
+; CHECK32-NEXT: la 4, a2[TD](2)
+; CHECK32-NEXT: la 5, a1[TD](2)
+; CHECK32-NEXT: stw 3, 0(4)
+; CHECK32-NEXT: lwz 4, L..C0(2) # @a4
+; CHECK32-NEXT: stw 3, 0(5)
+; CHECK32-NEXT: lwz 5, L..C1(2) # @a3
+; CHECK32-NEXT: stw 3, 0(4)
+; CHECK32-NEXT: stw 3, 0(5)
+; CHECK32-NEXT: blr
+;
+; CHECK64-LABEL: set:
+; CHECK64: # %bb.0: # %entry
+; CHECK64-NEXT: ld 4, L..C0(2) # @a4
+; CHECK64-NEXT: stw 3, a2[TD](2)
+; CHECK64-NEXT: stw 3, a1[TD](2)
+; CHECK64-NEXT: stw 3, 0(4)
+; CHECK64-NEXT: ld 4, L..C1(2) # @a3
+; CHECK64-NEXT: stw 3, 0(4)
+; CHECK64-NEXT: blr
entry:
store i32 %_a, ptr @a2, align 4
store i32 %_a, ptr @a1, align 4
@@ -34,81 +44,123 @@ ret void
}
define i32 @get1() {
-; CHECK-LABEL: get1:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: la 3, a2[TD](2)
-; CHECK-NEXT: lwz 3, 0(3)
-; CHECK-NEXT: blr
+; CHECK32-LABEL: get1:
+; CHECK32: # %bb.0: # %entry
+; CHECK32-NEXT: la 3, a2[TD](2)
+; CHECK32-NEXT: lwz 3, 0(3)
+; CHECK32-NEXT: blr
+;
+; CHECK64-LABEL: get1:
+; CHECK64: # %bb.0: # %entry
+; CHECK64-NEXT: lwz 3, a2[TD](2)
+; CHECK64-NEXT: blr
entry:
%0 = load i32, ptr @a2, align 4
ret i32 %0
}
define i32 @get2() {
-; CHECK-LABEL: get2:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: la 3, a1[TD](2)
-; CHECK-NEXT: lwz 3, 0(3)
-; CHECK-NEXT: blr
+; CHECK32-LABEL: get2:
+; CHECK32: # %bb.0: # %entry
+; CHECK32-NEXT: la 3, a1[TD](2)
+; CHECK32-NEXT: lwz 3, 0(3)
+; CHECK32-NEXT: blr
+;
+; CHECK64-LABEL: get2:
+; CHECK64: # %bb.0: # %entry
+; CHECK64-NEXT: lwz 3, a1[TD](2)
+; CHECK64-NEXT: blr
entry:
%0 = load i32, ptr @a1, align 4
ret i32 %0
}
define i32 @get3() {
-; CHECK-LABEL: get3:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: [[INSTR]] 3, L..C0(2) # @a4
-; CHECK-NEXT: lwz 3, 0(3)
-; CHECK-NEXT: blr
+; CHECK32-LABEL: get3:
+; CHECK32: # %bb.0: # %entry
+; CHECK32-NEXT: lwz 3, L..C0(2) # @a4
+; CHECK32-NEXT: lwz 3, 0(3)
+; CHECK32-NEXT: blr
+;
+; CHECK64-LABEL: get3:
+; CHECK64: # %bb.0: # %entry
+; CHECK64-NEXT: ld 3, L..C0(2) # @a4
+; CHECK64-NEXT: lwz 3, 0(3)
+; CHECK64-NEXT: blr
entry:
%0 = load i32, ptr @a4, align 4
ret i32 %0
}
define i32 @get4() {
-; CHECK-LABEL: get4:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: [[INSTR]] 3, L..C1(2) # @a3
-; CHECK-NEXT: lwz 3, 0(3)
-; CHECK-NEXT: blr
+; CHECK32-LABEL: get4:
+; CHECK32: # %bb.0: # %entry
+; CHECK32-NEXT: lwz 3, L..C1(2) # @a3
+; CHECK32-NEXT: lwz 3, 0(3)
+; CHECK32-NEXT: blr
+;
+; CHECK64-LABEL: get4:
+; CHECK64: # %bb.0: # %entry
+; CHECK64-NEXT: ld 3, L..C1(2) # @a3
+; CHECK64-NEXT: lwz 3, 0(3)
+; CHECK64-NEXT: blr
entry:
%0 = ...
[truncated]
``````````
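As a side note on the displacement constraints that recur in the patch above: for TOC-relative (`@l`/`@ha`) addressing the folded offset must stay below the known alignment of the symbol, capped at 8 (the ABI only guarantees 8-byte alignment of the TOC base), unless the paired `ADDIStocHA8` can also be updated, which the patch handles separately. DS-form memops such as `LD`, `STD`, and `LWA` additionally require the displacement to be a multiple of 4. The helper below is a hypothetical standalone restatement of those checks for illustration, not the patch's code.

```cpp
// Illustrative restatement of the TOC-offset folding constraints described in
// the patch comments above (hypothetical helper, not LLVM code).
#include <algorithm>
#include <cstdint>
#include <iostream>

// Can `Offset` be folded into a @l-relocated displacement for a symbol with
// the given power-of-two alignment, using a memop that may be DS-form?
bool canFoldTocOffset(int64_t Offset, uint64_t SymbolAlign, bool IsDSForm) {
  // The TOC base is only guaranteed 8-byte aligned, so the usable range is
  // limited by the symbol's own alignment, capped at 8.
  int64_t MaxDisplacement = std::min<int64_t>(SymbolAlign, 8) - 1;
  if (Offset < 0 || Offset > MaxDisplacement)
    return false;
  // DS-form instructions (LD, STD, LWA) encode the low two displacement bits
  // as zero, so the offset must be a multiple of 4.
  if (IsDSForm && (Offset % 4) != 0)
    return false;
  return true;
}

int main() {
  std::cout << canFoldTocOffset(4, 8, /*IsDSForm=*/true) << "\n";  // 1
  std::cout << canFoldTocOffset(6, 8, /*IsDSForm=*/true) << "\n";  // 0: not 4-byte aligned
  std::cout << canFoldTocOffset(4, 4, /*IsDSForm=*/false) << "\n"; // 0: exceeds alignment - 1
}
```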
</details>
https://github.com/llvm/llvm-project/pull/76488