[clang-tools-extra] [clang] [llvm] [PowerPC] Peephole address calculation in TOC memops (PR #76488)
Qiu Chaofan via cfe-commits
cfe-commits at lists.llvm.org
Wed Jan 17 02:05:46 PST 2024
https://github.com/ecnelises updated https://github.com/llvm/llvm-project/pull/76488
>From 7eb909423d49ea19d9978b097ceb8c4a95fc7bac Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Thu, 28 Dec 2023 11:09:07 +0800
Subject: [PATCH 1/5] [PowerPC] Peephole address calculation in TOC memops
---
llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 391 +++++++++---------
llvm/test/CodeGen/PowerPC/toc-data-const.ll | 45 +-
.../PowerPC/toc-data-peephole-aligment.ll | 16 +
llvm/test/CodeGen/PowerPC/toc-data.ll | 22 +-
4 files changed, 253 insertions(+), 221 deletions(-)
create mode 100644 llvm/test/CodeGen/PowerPC/toc-data-peephole-aligment.ll
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index b57d185bb638b8..c96bf3204a32ed 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -7567,224 +7567,233 @@ static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0));
}
-void PPCDAGToDAGISel::PeepholePPC64() {
- SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
-
- while (Position != CurDAG->allnodes_begin()) {
- SDNode *N = &*--Position;
- // Skip dead nodes and any non-machine opcodes.
- if (N->use_empty() || !N->isMachineOpcode())
- continue;
-
- if (isVSXSwap(SDValue(N, 0)))
- reduceVSXSwap(N, CurDAG);
-
- unsigned FirstOp;
- unsigned StorageOpcode = N->getMachineOpcode();
- bool RequiresMod4Offset = false;
+static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
+ const PPCSubtarget *Subtarget) {
+ unsigned StorageOpcode = N->getMachineOpcode();
+ bool IsLoad = false;
+ SDValue MemOffset, MemBase;
+ bool IsToc = false;
+
+ // TODO: Enable for AIX 32-bit
+ if (!Subtarget->isPPC64())
+ return;
- switch (StorageOpcode) {
- default: continue;
+ // Global must be word-aligned for LD, STD, LWA.
+ unsigned ExtraAlign = 0;
+ switch (StorageOpcode) {
+ default:
+ return;
+ case PPC::LWA:
+ case PPC::LD:
+ case PPC::DFLOADf64:
+ case PPC::DFLOADf32:
+ ExtraAlign = 4;
+ [[fallthrough]];
+ case PPC::LBZ:
+ case PPC::LBZ8:
+ case PPC::LFD:
+ case PPC::LFS:
+ case PPC::LHA:
+ case PPC::LHA8:
+ case PPC::LHZ:
+ case PPC::LHZ8:
+ case PPC::LWZ:
+ case PPC::LWZ8:
+ IsLoad = true;
+ MemOffset = N->getOperand(0);
+ MemBase = N->getOperand(1);
+ break;
+ case PPC::STD:
+ case PPC::DFSTOREf64:
+ case PPC::DFSTOREf32:
+ ExtraAlign = 4;
+ [[fallthrough]];
+ case PPC::STB:
+ case PPC::STB8:
+ case PPC::STFD:
+ case PPC::STFS:
+ case PPC::STH:
+ case PPC::STH8:
+ case PPC::STW:
+ case PPC::STW8:
+ MemOffset = N->getOperand(1);
+ MemBase = N->getOperand(2);
+ break;
+ }
- case PPC::LWA:
- case PPC::LD:
- case PPC::DFLOADf64:
- case PPC::DFLOADf32:
- RequiresMod4Offset = true;
- [[fallthrough]];
- case PPC::LBZ:
- case PPC::LBZ8:
- case PPC::LFD:
- case PPC::LFS:
- case PPC::LHA:
- case PPC::LHA8:
- case PPC::LHZ:
- case PPC::LHZ8:
- case PPC::LWZ:
- case PPC::LWZ8:
- FirstOp = 0;
- break;
+ // Only constant offsets can be folded.
+ if (!isa<ConstantSDNode>(MemOffset) || !MemBase.isMachineOpcode())
+ return;
- case PPC::STD:
- case PPC::DFSTOREf64:
- case PPC::DFSTOREf32:
- RequiresMod4Offset = true;
- [[fallthrough]];
- case PPC::STB:
- case PPC::STB8:
- case PPC::STFD:
- case PPC::STFS:
- case PPC::STH:
- case PPC::STH8:
- case PPC::STW:
- case PPC::STW8:
- FirstOp = 1;
- break;
+ auto CheckAlign = [DAG](const SDValue &Val, unsigned TargetAlign) {
+ if (TargetAlign == 0)
+ return true;
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val)) {
+ const GlobalValue *GV = GA->getGlobal();
+ Align Alignment = GV->getPointerAlignment(DAG->getDataLayout());
+ if (Alignment < TargetAlign)
+ return false;
}
+ return true;
+ };
- // If this is a load or store with a zero offset, or within the alignment,
- // we may be able to fold an add-immediate into the memory operation.
- // The check against alignment is below, as it can't occur until we check
- // the arguments to N
- if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
- continue;
-
- SDValue Base = N->getOperand(FirstOp + 1);
- if (!Base.isMachineOpcode())
- continue;
+ // We may need to carry relocation information in ADDI to load.
+ std::optional<PPCII::TOF> NewOpFlags;
+ switch (MemBase.getMachineOpcode()) {
+ default:
+ return;
+ case PPC::ADDI8:
+ case PPC::ADDI:
+ // In some cases (such as TLS) the relocation information
+ // is already in place on the operand, so copying the operand
+ // is sufficient.
+ break;
+ case PPC::ADDIdtprelL:
+ NewOpFlags = PPCII::MO_DTPREL_LO;
+ break;
+ case PPC::ADDItlsldL:
+ NewOpFlags = PPCII::MO_TLSLD_LO;
+ break;
+ case PPC::ADDItocL:
+ NewOpFlags = PPCII::MO_TOC_LO;
+ break;
+ case PPC::ADDItoc:
+ case PPC::ADDItoc8:
+ IsToc = true;
+ if (!CheckAlign(MemBase.getOperand(0), ExtraAlign))
+ return;
+ break;
+ }
- unsigned Flags = 0;
- bool ReplaceFlags = true;
+ SDValue ImmOpnd = MemBase.getOperand(1);
- // When the feeding operation is an add-immediate of some sort,
- // determine whether we need to add relocation information to the
- // target flags on the immediate operand when we fold it into the
- // load instruction.
- //
- // For something like ADDItocL, the relocation information is
- // inferred from the opcode; when we process it in the AsmPrinter,
- // we add the necessary relocation there. A load, though, can receive
- // relocation from various flavors of ADDIxxx, so we need to carry
- // the relocation information in the target flags.
- switch (Base.getMachineOpcode()) {
- default: continue;
-
- case PPC::ADDI8:
- case PPC::ADDI:
- // In some cases (such as TLS) the relocation information
- // is already in place on the operand, so copying the operand
- // is sufficient.
- ReplaceFlags = false;
- break;
- case PPC::ADDIdtprelL:
- Flags = PPCII::MO_DTPREL_LO;
- break;
- case PPC::ADDItlsldL:
- Flags = PPCII::MO_TLSLD_LO;
- break;
- case PPC::ADDItocL:
- Flags = PPCII::MO_TOC_LO;
- break;
- }
+ // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
+ // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
+ // we might have needed different @ha relocation values for the offset
+ // pointers).
+ int MaxDisplacement = 7;
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
+ const GlobalValue *GV = GA->getGlobal();
+ Align Alignment = GV->getPointerAlignment(DAG->getDataLayout());
+ MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement);
+ }
- SDValue ImmOpnd = Base.getOperand(1);
+ bool UpdateHBase = false;
+ SDValue HBase = MemBase.getOperand(0);
- // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
- // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
- // we might have needed different @ha relocation values for the offset
- // pointers).
- int MaxDisplacement = 7;
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
- const GlobalValue *GV = GA->getGlobal();
- Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
- MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement);
- }
+ int Offset = cast<ConstantSDNode>(MemOffset)->getZExtValue();
+ if (NewOpFlags) {
+ if (Offset < 0 || Offset > MaxDisplacement) {
+ // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
+ // one use, then we can do this for any offset, we just need to also
+ // update the offset (i.e. the symbol addend) on the addis also.
+ if (MemBase.getMachineOpcode() != PPC::ADDItocL) {
+ return;
+ }
- bool UpdateHBase = false;
- SDValue HBase = Base.getOperand(0);
+ if (!HBase.isMachineOpcode() ||
+ HBase.getMachineOpcode() != PPC::ADDIStocHA8) {
+ return;
+ }
- int Offset = N->getConstantOperandVal(FirstOp);
- if (ReplaceFlags) {
- if (Offset < 0 || Offset > MaxDisplacement) {
- // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
- // one use, then we can do this for any offset, we just need to also
- // update the offset (i.e. the symbol addend) on the addis also.
- if (Base.getMachineOpcode() != PPC::ADDItocL)
- continue;
+ if (!MemBase.hasOneUse() || !HBase.hasOneUse()) {
+ return;
+ }
- if (!HBase.isMachineOpcode() ||
- HBase.getMachineOpcode() != PPC::ADDIStocHA8)
- continue;
+ SDValue HImmOpnd = HBase.getOperand(1);
+ if (HImmOpnd != ImmOpnd) {
+ return;
+ }
- if (!Base.hasOneUse() || !HBase.hasOneUse())
- continue;
+ UpdateHBase = true;
+ }
+ } else {
+ // Global addresses can be folded, only if they are sufficiently aligned.
+ if (!CheckAlign(ImmOpnd, ExtraAlign))
+ return;
- SDValue HImmOpnd = HBase.getOperand(1);
- if (HImmOpnd != ImmOpnd)
- continue;
+ if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
+ Offset += C->getSExtValue();
- UpdateHBase = true;
- }
- } else {
- // Global addresses can be folded, but only if they are sufficiently
- // aligned.
- if (RequiresMod4Offset) {
- if (GlobalAddressSDNode *GA =
- dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
- const GlobalValue *GV = GA->getGlobal();
- Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
- if (Alignment < 4)
- continue;
- }
+ if (ExtraAlign && (Offset % ExtraAlign) != 0) {
+ return;
}
- // If we're directly folding the addend from an addi instruction, then:
- // 1. In general, the offset on the memory access must be zero.
- // 2. If the addend is a constant, then it can be combined with a
- // non-zero offset, but only if the result meets the encoding
- // requirements.
- if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
- Offset += C->getSExtValue();
+ if (!isInt<16>(Offset)) {
+ return;
+ }
- if (RequiresMod4Offset && (Offset % 4) != 0)
- continue;
+ ImmOpnd = DAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
+ ImmOpnd.getValueType());
+ } else if (Offset != 0) {
+ return;
+ }
+ }
- if (!isInt<16>(Offset))
- continue;
+ LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
+ LLVM_DEBUG(MemBase->dump(DAG));
+ LLVM_DEBUG(dbgs() << "\nN: ");
+ LLVM_DEBUG(N->dump(DAG));
+ LLVM_DEBUG(dbgs() << "\n");
- ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
- ImmOpnd.getValueType());
- } else if (Offset != 0) {
- continue;
+ // If the relocation information isn't already present on the
+ // immediate operand, add it now.
+ if (NewOpFlags) {
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
+ const GlobalValue *GV = GA->getGlobal();
+ Align Alignment = GV->getPointerAlignment(DAG->getDataLayout());
+ // We can't perform this optimization for data whose alignment
+ // is insufficient for the instruction encoding.
+ if (Alignment < 4 && (ExtraAlign || (Offset % 4) != 0)) {
+ LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
+ return;
}
+ ImmOpnd = DAG->getTargetGlobalAddress(GV, SDLoc(GA), MVT::i64, Offset,
+ NewOpFlags.value());
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
+ const Constant *C = CP->getConstVal();
+ ImmOpnd = DAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(), Offset,
+ NewOpFlags.value());
}
+ }
- // We found an opportunity. Reverse the operands from the add
- // immediate and substitute them into the load or store. If
- // needed, update the target flags for the immediate operand to
- // reflect the necessary relocation information.
- LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
- LLVM_DEBUG(Base->dump(CurDAG));
- LLVM_DEBUG(dbgs() << "\nN: ");
- LLVM_DEBUG(N->dump(CurDAG));
- LLVM_DEBUG(dbgs() << "\n");
+ if (IsLoad) {
+ if (IsToc)
+ (void)DAG->UpdateNodeOperands(N, MemBase.getOperand(0),
+ MemBase.getOperand(1), N->getOperand(2));
+ else
+ (void)DAG->UpdateNodeOperands(N, ImmOpnd, MemBase.getOperand(0),
+ N->getOperand(2));
+ } else {
+ if (IsToc)
+ (void)DAG->UpdateNodeOperands(N, N->getOperand(0), MemBase.getOperand(0),
+ MemBase.getOperand(1), N->getOperand(3));
+ else
+ (void)DAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
+ MemBase.getOperand(0), N->getOperand(3));
+ }
- // If the relocation information isn't already present on the
- // immediate operand, add it now.
- if (ReplaceFlags) {
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
- SDLoc dl(GA);
- const GlobalValue *GV = GA->getGlobal();
- Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
- // We can't perform this optimization for data whose alignment
- // is insufficient for the instruction encoding.
- if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) {
- LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
- continue;
- }
- ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
- } else if (ConstantPoolSDNode *CP =
- dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
- const Constant *C = CP->getConstVal();
- ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(),
- Offset, Flags);
- }
- }
+ if (UpdateHBase)
+ (void)DAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
+ ImmOpnd);
- if (FirstOp == 1) // Store
- (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
- Base.getOperand(0), N->getOperand(3));
- else // Load
- (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
- N->getOperand(2));
+ if (MemBase.getNode()->use_empty())
+ DAG->RemoveDeadNode(MemBase.getNode());
+}
- if (UpdateHBase)
- (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
- ImmOpnd);
+void PPCDAGToDAGISel::PeepholePPC64() {
+ SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
+
+ while (Position != CurDAG->allnodes_begin()) {
+ SDNode *N = &*--Position;
+ // Skip dead nodes and any non-machine opcodes.
+ if (N->use_empty() || !N->isMachineOpcode())
+ continue;
+
+ if (isVSXSwap(SDValue(N, 0)))
+ reduceVSXSwap(N, CurDAG);
- // The add-immediate may now be dead, in which case remove it.
- if (Base.getNode()->use_empty())
- CurDAG->RemoveDeadNode(Base.getNode());
+ peepholeMemOffset(N, CurDAG, Subtarget);
}
}
diff --git a/llvm/test/CodeGen/PowerPC/toc-data-const.ll b/llvm/test/CodeGen/PowerPC/toc-data-const.ll
index 740032e26a432e..6078271738b7cf 100644
--- a/llvm/test/CodeGen/PowerPC/toc-data-const.ll
+++ b/llvm/test/CodeGen/PowerPC/toc-data-const.ll
@@ -1,5 +1,8 @@
-; RUN: llc -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s --check-prefix CHECK
-; RUN: llc -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s --check-prefix CHECK
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff -O0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK32,NOOPT
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -O0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK64,NOOPT
+
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s --check-prefixes=CHECK,CHECK32,NOOPT
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s --check-prefixes=CHECK,CHECK64,OPT
; RUN: llc -filetype=obj -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s -o %t32.o
; RUN: llvm-readobj %t32.o --syms --relocs | FileCheck %s --check-prefix=OBJ32
@@ -23,15 +26,19 @@ define ptr @retptr() {
attributes #0 = { "toc-data" }
-; CHECK: .read:
-; CHECK: la 3, i1[TD](2)
+; CHECK-LABEL: .read:
+; NOOPT: la 3, i1[TD](2)
+; NOOPT: lwz 3, 0(3)
+; OPT: lwz 3, i1[TD](2)
-; CHECK: .retptr:
-; CHECK: la 3, i2[TD](2)
+; CHECK-LABEL: .retptr:
+; CHECK: la 3, i2[TD](2)
+; CHECK-NEXT: blr
; CHECK-DAG: .toc
; CHECK: .extern i1[TD]
-; CHECK: .csect i2[TD]
+; CHECK32: .csect i2[TD],2
+; CHECK64: .csect i2[TD],3
; OBJ32: Relocations [
; OBJ32-NEXT: Section (index: 1) .text {
@@ -111,14 +118,14 @@ attributes #0 = { "toc-data" }
; OBJ64: Relocations [
; OBJ64-NEXT: Section (index: 1) .text {
; OBJ64-NEXT: 0x2 R_TOC i1(1) 0xF
-; OBJ64-NEXT: 0x26 R_TOC i2(15) 0xF
+; OBJ64-NEXT: 0x22 R_TOC i2(15) 0xF
; OBJ64-NEXT: }
; OBJ64-NEXT: Section (index: 2) .data {
-; OBJ64-NEXT: 0x48 R_POS .read(5) 0x3F
-; OBJ64-NEXT: 0x50 R_POS TOC(13) 0x3F
-; OBJ64-NEXT: 0x60 R_POS .retptr(7) 0x3F
-; OBJ64-NEXT: 0x68 R_POS TOC(13) 0x3F
-; OBJ64-NEXT: 0x78 R_POS i1(1) 0x3F
+; OBJ64-NEXT: 0x40 R_POS .read(5) 0x3F
+; OBJ64-NEXT: 0x48 R_POS TOC(13) 0x3F
+; OBJ64-NEXT: 0x58 R_POS .retptr(7) 0x3F
+; OBJ64-NEXT: 0x60 R_POS TOC(13) 0x3F
+; OBJ64-NEXT: 0x70 R_POS i1(1) 0x3F
; OBJ64-NEXT: }
; OBJ64-NEXT: ]
@@ -144,7 +151,7 @@ attributes #0 = { "toc-data" }
; OBJ64: Symbol {
; OBJ64: Index: 13
; OBJ64-NEXT: Name: TOC
-; OBJ64-NEXT: Value (RelocatableAddress): 0x78
+; OBJ64-NEXT: Value (RelocatableAddress): 0x70
; OBJ64-NEXT: Section: .data
; OBJ64-NEXT: Type: 0x0
; OBJ64-NEXT: StorageClass: C_HIDEXT (0x6B)
@@ -163,7 +170,7 @@ attributes #0 = { "toc-data" }
; OBJ64: Symbol {
; OBJ64: Index: 15
; OBJ64-NEXT: Name: i2
-; OBJ64-NEXT: Value (RelocatableAddress): 0x78
+; OBJ64-NEXT: Value (RelocatableAddress): 0x70
; OBJ64-NEXT: Section: .data
; OBJ64-NEXT: Type: 0x0
; OBJ64-NEXT: StorageClass: C_EXT (0x2)
@@ -188,8 +195,8 @@ attributes #0 = { "toc-data" }
; DIS32-NEXT: 00000026: R_TOC i2
; DIS64: 0000000000000000 <.read>:
-; DIS64-NEXT: 0: 38 62 00 00 addi 3, 2, 0
+; DIS64-NEXT: 0: 80 62 00 00 lwz 3, 0(2)
; DIS64-NEXT: 0000000000000002: R_TOC i1
-; DIS64: 0000000000000024 <.retptr>:
-; DIS64-NEXT: 24: 38 62 00 00 addi 3, 2, 0
-; DIS64-NEXT: 0000000000000026: R_TOC i2
+; DIS64: 0000000000000020 <.retptr>:
+; DIS64-NEXT: 20: 38 62 00 00 addi 3, 2, 0
+; DIS64-NEXT: 0000000000000022: R_TOC i2
diff --git a/llvm/test/CodeGen/PowerPC/toc-data-peephole-aligment.ll b/llvm/test/CodeGen/PowerPC/toc-data-peephole-aligment.ll
new file mode 100644
index 00000000000000..8ec5d9fd331750
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/toc-data-peephole-aligment.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s
+
+@underaligned = dso_local global i32 123, align 1 #0
+
+define i64 @read() {
+entry:
+ %0 = load i32, ptr @underaligned, align 1
+ %1 = sext i32 %0 to i64
+ ret i64 %1
+}
+
+attributes #0 = { "toc-data" }
+
+; CHECK-LABEL: .read
+; CHECK: la [[DEF:[0-9]+]], underaligned[TD](2)
+; CHECK: lwa {{[0-9]+}}, 0([[DEF]])
diff --git a/llvm/test/CodeGen/PowerPC/toc-data.ll b/llvm/test/CodeGen/PowerPC/toc-data.ll
index cbf3be9fcaad05..a8a7b5d4d386f3 100644
--- a/llvm/test/CodeGen/PowerPC/toc-data.ll
+++ b/llvm/test/CodeGen/PowerPC/toc-data.ll
@@ -3,14 +3,14 @@
; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s \
; RUN: -stop-before=ppc-vsx-copy | FileCheck %s --check-prefix CHECK64
; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s --check-prefix TEST32
-; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s --check-prefix TEST64
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s --check-prefixes=TEST64,ASMOPT64
; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s \
; RUN: -stop-before=ppc-vsx-copy -O0 | FileCheck %s --check-prefix CHECK32
; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s \
; RUN: -stop-before=ppc-vsx-copy -O0 | FileCheck %s --check-prefix CHECK64-NOOPT
; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs -O0 < %s | FileCheck %s --check-prefix TEST32
-; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs -O0 < %s | FileCheck %s --check-prefix TEST64
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs -O0 < %s | FileCheck %s --check-prefixes=TEST64,ASMNOOPT64
@i = dso_local global i32 0, align 4 #0
@d = dso_local local_unnamed_addr global double 3.141590e+00, align 8
@@ -32,8 +32,7 @@ define dso_local void @write_int(i32 signext %in) {
; TEST32-NEXT: stw 3, 0(4)
; CHECK64: name: write_int
-; CHECK64: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @i, $x2
-; CHECK64-NEXT: STW8 %{{[0-9]+}}, 0, killed %[[SCRATCH]] :: (store (s32) into @i)
+; CHECK64: STW8 %{{[0-9]+}}, @i, $x2 :: (store (s32) into @i)
; CHECK64-NOOPT: name: write_int
; CHECK64-NOOPT: %[[SUBREG:[0-9]+]]:gprc = COPY %{{[0-9]}}.sub_32
@@ -41,8 +40,9 @@ define dso_local void @write_int(i32 signext %in) {
; CHECK64-NOOPT: STW %[[SUBREG]], 0, killed %[[ADDR]] :: (store (s32) into @i)
; TEST64: .write_int:
-; TEST64: la 4, i[TD](2)
-; TEST64-NEXT: stw 3, 0(4)
+; ASMNOOPT64: la 4, i[TD](2)
+; ASMNOOPT64-NEXT: stw 3, 0(4)
+; ASMOPT64: stw 3, i[TD](2)
define dso_local i64 @read_ll() {
@@ -85,16 +85,16 @@ define dso_local float @read_float() {
; TEST32-NEXT: lfs 1, 0(3)
; CHECK64: name: read_float
-; CHECK64: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @f, $x2
-; CHECK64: %{{[0-9]+}}:f4rc = LFS 0, killed %[[SCRATCH]] :: (dereferenceable load (s32) from @f)
+; CHECK64: %{{[0-9]+}}:f4rc = LFS @f, $x2 :: (dereferenceable load (s32) from @f)
; CHECK64-NOOPT: name: read_float
; CHECK64-NOOPT: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @f, $x2
; CHECK64-NOOPT: %{{[0-9]+}}:f4rc = LFS 0, killed %[[SCRATCH]]
-; TEST64: .read_float:
-; TEST64: la 3, f[TD](2)
-; TEST64-NEXT: lfs 1, 0(3)
+; TEST64: .read_float:
+; ASMNOOPT64: la 3, f[TD](2)
+; ASMNOOPT64-NEXT: lfs 1, 0(3)
+; ASMOPT64: lfs 1, f[TD](2)
define dso_local void @write_double(double %in) {
>From e7001e9027b5b09d0856c67942f3437374e031c5 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Thu, 11 Jan 2024 13:53:01 +0800
Subject: [PATCH 2/5] Reverse operands
---
llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 57 ++++++---------------
1 file changed, 15 insertions(+), 42 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 95b391f9c0d3bc..353367514b8789 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -7682,26 +7682,12 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
int Offset = cast<ConstantSDNode>(MemOffset)->getZExtValue();
if (NewOpFlags) {
if (Offset < 0 || Offset > MaxDisplacement) {
- // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
- // one use, then we can do this for any offset, we just need to also
- // update the offset (i.e. the symbol addend) on the addis also.
- if (MemBase.getMachineOpcode() != PPC::ADDItocL) {
+ // Check base opcode and its uses, quit if it has multiple uses.
+ if (MemBase.getMachineOpcode() != PPC::ADDItocL ||
+ !HBase.isMachineOpcode() ||
+ HBase.getMachineOpcode() != PPC::ADDIStocHA8 || !MemBase.hasOneUse() ||
+ !HBase.hasOneUse() || HBase.getOperand(1) != ImmOpnd)
return;
- }
-
- if (!HBase.isMachineOpcode() ||
- HBase.getMachineOpcode() != PPC::ADDIStocHA8) {
- return;
- }
-
- if (!MemBase.hasOneUse() || !HBase.hasOneUse()) {
- return;
- }
-
- SDValue HImmOpnd = HBase.getOperand(1);
- if (HImmOpnd != ImmOpnd) {
- return;
- }
UpdateHBase = true;
}
@@ -7713,13 +7699,8 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
Offset += C->getSExtValue();
- if (ExtraAlign && (Offset % ExtraAlign) != 0) {
+ if ((ExtraAlign && (Offset % ExtraAlign) != 0) || !isInt<16>(Offset))
return;
- }
-
- if (!isInt<16>(Offset)) {
- return;
- }
ImmOpnd = DAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
ImmOpnd.getValueType());
@@ -7734,8 +7715,7 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
LLVM_DEBUG(N->dump(DAG));
LLVM_DEBUG(dbgs() << "\n");
- // If the relocation information isn't already present on the
- // immediate operand, add it now.
+ // Add relocation flag if not present on the immediate operand.
if (NewOpFlags) {
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
const GlobalValue *GV = GA->getGlobal();
@@ -7755,21 +7735,14 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
}
}
- if (IsLoad) {
- if (IsToc)
- (void)DAG->UpdateNodeOperands(N, MemBase.getOperand(0),
- MemBase.getOperand(1), N->getOperand(2));
- else
- (void)DAG->UpdateNodeOperands(N, ImmOpnd, MemBase.getOperand(0),
- N->getOperand(2));
- } else {
- if (IsToc)
- (void)DAG->UpdateNodeOperands(N, N->getOperand(0), MemBase.getOperand(0),
- MemBase.getOperand(1), N->getOperand(3));
- else
- (void)DAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
- MemBase.getOperand(0), N->getOperand(3));
- }
+ SDValue RegOpnd = MemBase.getOperand(0);
+ if (IsToc)
+ std::swap(RegOpnd, ImmOpnd);
+ if (IsLoad)
+ (void)DAG->UpdateNodeOperands(N, ImmOpnd, RegOpnd, N->getOperand(2));
+ else
+ (void)DAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd, RegOpnd,
+ N->getOperand(3));
if (UpdateHBase)
(void)DAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
>From 6436d4c0298d7cc0b8ad9254ad1dd55ecef51ff2 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Tue, 16 Jan 2024 18:09:45 +0800
Subject: [PATCH 3/5] Comment reversed operands of ADDItoc
---
llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 30 ++++++++++-----------
1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 353367514b8789..ab12864de6d546 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -7570,7 +7570,6 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
unsigned StorageOpcode = N->getMachineOpcode();
bool IsLoad = false;
SDValue MemOffset, MemBase;
- bool IsToc = false;
// TODO: Enable for AIX 32-bit
if (!Subtarget->isPPC64())
@@ -7619,10 +7618,6 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
break;
}
- // Only constant offsets can be folded.
- if (!isa<ConstantSDNode>(MemOffset) || !MemBase.isMachineOpcode())
- return;
-
auto CheckAlign = [DAG](const SDValue &Val, unsigned TargetAlign) {
if (TargetAlign == 0)
return true;
@@ -7635,8 +7630,17 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
return true;
};
- // We may need to carry relocation information in ADDI to load.
+ // Only additions with constant offsets will be folded.
+ if (!isa<ConstantSDNode>(MemOffset) || !MemBase.isMachineOpcode())
+ return;
+
+ // Some flags on the addition need to be carried over to the new memop.
std::optional<PPCII::TOF> NewOpFlags;
+ SDValue ImmOpnd, RegOpnd;
+ if (MemBase.getNumOperands() == 2) {
+ ImmOpnd = MemBase.getOperand(1);
+ RegOpnd = MemBase.getOperand(0);
+ }
switch (MemBase.getMachineOpcode()) {
default:
return;
@@ -7657,14 +7661,14 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
break;
case PPC::ADDItoc:
case PPC::ADDItoc8:
- IsToc = true;
+ // ADDItoc and ADDItoc8 (prints 'la') put register at second operand.
+ std::swap(ImmOpnd, RegOpnd);
if (!CheckAlign(MemBase.getOperand(0), ExtraAlign))
return;
break;
}
- SDValue ImmOpnd = MemBase.getOperand(1);
-
+ // TODO: Why?
// On PPC64, the TOC base pointer is guaranteed by the ABI only to have
// 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
// we might have needed different @ha relocation values for the offset
@@ -7682,6 +7686,7 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
int Offset = cast<ConstantSDNode>(MemOffset)->getZExtValue();
if (NewOpFlags) {
if (Offset < 0 || Offset > MaxDisplacement) {
+ // TODO: Why?
// Check base opcode and its uses, quit if it has multiple uses.
if (MemBase.getMachineOpcode() != PPC::ADDItocL ||
!HBase.isMachineOpcode() ||
@@ -7695,13 +7700,11 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
// Global addresses can be folded, only if they are sufficiently aligned.
if (!CheckAlign(ImmOpnd, ExtraAlign))
return;
-
if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
Offset += C->getSExtValue();
if ((ExtraAlign && (Offset % ExtraAlign) != 0) || !isInt<16>(Offset))
return;
-
ImmOpnd = DAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
ImmOpnd.getValueType());
} else if (Offset != 0) {
@@ -7715,8 +7718,8 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
LLVM_DEBUG(N->dump(DAG));
LLVM_DEBUG(dbgs() << "\n");
- // Add relocation flag if not present on the immediate operand.
if (NewOpFlags) {
+ // Add relocation flag if not present on the immediate operand.
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
const GlobalValue *GV = GA->getGlobal();
Align Alignment = GV->getPointerAlignment(DAG->getDataLayout());
@@ -7735,9 +7738,6 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
}
}
- SDValue RegOpnd = MemBase.getOperand(0);
- if (IsToc)
- std::swap(RegOpnd, ImmOpnd);
if (IsLoad)
(void)DAG->UpdateNodeOperands(N, ImmOpnd, RegOpnd, N->getOperand(2));
else
>From 4c98b55ce46e1d81b0a8b6959db05a83df92f893 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Tue, 16 Jan 2024 18:15:38 +0800
Subject: [PATCH 4/5] Fix format
---
llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index ab12864de6d546..c53adb5e2ced0b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -7668,7 +7668,6 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
break;
}
- // TODO: Why?
// On PPC64, the TOC base pointer is guaranteed by the ABI only to have
// 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
// we might have needed different @ha relocation values for the offset
@@ -7686,12 +7685,11 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
int Offset = cast<ConstantSDNode>(MemOffset)->getZExtValue();
if (NewOpFlags) {
if (Offset < 0 || Offset > MaxDisplacement) {
- // TODO: Why?
// Check base opcode and its uses, quit if it has multiple uses.
- if (MemBase.getMachineOpcode() != PPC::ADDItocL ||
- !HBase.isMachineOpcode() ||
- HBase.getMachineOpcode() != PPC::ADDIStocHA8 || !MemBase.hasOneUse() ||
- !HBase.hasOneUse() || HBase.getOperand(1) != ImmOpnd)
+ if (MemBase.getMachineOpcode() != PPC::ADDItocL || !MemBase.hasOneUse() ||
+ !HBase.isMachineOpcode() || !HBase.hasOneUse() ||
+ HBase.getMachineOpcode() != PPC::ADDIStocHA8 ||
+ HBase.getOperand(1) != ImmOpnd)
return;
UpdateHBase = true;
>From 2f8de37d10067a05f78a7dcea976d2f484d3f02f Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Wed, 17 Jan 2024 18:05:25 +0800
Subject: [PATCH 5/5] Rename HBase
---
llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index c53adb5e2ced0b..dd5aada811fb73 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -7679,20 +7679,21 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement);
}
- bool UpdateHBase = false;
- SDValue HBase = MemBase.getOperand(0);
+ // If addis also contributes to TOC relocation, it also needs to be updated.
+ bool UpdateHaBase = false;
+ SDValue HaBase = MemBase.getOperand(0);
int Offset = cast<ConstantSDNode>(MemOffset)->getZExtValue();
if (NewOpFlags) {
if (Offset < 0 || Offset > MaxDisplacement) {
// Check base opcode and its uses, quit if it has multiple uses.
if (MemBase.getMachineOpcode() != PPC::ADDItocL || !MemBase.hasOneUse() ||
- !HBase.isMachineOpcode() || !HBase.hasOneUse() ||
- HBase.getMachineOpcode() != PPC::ADDIStocHA8 ||
- HBase.getOperand(1) != ImmOpnd)
+ !HaBase.isMachineOpcode() || !HaBase.hasOneUse() ||
+ HaBase.getMachineOpcode() != PPC::ADDIStocHA8 ||
+ HaBase.getOperand(1) != ImmOpnd)
return;
- UpdateHBase = true;
+ UpdateHaBase = true;
}
} else {
// Global addresses can be folded, only if they are sufficiently aligned.
@@ -7742,8 +7743,8 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
(void)DAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd, RegOpnd,
N->getOperand(3));
- if (UpdateHBase)
- (void)DAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
+ if (UpdateHaBase)
+ (void)DAG->UpdateNodeOperands(HaBase.getNode(), HaBase.getOperand(0),
ImmOpnd);
if (MemBase.getNode()->use_empty())
More information about the cfe-commits
mailing list