[clang-tools-extra] [clang] [llvm] [PowerPC] Peephole address calculation in TOC memops (PR #76488)

Qiu Chaofan via cfe-commits cfe-commits at lists.llvm.org
Wed Jan 17 02:05:46 PST 2024


https://github.com/ecnelises updated https://github.com/llvm/llvm-project/pull/76488

>From 7eb909423d49ea19d9978b097ceb8c4a95fc7bac Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Thu, 28 Dec 2023 11:09:07 +0800
Subject: [PATCH 1/5] [PowerPC] Peephole address calculation in TOC memops

---
 llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp   | 391 +++++++++---------
 llvm/test/CodeGen/PowerPC/toc-data-const.ll   |  45 +-
 .../PowerPC/toc-data-peephole-aligment.ll     |  16 +
 llvm/test/CodeGen/PowerPC/toc-data.ll         |  22 +-
 4 files changed, 253 insertions(+), 221 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/toc-data-peephole-aligment.ll

diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index b57d185bb638b8..c96bf3204a32ed 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -7567,224 +7567,233 @@ static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
   DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0));
 }
 
-void PPCDAGToDAGISel::PeepholePPC64() {
-  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
-
-  while (Position != CurDAG->allnodes_begin()) {
-    SDNode *N = &*--Position;
-    // Skip dead nodes and any non-machine opcodes.
-    if (N->use_empty() || !N->isMachineOpcode())
-      continue;
-
-    if (isVSXSwap(SDValue(N, 0)))
-      reduceVSXSwap(N, CurDAG);
-
-    unsigned FirstOp;
-    unsigned StorageOpcode = N->getMachineOpcode();
-    bool RequiresMod4Offset = false;
+static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
+                              const PPCSubtarget *Subtarget) {
+  unsigned StorageOpcode = N->getMachineOpcode();
+  bool IsLoad = false;
+  SDValue MemOffset, MemBase;
+  bool IsToc = false;
+
+  // TODO: Enable for AIX 32-bit
+  if (!Subtarget->isPPC64())
+    return;
 
-    switch (StorageOpcode) {
-    default: continue;
+  // Global must be word-aligned for LD, STD, LWA.
+  unsigned ExtraAlign = 0;
+  switch (StorageOpcode) {
+  default:
+    return;
+  case PPC::LWA:
+  case PPC::LD:
+  case PPC::DFLOADf64:
+  case PPC::DFLOADf32:
+    ExtraAlign = 4;
+    [[fallthrough]];
+  case PPC::LBZ:
+  case PPC::LBZ8:
+  case PPC::LFD:
+  case PPC::LFS:
+  case PPC::LHA:
+  case PPC::LHA8:
+  case PPC::LHZ:
+  case PPC::LHZ8:
+  case PPC::LWZ:
+  case PPC::LWZ8:
+    IsLoad = true;
+    MemOffset = N->getOperand(0);
+    MemBase = N->getOperand(1);
+    break;
+  case PPC::STD:
+  case PPC::DFSTOREf64:
+  case PPC::DFSTOREf32:
+    ExtraAlign = 4;
+    [[fallthrough]];
+  case PPC::STB:
+  case PPC::STB8:
+  case PPC::STFD:
+  case PPC::STFS:
+  case PPC::STH:
+  case PPC::STH8:
+  case PPC::STW:
+  case PPC::STW8:
+    MemOffset = N->getOperand(1);
+    MemBase = N->getOperand(2);
+    break;
+  }
 
-    case PPC::LWA:
-    case PPC::LD:
-    case PPC::DFLOADf64:
-    case PPC::DFLOADf32:
-      RequiresMod4Offset = true;
-      [[fallthrough]];
-    case PPC::LBZ:
-    case PPC::LBZ8:
-    case PPC::LFD:
-    case PPC::LFS:
-    case PPC::LHA:
-    case PPC::LHA8:
-    case PPC::LHZ:
-    case PPC::LHZ8:
-    case PPC::LWZ:
-    case PPC::LWZ8:
-      FirstOp = 0;
-      break;
+  // Only constant offsets can be folded.
+  if (!isa<ConstantSDNode>(MemOffset) || !MemBase.isMachineOpcode())
+    return;
 
-    case PPC::STD:
-    case PPC::DFSTOREf64:
-    case PPC::DFSTOREf32:
-      RequiresMod4Offset = true;
-      [[fallthrough]];
-    case PPC::STB:
-    case PPC::STB8:
-    case PPC::STFD:
-    case PPC::STFS:
-    case PPC::STH:
-    case PPC::STH8:
-    case PPC::STW:
-    case PPC::STW8:
-      FirstOp = 1;
-      break;
+  auto CheckAlign = [DAG](const SDValue &Val, unsigned TargetAlign) {
+    if (TargetAlign == 0)
+      return true;
+    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val)) {
+      const GlobalValue *GV = GA->getGlobal();
+      Align Alignment = GV->getPointerAlignment(DAG->getDataLayout());
+      if (Alignment < TargetAlign)
+        return false;
     }
+    return true;
+  };
 
-    // If this is a load or store with a zero offset, or within the alignment,
-    // we may be able to fold an add-immediate into the memory operation.
-    // The check against alignment is below, as it can't occur until we check
-    // the arguments to N
-    if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
-      continue;
-
-    SDValue Base = N->getOperand(FirstOp + 1);
-    if (!Base.isMachineOpcode())
-      continue;
+  // We may need to carry relocation information in ADDI to load.
+  std::optional<PPCII::TOF> NewOpFlags;
+  switch (MemBase.getMachineOpcode()) {
+  default:
+    return;
+  case PPC::ADDI8:
+  case PPC::ADDI:
+    // In some cases (such as TLS) the relocation information
+    // is already in place on the operand, so copying the operand
+    // is sufficient.
+    break;
+  case PPC::ADDIdtprelL:
+    NewOpFlags = PPCII::MO_DTPREL_LO;
+    break;
+  case PPC::ADDItlsldL:
+    NewOpFlags = PPCII::MO_TLSLD_LO;
+    break;
+  case PPC::ADDItocL:
+    NewOpFlags = PPCII::MO_TOC_LO;
+    break;
+  case PPC::ADDItoc:
+  case PPC::ADDItoc8:
+    IsToc = true;
+    if (!CheckAlign(MemBase.getOperand(0), ExtraAlign))
+      return;
+    break;
+  }
 
-    unsigned Flags = 0;
-    bool ReplaceFlags = true;
+  SDValue ImmOpnd = MemBase.getOperand(1);
 
-    // When the feeding operation is an add-immediate of some sort,
-    // determine whether we need to add relocation information to the
-    // target flags on the immediate operand when we fold it into the
-    // load instruction.
-    //
-    // For something like ADDItocL, the relocation information is
-    // inferred from the opcode; when we process it in the AsmPrinter,
-    // we add the necessary relocation there.  A load, though, can receive
-    // relocation from various flavors of ADDIxxx, so we need to carry
-    // the relocation information in the target flags.
-    switch (Base.getMachineOpcode()) {
-    default: continue;
-
-    case PPC::ADDI8:
-    case PPC::ADDI:
-      // In some cases (such as TLS) the relocation information
-      // is already in place on the operand, so copying the operand
-      // is sufficient.
-      ReplaceFlags = false;
-      break;
-    case PPC::ADDIdtprelL:
-      Flags = PPCII::MO_DTPREL_LO;
-      break;
-    case PPC::ADDItlsldL:
-      Flags = PPCII::MO_TLSLD_LO;
-      break;
-    case PPC::ADDItocL:
-      Flags = PPCII::MO_TOC_LO;
-      break;
-    }
+  // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
+  // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
+  // we might have needed different @ha relocation values for the offset
+  // pointers).
+  int MaxDisplacement = 7;
+  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
+    const GlobalValue *GV = GA->getGlobal();
+    Align Alignment = GV->getPointerAlignment(DAG->getDataLayout());
+    MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement);
+  }
 
-    SDValue ImmOpnd = Base.getOperand(1);
+  bool UpdateHBase = false;
+  SDValue HBase = MemBase.getOperand(0);
 
-    // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
-    // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
-    // we might have needed different @ha relocation values for the offset
-    // pointers).
-    int MaxDisplacement = 7;
-    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
-      const GlobalValue *GV = GA->getGlobal();
-      Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
-      MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement);
-    }
+  int Offset = cast<ConstantSDNode>(MemOffset)->getZExtValue();
+  if (NewOpFlags) {
+    if (Offset < 0 || Offset > MaxDisplacement) {
+      // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
+      // one use, then we can do this for any offset, we just need to also
+      // update the offset (i.e. the symbol addend) on the addis also.
+      if (MemBase.getMachineOpcode() != PPC::ADDItocL) {
+        return;
+      }
 
-    bool UpdateHBase = false;
-    SDValue HBase = Base.getOperand(0);
+      if (!HBase.isMachineOpcode() ||
+          HBase.getMachineOpcode() != PPC::ADDIStocHA8) {
+        return;
+      }
 
-    int Offset = N->getConstantOperandVal(FirstOp);
-    if (ReplaceFlags) {
-      if (Offset < 0 || Offset > MaxDisplacement) {
-        // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
-        // one use, then we can do this for any offset, we just need to also
-        // update the offset (i.e. the symbol addend) on the addis also.
-        if (Base.getMachineOpcode() != PPC::ADDItocL)
-          continue;
+      if (!MemBase.hasOneUse() || !HBase.hasOneUse()) {
+        return;
+      }
 
-        if (!HBase.isMachineOpcode() ||
-            HBase.getMachineOpcode() != PPC::ADDIStocHA8)
-          continue;
+      SDValue HImmOpnd = HBase.getOperand(1);
+      if (HImmOpnd != ImmOpnd) {
+        return;
+      }
 
-        if (!Base.hasOneUse() || !HBase.hasOneUse())
-          continue;
+      UpdateHBase = true;
+    }
+  } else {
+    // Global addresses can be folded, only if they are sufficiently aligned.
+    if (!CheckAlign(ImmOpnd, ExtraAlign))
+      return;
 
-        SDValue HImmOpnd = HBase.getOperand(1);
-        if (HImmOpnd != ImmOpnd)
-          continue;
+    if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
+      Offset += C->getSExtValue();
 
-        UpdateHBase = true;
-      }
-    } else {
-      // Global addresses can be folded, but only if they are sufficiently
-      // aligned.
-      if (RequiresMod4Offset) {
-        if (GlobalAddressSDNode *GA =
-                dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
-          const GlobalValue *GV = GA->getGlobal();
-          Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
-          if (Alignment < 4)
-            continue;
-        }
+      if (ExtraAlign && (Offset % ExtraAlign) != 0) {
+        return;
       }
 
-      // If we're directly folding the addend from an addi instruction, then:
-      //  1. In general, the offset on the memory access must be zero.
-      //  2. If the addend is a constant, then it can be combined with a
-      //     non-zero offset, but only if the result meets the encoding
-      //     requirements.
-      if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
-        Offset += C->getSExtValue();
+      if (!isInt<16>(Offset)) {
+        return;
+      }
 
-        if (RequiresMod4Offset && (Offset % 4) != 0)
-          continue;
+      ImmOpnd = DAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
+                                       ImmOpnd.getValueType());
+    } else if (Offset != 0) {
+      return;
+    }
+  }
 
-        if (!isInt<16>(Offset))
-          continue;
+  LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase:    ");
+  LLVM_DEBUG(MemBase->dump(DAG));
+  LLVM_DEBUG(dbgs() << "\nN: ");
+  LLVM_DEBUG(N->dump(DAG));
+  LLVM_DEBUG(dbgs() << "\n");
 
-        ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
-                                            ImmOpnd.getValueType());
-      } else if (Offset != 0) {
-        continue;
+  // If the relocation information isn't already present on the
+  // immediate operand, add it now.
+  if (NewOpFlags) {
+    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
+      const GlobalValue *GV = GA->getGlobal();
+      Align Alignment = GV->getPointerAlignment(DAG->getDataLayout());
+      // We can't perform this optimization for data whose alignment
+      // is insufficient for the instruction encoding.
+      if (Alignment < 4 && (ExtraAlign || (Offset % 4) != 0)) {
+        LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
+        return;
       }
+      ImmOpnd = DAG->getTargetGlobalAddress(GV, SDLoc(GA), MVT::i64, Offset,
+                                            NewOpFlags.value());
+    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
+      const Constant *C = CP->getConstVal();
+      ImmOpnd = DAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(), Offset,
+                                           NewOpFlags.value());
     }
+  }
 
-    // We found an opportunity.  Reverse the operands from the add
-    // immediate and substitute them into the load or store.  If
-    // needed, update the target flags for the immediate operand to
-    // reflect the necessary relocation information.
-    LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase:    ");
-    LLVM_DEBUG(Base->dump(CurDAG));
-    LLVM_DEBUG(dbgs() << "\nN: ");
-    LLVM_DEBUG(N->dump(CurDAG));
-    LLVM_DEBUG(dbgs() << "\n");
+  if (IsLoad) {
+    if (IsToc)
+      (void)DAG->UpdateNodeOperands(N, MemBase.getOperand(0),
+                                    MemBase.getOperand(1), N->getOperand(2));
+    else
+      (void)DAG->UpdateNodeOperands(N, ImmOpnd, MemBase.getOperand(0),
+                                    N->getOperand(2));
+  } else {
+    if (IsToc)
+      (void)DAG->UpdateNodeOperands(N, N->getOperand(0), MemBase.getOperand(0),
+                                    MemBase.getOperand(1), N->getOperand(3));
+    else
+      (void)DAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
+                                    MemBase.getOperand(0), N->getOperand(3));
+  }
 
-    // If the relocation information isn't already present on the
-    // immediate operand, add it now.
-    if (ReplaceFlags) {
-      if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
-        SDLoc dl(GA);
-        const GlobalValue *GV = GA->getGlobal();
-        Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
-        // We can't perform this optimization for data whose alignment
-        // is insufficient for the instruction encoding.
-        if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) {
-          LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
-          continue;
-        }
-        ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
-      } else if (ConstantPoolSDNode *CP =
-                 dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
-        const Constant *C = CP->getConstVal();
-        ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(),
-                                                Offset, Flags);
-      }
-    }
+  if (UpdateHBase)
+    (void)DAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
+                                  ImmOpnd);
 
-    if (FirstOp == 1) // Store
-      (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
-                                       Base.getOperand(0), N->getOperand(3));
-    else // Load
-      (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
-                                       N->getOperand(2));
+  if (MemBase.getNode()->use_empty())
+    DAG->RemoveDeadNode(MemBase.getNode());
+}
 
-    if (UpdateHBase)
-      (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
-                                       ImmOpnd);
+void PPCDAGToDAGISel::PeepholePPC64() {
+  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
+
+  while (Position != CurDAG->allnodes_begin()) {
+    SDNode *N = &*--Position;
+    // Skip dead nodes and any non-machine opcodes.
+    if (N->use_empty() || !N->isMachineOpcode())
+      continue;
+
+    if (isVSXSwap(SDValue(N, 0)))
+      reduceVSXSwap(N, CurDAG);
 
-    // The add-immediate may now be dead, in which case remove it.
-    if (Base.getNode()->use_empty())
-      CurDAG->RemoveDeadNode(Base.getNode());
+    peepholeMemOffset(N, CurDAG, Subtarget);
   }
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/toc-data-const.ll b/llvm/test/CodeGen/PowerPC/toc-data-const.ll
index 740032e26a432e..6078271738b7cf 100644
--- a/llvm/test/CodeGen/PowerPC/toc-data-const.ll
+++ b/llvm/test/CodeGen/PowerPC/toc-data-const.ll
@@ -1,5 +1,8 @@
-; RUN: llc -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s --check-prefix CHECK
-; RUN: llc -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s --check-prefix CHECK
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff -O0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK32,NOOPT
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -O0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK64,NOOPT
+
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s --check-prefixes=CHECK,CHECK32,NOOPT
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s --check-prefixes=CHECK,CHECK64,OPT
 
 ; RUN: llc -filetype=obj -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s -o %t32.o
 ; RUN: llvm-readobj %t32.o --syms --relocs | FileCheck %s --check-prefix=OBJ32
@@ -23,15 +26,19 @@ define ptr @retptr() {
 
 attributes #0 = { "toc-data" }
 
-; CHECK:       .read:
-; CHECK:        la 3, i1[TD](2)
+; CHECK-LABEL: .read:
+; NOOPT:         la 3, i1[TD](2)
+; NOOPT:         lwz 3, 0(3)
+; OPT:           lwz 3, i1[TD](2)
 
-; CHECK:       .retptr:
-; CHECK:        la 3, i2[TD](2)
+; CHECK-LABEL: .retptr:
+; CHECK:         la 3, i2[TD](2)
+; CHECK-NEXT:    blr
 
 ; CHECK-DAG:   .toc
 ; CHECK:         .extern i1[TD]
-; CHECK:         .csect i2[TD]
+; CHECK32:       .csect i2[TD],2
+; CHECK64:       .csect i2[TD],3
 
 ; OBJ32:      Relocations [
 ; OBJ32-NEXT:   Section (index: 1) .text {
@@ -111,14 +118,14 @@ attributes #0 = { "toc-data" }
 ; OBJ64:      Relocations [
 ; OBJ64-NEXT:   Section (index: 1) .text {
 ; OBJ64-NEXT:     0x2 R_TOC i1(1) 0xF
-; OBJ64-NEXT:     0x26 R_TOC i2(15) 0xF
+; OBJ64-NEXT:     0x22 R_TOC i2(15) 0xF
 ; OBJ64-NEXT:   }
 ; OBJ64-NEXT:   Section (index: 2) .data {
-; OBJ64-NEXT:     0x48 R_POS .read(5) 0x3F
-; OBJ64-NEXT:     0x50 R_POS TOC(13) 0x3F
-; OBJ64-NEXT:     0x60 R_POS .retptr(7) 0x3F
-; OBJ64-NEXT:     0x68 R_POS TOC(13) 0x3F
-; OBJ64-NEXT:     0x78 R_POS i1(1) 0x3F
+; OBJ64-NEXT:     0x40 R_POS .read(5) 0x3F
+; OBJ64-NEXT:     0x48 R_POS TOC(13) 0x3F
+; OBJ64-NEXT:     0x58 R_POS .retptr(7) 0x3F
+; OBJ64-NEXT:     0x60 R_POS TOC(13) 0x3F
+; OBJ64-NEXT:     0x70 R_POS i1(1) 0x3F
 ; OBJ64-NEXT:   }
 ; OBJ64-NEXT: ]
 
@@ -144,7 +151,7 @@ attributes #0 = { "toc-data" }
 ; OBJ64:      Symbol {
 ; OBJ64:        Index: 13
 ; OBJ64-NEXT:   Name: TOC
-; OBJ64-NEXT:   Value (RelocatableAddress): 0x78
+; OBJ64-NEXT:   Value (RelocatableAddress): 0x70
 ; OBJ64-NEXT:   Section: .data
 ; OBJ64-NEXT:   Type: 0x0
 ; OBJ64-NEXT:   StorageClass: C_HIDEXT (0x6B)
@@ -163,7 +170,7 @@ attributes #0 = { "toc-data" }
 ; OBJ64:      Symbol {
 ; OBJ64:        Index: 15
 ; OBJ64-NEXT:   Name: i2
-; OBJ64-NEXT:   Value (RelocatableAddress): 0x78
+; OBJ64-NEXT:   Value (RelocatableAddress): 0x70
 ; OBJ64-NEXT:   Section: .data
 ; OBJ64-NEXT:   Type: 0x0
 ; OBJ64-NEXT:   StorageClass: C_EXT (0x2)
@@ -188,8 +195,8 @@ attributes #0 = { "toc-data" }
 ; DIS32-NEXT:                         00000026:  R_TOC	i2
 
 ; DIS64:      0000000000000000 <.read>:
-; DIS64-NEXT:        0: 38 62 00 00  	addi 3, 2, 0
+; DIS64-NEXT:        0: 80 62 00 00  	lwz 3, 0(2)
 ; DIS64-NEXT:                         0000000000000002:  R_TOC	i1
-; DIS64:      0000000000000024 <.retptr>:
-; DIS64-NEXT:       24: 38 62 00 00  	addi 3, 2, 0
-; DIS64-NEXT:                         0000000000000026:  R_TOC	i2
+; DIS64:      0000000000000020 <.retptr>:
+; DIS64-NEXT:       20: 38 62 00 00  	addi 3, 2, 0
+; DIS64-NEXT:                         0000000000000022:  R_TOC	i2
diff --git a/llvm/test/CodeGen/PowerPC/toc-data-peephole-aligment.ll b/llvm/test/CodeGen/PowerPC/toc-data-peephole-aligment.ll
new file mode 100644
index 00000000000000..8ec5d9fd331750
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/toc-data-peephole-aligment.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s
+
+@underaligned = dso_local global i32 123, align 1 #0
+
+define i64 @read() {
+entry:
+  %0  = load i32, ptr @underaligned, align 1
+  %1 = sext i32 %0 to i64
+  ret i64 %1
+}
+
+attributes #0 = { "toc-data"  }
+
+; CHECK-LABEL: .read
+; CHECK:       la [[DEF:[0-9]+]], underaligned[TD](2)
+; CHECK:       lwa {{[0-9]+}}, 0([[DEF]])
diff --git a/llvm/test/CodeGen/PowerPC/toc-data.ll b/llvm/test/CodeGen/PowerPC/toc-data.ll
index cbf3be9fcaad05..a8a7b5d4d386f3 100644
--- a/llvm/test/CodeGen/PowerPC/toc-data.ll
+++ b/llvm/test/CodeGen/PowerPC/toc-data.ll
@@ -3,14 +3,14 @@
 ; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s \
 ; RUN:     -stop-before=ppc-vsx-copy | FileCheck %s --check-prefix CHECK64
 ; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s --check-prefix TEST32
-; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s --check-prefix TEST64
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s --check-prefixes=TEST64,ASMOPT64
 
 ; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s \
 ; RUN:     -stop-before=ppc-vsx-copy -O0  | FileCheck %s --check-prefix CHECK32
 ; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s \
 ; RUN:     -stop-before=ppc-vsx-copy -O0 | FileCheck %s --check-prefix CHECK64-NOOPT
 ; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs -O0 < %s | FileCheck %s --check-prefix TEST32
-; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs -O0 < %s | FileCheck %s --check-prefix TEST64
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs -O0 < %s | FileCheck %s --check-prefixes=TEST64,ASMNOOPT64
 
 @i = dso_local global i32 0, align 4 #0
 @d = dso_local local_unnamed_addr global double 3.141590e+00, align 8
@@ -32,8 +32,7 @@ define dso_local void @write_int(i32 signext %in) {
 ; TEST32-NEXT:      stw 3, 0(4)
 
 ; CHECK64: name:            write_int
-; CHECK64:      %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @i, $x2
-; CHECK64-NEXT: STW8 %{{[0-9]+}}, 0, killed %[[SCRATCH]] :: (store (s32) into @i)
+; CHECK64:      STW8 %{{[0-9]+}}, @i, $x2  :: (store (s32) into @i)
 
 ; CHECK64-NOOPT:  name: write_int
 ; CHECK64-NOOPT:    %[[SUBREG:[0-9]+]]:gprc = COPY %{{[0-9]}}.sub_32
@@ -41,8 +40,9 @@ define dso_local void @write_int(i32 signext %in) {
 ; CHECK64-NOOPT:    STW %[[SUBREG]], 0, killed %[[ADDR]] :: (store (s32) into @i)
 
 ; TEST64:         .write_int:
-; TEST64:           la 4, i[TD](2)
-; TEST64-NEXT:      stw 3, 0(4)
+; ASMNOOPT64:       la 4, i[TD](2)
+; ASMNOOPT64-NEXT:  stw 3, 0(4)
+; ASMOPT64:         stw 3, i[TD](2)
 
 
 define dso_local i64 @read_ll() {
@@ -85,16 +85,16 @@ define dso_local float @read_float() {
 ; TEST32-NEXT:    lfs 1, 0(3)
 
 ; CHECK64: name:            read_float
-; CHECK64: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @f, $x2
-; CHECK64: %{{[0-9]+}}:f4rc = LFS 0, killed %[[SCRATCH]] :: (dereferenceable load (s32) from @f)
+; CHECK64: %{{[0-9]+}}:f4rc = LFS @f, $x2 :: (dereferenceable load (s32) from @f)
 
 ; CHECK64-NOOPT: name:            read_float
 ; CHECK64-NOOPT:   %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @f, $x2
 ; CHECK64-NOOPT:   %{{[0-9]+}}:f4rc = LFS 0, killed %[[SCRATCH]]
 
-; TEST64:       .read_float:
-; TEST64:         la 3, f[TD](2)
-; TEST64-NEXT:    lfs 1, 0(3)
+; TEST64:          .read_float:
+; ASMNOOPT64:        la 3, f[TD](2)
+; ASMNOOPT64-NEXT:   lfs 1, 0(3)
+; ASMOPT64:          lfs 1, f[TD](2)
 
 
 define dso_local void @write_double(double %in) {

>From e7001e9027b5b09d0856c67942f3437374e031c5 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Thu, 11 Jan 2024 13:53:01 +0800
Subject: [PATCH 2/5] Reverse operands

---
 llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 57 ++++++---------------
 1 file changed, 15 insertions(+), 42 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 95b391f9c0d3bc..353367514b8789 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -7682,26 +7682,12 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
   int Offset = cast<ConstantSDNode>(MemOffset)->getZExtValue();
   if (NewOpFlags) {
     if (Offset < 0 || Offset > MaxDisplacement) {
-      // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
-      // one use, then we can do this for any offset, we just need to also
-      // update the offset (i.e. the symbol addend) on the addis also.
-      if (MemBase.getMachineOpcode() != PPC::ADDItocL) {
+      // Check base opcode and its uses, quit if it has multiple uses.
+      if (MemBase.getMachineOpcode() != PPC::ADDItocL ||
+          !HBase.isMachineOpcode() ||
+          HBase.getMachineOpcode() != PPC::ADDIStocHA8 || !MemBase.hasOneUse() ||
+          !HBase.hasOneUse() || HBase.getOperand(1) != ImmOpnd)
         return;
-      }
-
-      if (!HBase.isMachineOpcode() ||
-          HBase.getMachineOpcode() != PPC::ADDIStocHA8) {
-        return;
-      }
-
-      if (!MemBase.hasOneUse() || !HBase.hasOneUse()) {
-        return;
-      }
-
-      SDValue HImmOpnd = HBase.getOperand(1);
-      if (HImmOpnd != ImmOpnd) {
-        return;
-      }
 
       UpdateHBase = true;
     }
@@ -7713,13 +7699,8 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
     if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
       Offset += C->getSExtValue();
 
-      if (ExtraAlign && (Offset % ExtraAlign) != 0) {
+      if ((ExtraAlign && (Offset % ExtraAlign) != 0) || !isInt<16>(Offset))
         return;
-      }
-
-      if (!isInt<16>(Offset)) {
-        return;
-      }
 
       ImmOpnd = DAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
                                        ImmOpnd.getValueType());
@@ -7734,8 +7715,7 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
   LLVM_DEBUG(N->dump(DAG));
   LLVM_DEBUG(dbgs() << "\n");
 
-  // If the relocation information isn't already present on the
-  // immediate operand, add it now.
+  // Add relocation flag if not present on the immediate operand.
   if (NewOpFlags) {
     if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
       const GlobalValue *GV = GA->getGlobal();
@@ -7755,21 +7735,14 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
     }
   }
 
-  if (IsLoad) {
-    if (IsToc)
-      (void)DAG->UpdateNodeOperands(N, MemBase.getOperand(0),
-                                    MemBase.getOperand(1), N->getOperand(2));
-    else
-      (void)DAG->UpdateNodeOperands(N, ImmOpnd, MemBase.getOperand(0),
-                                    N->getOperand(2));
-  } else {
-    if (IsToc)
-      (void)DAG->UpdateNodeOperands(N, N->getOperand(0), MemBase.getOperand(0),
-                                    MemBase.getOperand(1), N->getOperand(3));
-    else
-      (void)DAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
-                                    MemBase.getOperand(0), N->getOperand(3));
-  }
+  SDValue RegOpnd = MemBase.getOperand(0);
+  if (IsToc)
+    std::swap(RegOpnd, ImmOpnd);
+  if (IsLoad)
+    (void)DAG->UpdateNodeOperands(N, ImmOpnd, RegOpnd, N->getOperand(2));
+  else
+    (void)DAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd, RegOpnd,
+                                  N->getOperand(3));
 
   if (UpdateHBase)
     (void)DAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),

>From 6436d4c0298d7cc0b8ad9254ad1dd55ecef51ff2 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Tue, 16 Jan 2024 18:09:45 +0800
Subject: [PATCH 3/5] Comment reversed operands of ADDItoc

---
 llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 30 ++++++++++-----------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 353367514b8789..ab12864de6d546 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -7570,7 +7570,6 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
   unsigned StorageOpcode = N->getMachineOpcode();
   bool IsLoad = false;
   SDValue MemOffset, MemBase;
-  bool IsToc = false;
 
   // TODO: Enable for AIX 32-bit
   if (!Subtarget->isPPC64())
@@ -7619,10 +7618,6 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
     break;
   }
 
-  // Only constant offsets can be folded.
-  if (!isa<ConstantSDNode>(MemOffset) || !MemBase.isMachineOpcode())
-    return;
-
   auto CheckAlign = [DAG](const SDValue &Val, unsigned TargetAlign) {
     if (TargetAlign == 0)
       return true;
@@ -7635,8 +7630,17 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
     return true;
   };
 
-  // We may need to carry relocation information in ADDI to load.
+  // Only additions with constant offsets will be folded.
+  if (!isa<ConstantSDNode>(MemOffset) || !MemBase.isMachineOpcode())
+    return;
+
+  // Some flags in addition needs to be carried to new memop.
   std::optional<PPCII::TOF> NewOpFlags;
+  SDValue ImmOpnd, RegOpnd;
+  if (MemBase.getNumOperands() == 2) {
+    ImmOpnd = MemBase.getOperand(1);
+    RegOpnd = MemBase.getOperand(0);
+  }
   switch (MemBase.getMachineOpcode()) {
   default:
     return;
@@ -7657,14 +7661,14 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
     break;
   case PPC::ADDItoc:
   case PPC::ADDItoc8:
-    IsToc = true;
+    // ADDItoc and ADDItoc8 (prints 'la') put register at second operand.
+    std::swap(ImmOpnd, RegOpnd);
     if (!CheckAlign(MemBase.getOperand(0), ExtraAlign))
       return;
     break;
   }
 
-  SDValue ImmOpnd = MemBase.getOperand(1);
-
+  // TODO: Why?
   // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
   // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
   // we might have needed different @ha relocation values for the offset
@@ -7682,6 +7686,7 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
   int Offset = cast<ConstantSDNode>(MemOffset)->getZExtValue();
   if (NewOpFlags) {
     if (Offset < 0 || Offset > MaxDisplacement) {
+      // TODO: Why?
       // Check base opcode and its uses, quit if it has multiple uses.
       if (MemBase.getMachineOpcode() != PPC::ADDItocL ||
           !HBase.isMachineOpcode() ||
@@ -7695,13 +7700,11 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
     // Global addresses can be folded, only if they are sufficiently aligned.
     if (!CheckAlign(ImmOpnd, ExtraAlign))
       return;
-
     if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
       Offset += C->getSExtValue();
 
       if ((ExtraAlign && (Offset % ExtraAlign) != 0) || !isInt<16>(Offset))
         return;
-
       ImmOpnd = DAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
                                        ImmOpnd.getValueType());
     } else if (Offset != 0) {
@@ -7715,8 +7718,8 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
   LLVM_DEBUG(N->dump(DAG));
   LLVM_DEBUG(dbgs() << "\n");
 
-  // Add relocation flag if not present on the immediate operand.
   if (NewOpFlags) {
+    // Add relocation flag if not present on the immediate operand.
     if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
       const GlobalValue *GV = GA->getGlobal();
       Align Alignment = GV->getPointerAlignment(DAG->getDataLayout());
@@ -7735,9 +7738,6 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
     }
   }
 
-  SDValue RegOpnd = MemBase.getOperand(0);
-  if (IsToc)
-    std::swap(RegOpnd, ImmOpnd);
   if (IsLoad)
     (void)DAG->UpdateNodeOperands(N, ImmOpnd, RegOpnd, N->getOperand(2));
   else

>From 4c98b55ce46e1d81b0a8b6959db05a83df92f893 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Tue, 16 Jan 2024 18:15:38 +0800
Subject: [PATCH 4/5] Fix format

---
 llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index ab12864de6d546..c53adb5e2ced0b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -7668,7 +7668,6 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
     break;
   }
 
-  // TODO: Why?
   // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
   // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
   // we might have needed different @ha relocation values for the offset
@@ -7686,12 +7685,11 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
   int Offset = cast<ConstantSDNode>(MemOffset)->getZExtValue();
   if (NewOpFlags) {
     if (Offset < 0 || Offset > MaxDisplacement) {
-      // TODO: Why?
       // Check base opcode and its uses, quit if it has multiple uses.
-      if (MemBase.getMachineOpcode() != PPC::ADDItocL ||
-          !HBase.isMachineOpcode() ||
-          HBase.getMachineOpcode() != PPC::ADDIStocHA8 || !MemBase.hasOneUse() ||
-          !HBase.hasOneUse() || HBase.getOperand(1) != ImmOpnd)
+      if (MemBase.getMachineOpcode() != PPC::ADDItocL || !MemBase.hasOneUse() ||
+          !HBase.isMachineOpcode() || !HBase.hasOneUse() ||
+          HBase.getMachineOpcode() != PPC::ADDIStocHA8 ||
+          HBase.getOperand(1) != ImmOpnd)
         return;
 
       UpdateHBase = true;

>From 2f8de37d10067a05f78a7dcea976d2f484d3f02f Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Wed, 17 Jan 2024 18:05:25 +0800
Subject: [PATCH 5/5] Rename HBase

---
 llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index c53adb5e2ced0b..dd5aada811fb73 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -7679,20 +7679,21 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
     MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement);
   }
 
-  bool UpdateHBase = false;
-  SDValue HBase = MemBase.getOperand(0);
+  // If addis also contributes to TOC relocation, it also needs to be updated.
+  bool UpdateHaBase = false;
+  SDValue HaBase = MemBase.getOperand(0);
 
   int Offset = cast<ConstantSDNode>(MemOffset)->getZExtValue();
   if (NewOpFlags) {
     if (Offset < 0 || Offset > MaxDisplacement) {
       // Check base opcode and its uses, quit if it has multiple uses.
       if (MemBase.getMachineOpcode() != PPC::ADDItocL || !MemBase.hasOneUse() ||
-          !HBase.isMachineOpcode() || !HBase.hasOneUse() ||
-          HBase.getMachineOpcode() != PPC::ADDIStocHA8 ||
-          HBase.getOperand(1) != ImmOpnd)
+          !HaBase.isMachineOpcode() || !HaBase.hasOneUse() ||
+          HaBase.getMachineOpcode() != PPC::ADDIStocHA8 ||
+          HaBase.getOperand(1) != ImmOpnd)
         return;
 
-      UpdateHBase = true;
+      UpdateHaBase = true;
     }
   } else {
     // Global addresses can be folded, only if they are sufficiently aligned.
@@ -7742,8 +7743,8 @@ static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
     (void)DAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd, RegOpnd,
                                   N->getOperand(3));
 
-  if (UpdateHBase)
-    (void)DAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
+  if (UpdateHaBase)
+    (void)DAG->UpdateNodeOperands(HaBase.getNode(), HaBase.getOperand(0),
                                   ImmOpnd);
 
   if (MemBase.getNode()->use_empty())



More information about the cfe-commits mailing list