[llvm] [PowerPC] Peephole address calculation in TOC memops (PR #76488)

Sean Fertile via llvm-commits llvm-commits at lists.llvm.org
Tue May 14 07:23:49 PDT 2024


================
@@ -7662,238 +7662,212 @@ static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
     DAG->RemoveDeadNode(InitialADDI.getNode());
 }
 
-void PPCDAGToDAGISel::PeepholePPC64() {
-  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
-
-  while (Position != CurDAG->allnodes_begin()) {
-    SDNode *N = &*--Position;
-    // Skip dead nodes and any non-machine opcodes.
-    if (N->use_empty() || !N->isMachineOpcode())
-      continue;
+static bool isValidMemOp(SDNode *N, bool &IsLoad, MaybeAlign &ExtraAlign) {
+  switch (N->getMachineOpcode()) {
+  default:
+    return false;
+  // Global must be word-aligned for LD, STD, LWA.
+  case PPC::LWA:
+  case PPC::LD:
+  case PPC::DFLOADf64:
+  case PPC::DFLOADf32:
+    ExtraAlign = Align(4);
+    [[fallthrough]];
+  case PPC::LBZ:
+  case PPC::LBZ8:
+  case PPC::LFD:
+  case PPC::LFS:
+  case PPC::LHA:
+  case PPC::LHA8:
+  case PPC::LHZ:
+  case PPC::LHZ8:
+  case PPC::LWZ:
+  case PPC::LWZ8:
+    IsLoad = true;
+    return true;
+  case PPC::STD:
+  case PPC::DFSTOREf64:
+  case PPC::DFSTOREf32:
+    ExtraAlign = Align(4);
+    [[fallthrough]];
+  case PPC::STB:
+  case PPC::STB8:
+  case PPC::STFD:
+  case PPC::STFS:
+  case PPC::STH:
+  case PPC::STH8:
+  case PPC::STW:
+  case PPC::STW8:
+    return true;
+  }
+}
 
-    if (isVSXSwap(SDValue(N, 0)))
-      reduceVSXSwap(N, CurDAG);
+static bool isMemBaseCombinable(SDValue Base) {
+  if (!Base.isMachineOpcode())
+    return false;
+  switch (Base.getMachineOpcode()) {
+  default:
+    return false;
+  case PPC::ADDI8:
+  case PPC::ADDI:
+  case PPC::ADDIdtprelL:
+  case PPC::ADDItlsldL:
+  case PPC::ADDItocL8:
+  case PPC::ADDItoc:
+  case PPC::ADDItoc8:
+    return true;
+  }
+}
 
-    // This optimization is performed for non-TOC-based local-exec accesses.
-    foldADDIForLocalExecAccesses(N, CurDAG);
+static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
+                              const PPCSubtarget *Subtarget) {
+  // TODO: Enable for AIX 32-bit.
+  if (!Subtarget->isPPC64())
+    return;
 
-    unsigned FirstOp;
-    unsigned StorageOpcode = N->getMachineOpcode();
-    bool RequiresMod4Offset = false;
+  bool IsLoad = false;
+  MaybeAlign ExtraAlign;
+  if (!isValidMemOp(N, IsLoad, ExtraAlign))
+    return;
 
-    switch (StorageOpcode) {
-    default: continue;
+  SDValue MemBase = N->getOperand(IsLoad ? 1 : 2);
+  if (!isMemBaseCombinable(MemBase))
+    return;
 
-    case PPC::LWA:
-    case PPC::LD:
-    case PPC::DFLOADf64:
-    case PPC::DFLOADf32:
-      RequiresMod4Offset = true;
-      [[fallthrough]];
-    case PPC::LBZ:
-    case PPC::LBZ8:
-    case PPC::LFD:
-    case PPC::LFS:
-    case PPC::LHA:
-    case PPC::LHA8:
-    case PPC::LHZ:
-    case PPC::LHZ8:
-    case PPC::LWZ:
-    case PPC::LWZ8:
-      FirstOp = 0;
-      break;
+  // Only additions with constant offsets will be folded.
+  auto *MemOffset = dyn_cast<ConstantSDNode>(N->getOperand(IsLoad ? 0 : 1));
+  if (!MemOffset)
+    return;
 
-    case PPC::STD:
-    case PPC::DFSTOREf64:
-    case PPC::DFSTOREf32:
-      RequiresMod4Offset = true;
-      [[fallthrough]];
-    case PPC::STB:
-    case PPC::STB8:
-    case PPC::STFD:
-    case PPC::STFS:
-    case PPC::STH:
-    case PPC::STH8:
-    case PPC::STW:
-    case PPC::STW8:
-      FirstOp = 1;
-      break;
-    }
+  SDValue ImmOp, RegOp;
+  unsigned BaseOpc = MemBase.getMachineOpcode();
+  assert(MemBase.getNumOperands() == 2 && "Invalid base of memop with offset!");
 
-    // If this is a load or store with a zero offset, or within the alignment,
-    // we may be able to fold an add-immediate into the memory operation.
-    // The check against alignment is below, as it can't occur until we check
-    // the arguments to N
-    if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
-      continue;
+  // ADDItoc and ADDItoc8 ('la') puts the register at the second operand.
+  if (BaseOpc == PPC::ADDItoc || BaseOpc == PPC::ADDItoc8) {
+    ImmOp = MemBase.getOperand(0);
+    RegOp = MemBase.getOperand(1);
+  } else {
+    ImmOp = MemBase.getOperand(1);
+    RegOp = MemBase.getOperand(0);
+  }
 
-    SDValue Base = N->getOperand(FirstOp + 1);
-    if (!Base.isMachineOpcode())
-      continue;
+  MaybeAlign ImmAlign;
+  if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOp))
+    ImmAlign = GA->getGlobal()->getPointerAlignment(DAG->getDataLayout());
+  else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOp))
+    ImmAlign = CP->getAlign();
 
-    unsigned Flags = 0;
-    bool ReplaceFlags = true;
+  if (ImmAlign && ExtraAlign && ImmAlign.value() < ExtraAlign.value())
----------------
mandlebug wrote:

On lines 7756-7759 we are extracting alignment info from the node used to produce the immediate, and here we check that said alignment meets the encoding limitation for the instruction. If we haven't set ImmAlign should we return early since we cannot guarantee the operand has a suitable alignment for the instruction?

https://github.com/llvm/llvm-project/pull/76488


More information about the llvm-commits mailing list