[llvm] [PowerPC] Peephole address calculation in TOC memops (PR #76488)

Amy Kwan via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 2 20:53:02 PST 2024


================
@@ -7651,241 +7651,215 @@ static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
     DAG->RemoveDeadNode(InitialADDI.getNode());
 }
 
-void PPCDAGToDAGISel::PeepholePPC64() {
-  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
-  bool HasAIXSmallLocalExecTLS = Subtarget->hasAIXSmallLocalExecTLS();
-
-  while (Position != CurDAG->allnodes_begin()) {
-    SDNode *N = &*--Position;
-    // Skip dead nodes and any non-machine opcodes.
-    if (N->use_empty() || !N->isMachineOpcode())
-      continue;
-
-    if (isVSXSwap(SDValue(N, 0)))
-      reduceVSXSwap(N, CurDAG);
-
-    // This optimization is performed for non-TOC-based local-exec accesses.
-    if (HasAIXSmallLocalExecTLS)
-      foldADDIForLocalExecAccesses(N, CurDAG);
+static bool isValidOffsetMemOp(SDNode *N, bool &IsLoad,
+                               MaybeAlign &ExtraAlign) {
+  switch (N->getMachineOpcode()) {
+  default:
+    return false;
+  // Global must be word-aligned for LD, STD, LWA.
+  case PPC::LWA:
+  case PPC::LD:
+  case PPC::DFLOADf64:
+  case PPC::DFLOADf32:
+    ExtraAlign = Align(4);
+    [[fallthrough]];
+  case PPC::LBZ:
+  case PPC::LBZ8:
+  case PPC::LFD:
+  case PPC::LFS:
+  case PPC::LHA:
+  case PPC::LHA8:
+  case PPC::LHZ:
+  case PPC::LHZ8:
+  case PPC::LWZ:
+  case PPC::LWZ8:
+    IsLoad = true;
+    break;
+  case PPC::STD:
+  case PPC::DFSTOREf64:
+  case PPC::DFSTOREf32:
+    ExtraAlign = Align(4);
+    [[fallthrough]];
+  case PPC::STB:
+  case PPC::STB8:
+  case PPC::STFD:
+  case PPC::STFS:
+  case PPC::STH:
+  case PPC::STH8:
+  case PPC::STW:
+  case PPC::STW8:
+    break;
+  }
+  SDValue Base = N->getOperand(IsLoad ? 1 : 2);
+  if (!Base.isMachineOpcode())
+    return false;
+  switch (Base.getMachineOpcode()) {
+  default:
+    return false;
+  case PPC::ADDI8:
+  case PPC::ADDI:
+  case PPC::ADDIdtprelL:
+  case PPC::ADDItlsldL:
+  case PPC::ADDItocL:
+  case PPC::ADDItoc:
+  case PPC::ADDItoc8:
+    break;
+  }
+  return true;
+}
 
-    unsigned FirstOp;
-    unsigned StorageOpcode = N->getMachineOpcode();
-    bool RequiresMod4Offset = false;
+static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG,
+                              const PPCSubtarget *Subtarget) {
+  // TODO: Enable for AIX 32-bit.
+  if (!Subtarget->isPPC64())
+    return;
 
-    switch (StorageOpcode) {
-    default: continue;
+  bool IsLoad = false;
+  MaybeAlign ExtraAlign;
+  if (!isValidOffsetMemOp(N, IsLoad, ExtraAlign))
+    return;
 
-    case PPC::LWA:
-    case PPC::LD:
-    case PPC::DFLOADf64:
-    case PPC::DFLOADf32:
-      RequiresMod4Offset = true;
-      [[fallthrough]];
-    case PPC::LBZ:
-    case PPC::LBZ8:
-    case PPC::LFD:
-    case PPC::LFS:
-    case PPC::LHA:
-    case PPC::LHA8:
-    case PPC::LHZ:
-    case PPC::LHZ8:
-    case PPC::LWZ:
-    case PPC::LWZ8:
-      FirstOp = 0;
-      break;
+  SDValue MemOffset = N->getOperand(IsLoad ? 0 : 1);
+  SDValue MemBase = N->getOperand(IsLoad ? 1 : 2);
+  unsigned BaseOpc = MemBase.getMachineOpcode();
+  const DataLayout &DL = DAG->getDataLayout();
 
-    case PPC::STD:
-    case PPC::DFSTOREf64:
-    case PPC::DFSTOREf32:
-      RequiresMod4Offset = true;
-      [[fallthrough]];
-    case PPC::STB:
-    case PPC::STB8:
-    case PPC::STFD:
-    case PPC::STFS:
-    case PPC::STH:
-    case PPC::STH8:
-    case PPC::STW:
-    case PPC::STW8:
-      FirstOp = 1;
-      break;
-    }
+  // Only additions with constant offsets will be folded.
+  if (!isa<ConstantSDNode>(MemOffset))
+    return;
+  assert(MemBase.getNumOperands() == 2 && "Invalid base of memop with offset!");
 
-    // If this is a load or store with a zero offset, or within the alignment,
-    // we may be able to fold an add-immediate into the memory operation.
-    // The check against alignment is below, as it can't occur until we check
-    // the arguments to N
-    if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
-      continue;
+  SDValue ImmOp, RegOp;
+  // ADDItoc and ADDItoc8 ('la') puts the register at the second operand.
+  if (BaseOpc == PPC::ADDItoc || BaseOpc == PPC::ADDItoc8) {
+    ImmOp = MemBase.getOperand(0);
+    RegOp = MemBase.getOperand(1);
+  } else {
+    ImmOp = MemBase.getOperand(1);
+    RegOp = MemBase.getOperand(0);
+  }
 
-    SDValue Base = N->getOperand(FirstOp + 1);
-    if (!Base.isMachineOpcode())
-      continue;
+  MaybeAlign ImmAlign;
+  if (isa<GlobalAddressSDNode>(ImmOp))
----------------
amy-kwan wrote:

nit: I don't think we should use isa<> followed by cast<> here. The LLVM Programmer's Manual (https://llvm.org/docs/ProgrammersManual.html#the-isa-cast-and-dyn-cast-templates) says:

> Note that you should not use an isa<> test followed by a cast<>, for that use the dyn_cast<> operator.


https://github.com/llvm/llvm-project/pull/76488


More information about the llvm-commits mailing list