[llvm] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines (PR #143673)

Thu Jul 10 06:25:08 PDT 2025

================
@@ -15089,42 +15094,116 @@ SDValue SITargetLowering::performPtrAddCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
   SDLoc DL(N);
+  EVT VT = N->getValueType(0);
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
 
-  if (N1.getOpcode() == ISD::ADD) {
-    // (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
-    //    y is not, and (add y, z) is used only once.
-    // (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
-    //    z is not, and (add y, z) is used only once.
-    // The goal is to move constant offsets to the outermost ptradd, to create
-    // more opportunities to fold offsets into memory instructions.
-    // Together with the generic combines in DAGCombiner.cpp, this also
-    // implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
-    //
-    // This transform is here instead of in the general DAGCombiner as it can
-    // turn in-bounds pointer arithmetic out-of-bounds, which is problematic for
-    // AArch64's CPA.
-    SDValue X = N0;
-    SDValue Y = N1.getOperand(0);
-    SDValue Z = N1.getOperand(1);
-    if (N1.hasOneUse()) {
-      bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
-      bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
-      if (ZIsConstant != YIsConstant) {
-        // If both additions in the original were NUW, the new ones are as well.
-        SDNodeFlags Flags =
-            (N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
-        if (YIsConstant)
-          std::swap(Y, Z);
+  // The following folds transform PTRADDs into regular arithmetic in cases
+  // where the PTRADD wouldn't be folded as an immediate offset into memory
+  // instructions anyway. They are target-specific in that other targets might
+  // prefer to not lose information about the pointer arithmetic.
+
+  // Fold (ptradd x, shl(0 - v, k)) -> sub(x, shl(v, k)).
+  // Adapted from DAGCombiner::visitADDLikeCommutative.
+  SDValue V, K;
+  if (sd_match(N1, m_Shl(m_Neg(m_Value(V)), m_Value(K)))) {
+    SDValue Inner = DAG.getNode(ISD::SHL, DL, VT, V, K);
+    DCI.AddToWorklist(Inner.getNode());
+    return DAG.getNode(ISD::SUB, DL, VT, N0, Inner);
+  }
+
+  // Fold into Mad64 if the right-hand side is a MUL. Analogous to a fold in
+  // performAddCombine.
+  if (N1.getOpcode() == ISD::MUL) {
+    if (Subtarget->hasMad64_32()) {
+      if (SDValue Folded = tryFoldToMad64_32(N, DCI))
+        return Folded;
+    }
+  }
 
-        SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, Flags);
+  // If the 32 low bits of the constant are all zero, there is nothing to fold
+  // into an immediate offset, so it's better to eliminate the unnecessary
+  // addition for the lower 32 bits than to preserve the PTRADD.
+  // Analogous to a fold in performAddCombine.
+  if (VT == MVT::i64) {
+    if (SDValue Folded = foldAddSub64WithZeroLowBitsTo32(N, DCI))
+      return Folded;
+  }
+
+  if (N0.getOpcode() == ISD::PTRADD && N1.getOpcode() == ISD::Constant) {
+    // Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c) v) with
+    // global address GA and constant c, such that c can be folded into GA.
+    SDValue GAValue = N0.getOperand(0);
+    if (const GlobalAddressSDNode *GA =
+            dyn_cast<GlobalAddressSDNode>(GAValue)) {
+      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+      if (DCI.isBeforeLegalizeOps() && TLI.isOffsetFoldingLegal(GA)) {
----------------
arsenm wrote:

I've never understood exactly what isOffsetFoldingLegal does; does this result in any test changes? Do we do anything meaningful with a global base address here? 

https://github.com/llvm/llvm-project/pull/143673