[llvm-branch-commits] [llvm] [SDAG][AMDGPU] Allow opting in to OOB-generating PTRADD transforms (PR #146074)
Fabian Ritter via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jun 27 06:28:20 PDT 2025
https://github.com/ritter-x2a created https://github.com/llvm/llvm-project/pull/146074
This PR adds a TargetLowering hook, canTransformPtrArithOutOfBounds,
that targets can use to allow transformations to introduce out-of-bounds
pointer arithmetic. It also moves two such transformations from the
AMDGPU-specific DAG combines to the generic DAGCombiner.
This is motivated by target features like AArch64's checked pointer
arithmetic, CPA, which does not tolerate the introduction of
out-of-bounds pointer arithmetic.
>From 6c5131da9bbb0537189b28fc4383f79607f9841a Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Thu, 26 Jun 2025 06:10:35 -0400
Subject: [PATCH] [SDAG][AMDGPU] Allow opting in to OOB-generating PTRADD
transforms
This PR adds a TargetLowering hook, canTransformPtrArithOutOfBounds,
that targets can use to allow transformations to introduce out-of-bounds
pointer arithmetic. It also moves two such transformations from the
AMDGPU-specific DAG combines to the generic DAGCombiner.
This is motivated by target features like AArch64's checked pointer
arithmetic, CPA, which does not tolerate the introduction of
out-of-bounds pointer arithmetic.
---
llvm/include/llvm/CodeGen/TargetLowering.h | 7 +
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 125 +++++++++++-------
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 60 ++-------
llvm/lib/Target/AMDGPU/SIISelLowering.h | 3 +
4 files changed, 94 insertions(+), 101 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 69ae4f80297d5..ba4f23b2d9191 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3510,6 +3510,13 @@ class LLVM_ABI TargetLoweringBase {
return false;
}
+ /// True if the target allows transformations of in-bounds pointer
+ /// arithmetic that cause out-of-bounds intermediate results.
+ virtual bool canTransformPtrArithOutOfBounds(const Function &F,
+ EVT PtrVT) const {
+ return false;
+ }
+
/// Does this target support complex deinterleaving
virtual bool isComplexDeinterleavingSupported() const { return false; }
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 08dab7c697b99..3626ac45a4860 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2689,59 +2689,82 @@ SDValue DAGCombiner::visitPTRADD(SDNode *N) {
if (PtrVT == IntVT && isNullConstant(N0))
return N1;
- if (N0.getOpcode() != ISD::PTRADD ||
- reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1))
- return SDValue();
-
- SDValue X = N0.getOperand(0);
- SDValue Y = N0.getOperand(1);
- SDValue Z = N1;
- bool N0OneUse = N0.hasOneUse();
- bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
- bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
-
- // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
- // * y is a constant and (ptradd x, y) has one use; or
- // * y and z are both constants.
- if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
- // If both additions in the original were NUW, the new ones are as well.
- SDNodeFlags Flags =
- (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
- SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
- AddToWorklist(Add.getNode());
- return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
+ if (N0.getOpcode() == ISD::PTRADD &&
+ !reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) {
+ SDValue X = N0.getOperand(0);
+ SDValue Y = N0.getOperand(1);
+ SDValue Z = N1;
+ bool N0OneUse = N0.hasOneUse();
+ bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
+ bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
+
+ // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
+ // * y is a constant and (ptradd x, y) has one use; or
+ // * y and z are both constants.
+ if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
+ // If both additions in the original were NUW, the new ones are as well.
+ SDNodeFlags Flags =
+ (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
+ SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
+ AddToWorklist(Add.getNode());
+ return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
+ }
+ }
+
+ // The following combines can turn in-bounds pointer arithmetic out of bounds.
+ // That is problematic for settings like AArch64's CPA, which checks that
+ // intermediate results of pointer arithmetic remain in bounds. The target
+  // therefore needs to opt in to enable them.
+ if (!TLI.canTransformPtrArithOutOfBounds(
+ DAG.getMachineFunction().getFunction(), PtrVT))
+ return SDValue();
+
+ if (N0.getOpcode() == ISD::PTRADD && N1.getOpcode() == ISD::Constant) {
+    // Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c), v) with
+ // global address GA and constant c, such that c can be folded into GA.
+ SDValue GAValue = N0.getOperand(0);
+ if (const GlobalAddressSDNode *GA =
+ dyn_cast<GlobalAddressSDNode>(GAValue)) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
+ // If both additions in the original were NUW, reassociation preserves
+ // that.
+ SDNodeFlags Flags =
+ (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
+ SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
+ AddToWorklist(Inner.getNode());
+ return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
+ }
+ }
}
- // TODO: There is another possible fold here that was proven useful.
- // It would be this:
- //
- // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y) if:
- // * (ptradd x, y) has one use; and
- // * y is a constant; and
- // * z is not a constant.
- //
- // In some cases, specifically in AArch64's FEAT_CPA, it exposes the
- // opportunity to select more complex instructions such as SUBPT and
- // MSUBPT. However, a hypothetical corner case has been found that we could
- // not avoid. Consider this (pseudo-POSIX C):
- //
- // char *foo(char *x, int z) {return (x + LARGE_CONSTANT) + z;}
- // char *p = mmap(LARGE_CONSTANT);
- // char *q = foo(p, -LARGE_CONSTANT);
- //
- // Then x + LARGE_CONSTANT is one-past-the-end, so valid, and a
- // further + z takes it back to the start of the mapping, so valid,
- // regardless of the address mmap gave back. However, if mmap gives you an
- // address < LARGE_CONSTANT (ignoring high bits), x - LARGE_CONSTANT will
- // borrow from the high bits (with the subsequent + z carrying back into
- // the high bits to give you a well-defined pointer) and thus trip
- // FEAT_CPA's pointer corruption checks.
- //
- // We leave this fold as an opportunity for future work, addressing the
- // corner case for FEAT_CPA, as well as reconciling the solution with the
- // more general application of pointer arithmetic in other future targets.
- // For now each architecture that wants this fold must implement it in the
- // target-specific code (see e.g. SITargetLowering::performPtrAddCombine)
+ if (N1.getOpcode() == ISD::ADD && N1.hasOneUse()) {
+ // (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
+ // y is not, and (add y, z) is used only once.
+ // (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
+ // z is not, and (add y, z) is used only once.
+ // The goal is to move constant offsets to the outermost ptradd, to create
+ // more opportunities to fold offsets into memory instructions.
+    // Together with another combine above, this also implements
+    // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y).
+ SDValue X = N0;
+ SDValue Y = N1.getOperand(0);
+ SDValue Z = N1.getOperand(1);
+ bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
+ bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
+
+ // If both additions in the original were NUW, reassociation preserves that.
+ SDNodeFlags ReassocFlags =
+ (N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
+
+ if (ZIsConstant != YIsConstant) {
+ if (YIsConstant)
+ std::swap(Y, Z);
+ SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
+ AddToWorklist(Inner.getNode());
+ return DAG.getMemBasePlusOffset(Inner, Z, DL, ReassocFlags);
+ }
+ }
return SDValue();
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 52c811d27e804..822bab88c8a09 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -10602,6 +10602,11 @@ bool SITargetLowering::shouldPreservePtrArith(const Function &F,
return UseSelectionDAGPTRADD && PtrVT == MVT::i64;
}
+bool SITargetLowering::canTransformPtrArithOutOfBounds(const Function &F,
+ EVT PtrVT) const {
+ return true;
+}
+
// The raw.(t)buffer and struct.(t)buffer intrinsics have two offset args:
// offset (the offset that is included in bounds checking and swizzling, to be
// split between the instruction's voffset and immoffset fields) and soffset
@@ -15131,65 +15136,17 @@ SDValue SITargetLowering::performPtrAddCombine(SDNode *N,
return Folded;
}
- if (N0.getOpcode() == ISD::PTRADD && N1.getOpcode() == ISD::Constant) {
- // Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c) v) with
- // global address GA and constant c, such that c can be folded into GA.
- SDValue GAValue = N0.getOperand(0);
- if (const GlobalAddressSDNode *GA =
- dyn_cast<GlobalAddressSDNode>(GAValue)) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (DCI.isBeforeLegalizeOps() && TLI.isOffsetFoldingLegal(GA)) {
- // If both additions in the original were NUW, reassociation preserves
- // that.
- SDNodeFlags Flags =
- (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
- SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
- DCI.AddToWorklist(Inner.getNode());
- return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
- }
- }
- }
-
if (N1.getOpcode() != ISD::ADD || !N1.hasOneUse())
return SDValue();
- // (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
- // y is not, and (add y, z) is used only once.
- // (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
- // z is not, and (add y, z) is used only once.
- // The goal is to move constant offsets to the outermost ptradd, to create
- // more opportunities to fold offsets into memory instructions.
- // Together with the generic combines in DAGCombiner.cpp, this also
- // implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
- //
- // This transform is here instead of in the general DAGCombiner as it can
- // turn in-bounds pointer arithmetic out-of-bounds, which is problematic for
- // AArch64's CPA.
SDValue X = N0;
SDValue Y = N1.getOperand(0);
SDValue Z = N1.getOperand(1);
bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
- // If both additions in the original were NUW, reassociation preserves that.
- SDNodeFlags ReassocFlags =
- (N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
-
- if (ZIsConstant != YIsConstant) {
- if (YIsConstant)
- std::swap(Y, Z);
- SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
- DCI.AddToWorklist(Inner.getNode());
- return DAG.getMemBasePlusOffset(Inner, Z, DL, ReassocFlags);
- }
-
- // If one of Y and Z is constant, they have been handled above. If both were
- // constant, the addition would have been folded in SelectionDAG::getNode
- // already. This ensures that the generic DAG combines won't undo the
- // following reassociation.
- assert(!YIsConstant && !ZIsConstant);
-
- if (!X->isDivergent() && Y->isDivergent() != Z->isDivergent()) {
+ if (!YIsConstant && !ZIsConstant && !X->isDivergent() &&
+ Y->isDivergent() != Z->isDivergent()) {
// Reassociate (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if x and
// y are uniform and z isn't.
// Reassociate (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if x and
@@ -15200,6 +15157,9 @@ SDValue SITargetLowering::performPtrAddCombine(SDNode *N,
// reassociate.
if (Y->isDivergent())
std::swap(Y, Z);
+ // If both additions in the original were NUW, reassociation preserves that.
+ SDNodeFlags ReassocFlags =
+ (N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
SDValue UniformInner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
DCI.AddToWorklist(UniformInner.getNode());
return DAG.getMemBasePlusOffset(UniformInner, Z, DL, ReassocFlags);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index c66f300ec4cb1..c5bc959c48e60 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -264,6 +264,9 @@ class SITargetLowering final : public AMDGPUTargetLowering {
bool shouldPreservePtrArith(const Function &F, EVT PtrVT) const override;
+ bool canTransformPtrArithOutOfBounds(const Function &F,
+ EVT PtrVT) const override;
+
private:
// Analyze a combined offset from an amdgcn_s_buffer_load intrinsic and store
// the three offsets (voffset, soffset and instoffset) into the SDValue[3]
More information about the llvm-branch-commits
mailing list