[llvm] [IndVarSimplify] Add rewriting ptr-add phis with offset addressing (PR #171151)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 8 08:08:53 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Nashe Mncube (nasherm)
<details>
<summary>Changes</summary>
This patch adds support to IndVarSimplify for rewriting IVs used in loops for strided loads or stores. These IVs are rewritten to make use of offset addressing, thereby eliminating PHI nodes and simplifying the IR. The motivation for this is that, with these PHI nodes eliminated, vectorization becomes more feasible.
---
Full diff: https://github.com/llvm/llvm-project/pull/171151.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Scalar/IndVarSimplify.cpp (+111)
- (added) llvm/test/Transforms/IndVarSimplify/rewrite-ptr-addr-with-offset-addr.ll (+64)
``````````diff
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 400cb1ecb5e03..0b6b8205ba0d0 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -164,6 +164,9 @@ class IndVarSimplify {
bool sinkUnusedInvariants(Loop *L);
+ bool rewritePtrIncrementWithOffsettAddressing(
+ Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts);
+
public:
IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
const DataLayout &DL, TargetLibraryInfo *TLI,
@@ -2039,6 +2042,111 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
return Changed;
}
+bool IndVarSimplify::rewritePtrIncrementWithOffsettAddressing(
+ Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
+ SmallVector<PHINode *, 8> LoopPhis(
+ llvm::make_pointer_range(L->getHeader()->phis()));
+ bool Changed = false;
+
+ auto IsPtrToOffsetAddressingCandidate = [&](PHINode *PHI) -> bool {
+ if (PHI->user_empty())
+ return false;
+
+ if (PHI->hasConstantValue())
+ return false;
+
+ // TODO: is this necessary.
+ if (L->getCanonicalInductionVariable() == PHI)
+ return false;
+
+ // We are only concerned with simple PHI nodes
+ // with two incoming values.
+ if (PHI->getNumIncomingValues() != 2)
+ return false;
+
+ return true;
+ };
+
+ auto NonInvariantPHIOp = [&](Value *Op0, Value *Op1) -> Value * {
+ // One of the two incoming values must be loop invariant
+ // but not both
+ if (!(L->isLoopInvariant(Op0) != L->isLoopInvariant(Op1)))
+ return nullptr;
+
+ return (L->isLoopInvariant(Op0)) ? Op1 : Op0;
+ };
+
+ auto IsConstantIncrement = [&](GetElementPtrInst *GEP) -> bool {
+ if (GEP->getNumIndices() != 1)
+ return false;
+
+ if (GEP->hasAllConstantIndices())
+ return true;
+
+ for (Value *V : GEP->indices())
+ if (!L->isLoopInvariant(V))
+ return false;
+
+ return true;
+ };
+
+ auto AdjustWidth = [&](Value *V, IRBuilder<> &IR,
+ unsigned TargetWidth, bool IsSigned) -> Value * {
+ if (V->getType()->getIntegerBitWidth() == TargetWidth)
+ return V;
+ if (IsSigned)
+ return IR.CreateSExt(V, IntegerType::get(V->getContext(), TargetWidth));
+ return IR.CreateZExt(V, IntegerType::get(V->getContext(), TargetWidth));
+ };
+
+ IRBuilder<> Builder(L->getHeader()->getFirstNonPHI());
+ for (auto *PHI : LoopPhis) {
+ if (!IsPtrToOffsetAddressingCandidate(PHI))
+ continue;
+
+ Value *CanonicalIV = L->getCanonicalInductionVariable();
+ if (!CanonicalIV)
+ continue;
+ if (!L->getLatchCmpInst())
+ return false;
+
+ Value *Op0 = PHI->getIncomingValue(0);
+ Value *Op1 = PHI->getIncomingValue(1);
+ Value *LoopDependentIncomingVal = NonInvariantPHIOp(Op0, Op1);
+ if (!LoopDependentIncomingVal)
+ continue;
+
+ if (auto *LoopStrideGEPInst =
+ dyn_cast<GetElementPtrInst>(LoopDependentIncomingVal)) {
+ Value *InvariantIncomingVal =
+ (Op0 == LoopDependentIncomingVal) ? Op1 : Op0;
+ if (!dyn_cast<GetElementPtrInst>(InvariantIncomingVal))
+ continue;
+
+ if (!IsConstantIncrement(LoopStrideGEPInst))
+ continue;
+
+ // Replace PHI with offset addressing GEP
+ Value *Stride = LoopStrideGEPInst->getOperand(1);
+ bool IsSigned = L->getLatchCmpInst()->isSigned();
+ unsigned MaxWidth = std::max(CanonicalIV->getType()->getIntegerBitWidth(),
+ Stride->getType()->getIntegerBitWidth());
+
+ CanonicalIV = AdjustWidth(CanonicalIV, Builder, MaxWidth, IsSigned);
+ Stride = AdjustWidth(Stride, Builder, MaxWidth, IsSigned);
+
+ Value *Mul = Builder.CreateMul(CanonicalIV, Stride);
+ Value *NewGEP =
+ Builder.CreateInBoundsGEP(LoopStrideGEPInst->getResultElementType(),
+ InvariantIncomingVal, ArrayRef(Mul));
+ PHI->replaceAllUsesWith(NewGEP);
+ DeadInsts.emplace_back(PHI);
+ Changed |= true;
+ }
+ }
+ return Changed;
+}
+
//===----------------------------------------------------------------------===//
// IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//
@@ -2116,6 +2224,9 @@ bool IndVarSimplify::run(Loop *L) {
SE->forgetLoop(L);
}
+ // Try to rewrite ptr increments with ptr offset addressing
+ Changed |= rewritePtrIncrementWithOffsettAddressing(L, DeadInsts);
+
// If we have a trip count expression, rewrite the loop's exit condition
// using it.
if (!DisableLFTR) {
diff --git a/llvm/test/Transforms/IndVarSimplify/rewrite-ptr-addr-with-offset-addr.ll b/llvm/test/Transforms/IndVarSimplify/rewrite-ptr-addr-with-offset-addr.ll
new file mode 100644
index 0000000000000..c08696f51b076
--- /dev/null
+++ b/llvm/test/Transforms/IndVarSimplify/rewrite-ptr-addr-with-offset-addr.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=indvars -S -o - | FileCheck %s
+
+
+define void @mat_transpose(float* %pIn, float* %pOut, i32 %nRows, i32 %nCols) {
+; CHECK-LABEL: define void @mat_transpose(
+; CHECK-SAME: ptr [[PIN:%.*]], ptr [[POUT:%.*]], i32 [[NROWS:%.*]], i32 [[NCOLS:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_ROW:.*]]
+; CHECK: [[LOOP_ROW]]:
+; CHECK-NEXT: [[ROW_IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ROW_INC:%.*]], %[[LOOP_ROW_END:.*]] ]
+; CHECK-NEXT: [[CMP_ROW:%.*]] = icmp ult i32 [[ROW_IV]], [[NROWS]]
+; CHECK-NEXT: br i1 [[CMP_ROW]], label %[[LOOP_COL_PRE:.*]], label %[[EXIT:.*]]
+; CHECK: [[LOOP_COL_PRE]]:
+; CHECK-NEXT: [[PX_BASE:%.*]] = getelementptr inbounds float, ptr [[POUT]], i32 [[ROW_IV]]
+; CHECK-NEXT: br label %[[LOOP_COL:.*]]
+; CHECK: [[LOOP_COL]]:
+; CHECK-NEXT: [[COL_IV:%.*]] = phi i32 [ 0, %[[LOOP_COL_PRE]] ], [ [[COL_INC:%.*]], %[[LOOP_COL]] ]
+; CHECK-NEXT: [[PIN_PTR:%.*]] = phi ptr [ [[PIN]], %[[LOOP_COL_PRE]] ], [ [[PIN_NEXT:%.*]], %[[LOOP_COL]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[COL_IV]], [[NROWS]]
+; CHECK-NEXT: [[PX_PTR:%.*]] = getelementptr inbounds float, ptr [[PX_BASE]], i32 [[TMP0]]
+; CHECK-NEXT: [[PIN_LOAD:%.*]] = load float, ptr [[PIN_PTR]], align 4
+; CHECK-NEXT: [[PIN_NEXT]] = getelementptr float, ptr [[PIN_PTR]], i32 1
+; CHECK-NEXT: store float [[PIN_LOAD]], ptr [[PX_PTR]], align 4
+; CHECK-NEXT: [[COL_INC]] = add nuw i32 [[COL_IV]], 1
+; CHECK-NEXT: [[CMP_COL:%.*]] = icmp ult i32 [[COL_INC]], [[NCOLS]]
+; CHECK-NEXT: br i1 [[CMP_COL]], label %[[LOOP_COL]], label %[[LOOP_ROW_END]]
+; CHECK: [[LOOP_ROW_END]]:
+; CHECK-NEXT: [[ROW_INC]] = add nuw i32 [[ROW_IV]], 1
+; CHECK-NEXT: br label %[[LOOP_ROW]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop_row
+
+loop_row:
+ %row.iv = phi i32 [ 0, %entry ], [ %row.inc, %loop_row_end ]
+ %cmp.row = icmp ult i32 %row.iv, %nRows
+ br i1 %cmp.row, label %loop_col_pre, label %exit
+
+loop_col_pre:
+ %px.base = getelementptr inbounds float, float* %pOut, i32 %row.iv
+ br label %loop_col
+
+loop_col:
+ %col.iv = phi i32 [ 0, %loop_col_pre ], [ %col.inc, %loop_col ]
+ %pIn.ptr = phi float* [ %pIn, %loop_col_pre ], [ %pIn.next, %loop_col ]
+ %px.ptr = phi float* [ %px.base, %loop_col_pre ], [ %px.next, %loop_col ]
+ %pIn.load = load float, float* %pIn.ptr
+ %pIn.next = getelementptr float, float* %pIn.ptr, i32 1
+ store float %pIn.load, float* %px.ptr
+ %px.next = getelementptr float, float* %px.ptr, i32 %nRows
+ %col.inc = add nuw i32 %col.iv, 1
+ %cmp.col = icmp ult i32 %col.inc, %nCols
+ br i1 %cmp.col, label %loop_col, label %loop_row_end
+
+loop_row_end:
+ %row.inc = add nuw i32 %row.iv, 1
+ br label %loop_row
+
+exit:
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/171151
More information about the llvm-commits
mailing list