[llvm] LAA: rework and rename stripGetElementPtr (PR #125315)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 17 05:35:58 PST 2025
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/125315
>From 70220a0b033002d42e841dc42c137b594015eb85 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Mon, 17 Feb 2025 13:03:24 +0000
Subject: [PATCH 1/2] LAA: pre-commit tests for strip-gep
---
.../LoopAccessAnalysis/symbolic-stride.ll | 43 +++++++++++++++++++
1 file changed, 43 insertions(+)
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
index 525995156481c..71d04b217ce33 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
@@ -140,6 +140,49 @@ exit:
ret void
}
+; Test with multiple GEP indices
+define void @single_stride_array(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride) {
+; CHECK-LABEL: 'single_stride_array'
+; CHECK-NEXT: loop:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: %load = load [2 x i32], ptr %gep.A, align 4 ->
+; CHECK-NEXT: store [2 x i32] %ins, ptr %gep.A.next, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %mul = mul i64 %iv, %stride
+ %gep.A = getelementptr inbounds [2 x i32], ptr %A, i64 %mul, i64 1
+ %load = load [2 x i32], ptr %gep.A, align 4
+ %gep.B = getelementptr inbounds [2 x i32], ptr %B, i64 %iv
+ %load_1 = load [2 x i32], ptr %gep.B, align 4
+ %v1 = extractvalue [2 x i32] %load, 0
+ %v2 = extractvalue [2 x i32] %load_1, 0
+ %add = add i32 %v1, %v2
+ %ins = insertvalue [2 x i32] poison, i32 %add, 0
+ %iv.next = add nuw nsw i64 %iv, 1
+ %gep.A.next = getelementptr inbounds [2 x i32], ptr %A, i64 %iv.next
+ store [2 x i32] %ins, ptr %gep.A.next, align 4
+ %exitcond = icmp eq i64 %iv.next, %N
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
define void @single_stride_castexpr(i32 %offset, ptr %src, ptr %dst, i1 %cond) {
; CHECK-LABEL: 'single_stride_castexpr'
; CHECK-NEXT: inner.loop:
>From 1752ccd11801a9325f179ac2da759d74d36d1bcb Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Fri, 31 Jan 2025 23:11:49 +0000
Subject: [PATCH 2/2] LAA: clarify loop-variant GEP idx computation
The stripGetElementPtr function is mysteriously named, and calls into
another mysterious getGEPInductionOperand which does something
complicated with GEP indices. The real purpose of the badly-named
stripGetElementPtr function is to get a loop-variant GEP index, if there
is one. The getGEPInductionOperand function is redundant: stripping
off zeros from the end of the GEP indices has no effect on computing the
loop-variant GEP index, since constant zeros are always loop-invariant.
Moreover, the GEP induction operand is simply the first non-zero index
from the end, which stripGetElementPtr returns when it finds that any of
the GEP indices are loop-variant: this is a completely unrelated value
to the GEP index that is loop-variant. The implicit assumption here is
that there is only ever one loop-variant index, and it is the first
non-zero one from the end.
The logic is unnecessarily complicated for what stripGetElementPtr wants
to achieve, and the header comments are confusing as well. Strip
getGEPInductionOperand, rework and rename stripGetElementPtr.
---
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 61 ++++++-------------
.../LoopAccessAnalysis/symbolic-stride.ll | 8 ++-
2 files changed, 23 insertions(+), 46 deletions(-)
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 7d6dbd51a404d..676e64b8982ad 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -42,13 +42,12 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
@@ -66,7 +65,6 @@
#include <vector>
using namespace llvm;
-using namespace llvm::PatternMatch;
#define DEBUG_TYPE "loop-accesses"
@@ -2815,50 +2813,25 @@ bool LoopAccessInfo::isInvariant(Value *V) const {
return SE->isLoopInvariant(S, TheLoop);
}
-/// Find the operand of the GEP that should be checked for consecutive
-/// stores. This ignores trailing indices that have no effect on the final
-/// pointer.
-static unsigned getGEPInductionOperand(const GetElementPtrInst *Gep) {
- const DataLayout &DL = Gep->getDataLayout();
- unsigned LastOperand = Gep->getNumOperands() - 1;
- TypeSize GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());
-
- // Walk backwards and try to peel off zeros.
- while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
- // Find the type we're currently indexing into.
- gep_type_iterator GEPTI = gep_type_begin(Gep);
- std::advance(GEPTI, LastOperand - 2);
-
- // If it's a type with the same allocation size as the result of the GEP we
- // can peel off the zero index.
- TypeSize ElemSize = GEPTI.isStruct()
- ? DL.getTypeAllocSize(GEPTI.getIndexedType())
- : GEPTI.getSequentialElementStride(DL);
- if (ElemSize != GEPAllocSize)
- break;
- --LastOperand;
- }
-
- return LastOperand;
-}
-
-/// If the argument is a GEP, then returns the operand identified by
-/// getGEPInductionOperand. However, if there is some other non-loop-invariant
-/// operand, it returns that instead.
-static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
+/// If \p Ptr is a GEP with exactly one loop-variant operand, return that
+/// operand. Otherwise, return \p Ptr.
+static Value *getLoopVariantGEPOperand(Value *Ptr, ScalarEvolution *SE,
+ Loop *Lp) {
auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
if (!GEP)
return Ptr;
- unsigned InductionOperand = getGEPInductionOperand(GEP);
-
- // Check that all of the gep indices are uniform except for our induction
- // operand.
- for (unsigned I = 0, E = GEP->getNumOperands(); I != E; ++I)
- if (I != InductionOperand &&
- !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(I)), Lp))
- return Ptr;
- return GEP->getOperand(InductionOperand);
+ Value *V = Ptr;
+ for (const Use &U : GEP->operands()) {
+ if (!SE->isLoopInvariant(SE->getSCEV(U), Lp)) {
+ if (V == Ptr)
+ V = U;
+ else
+ // There must be exactly one loop-variant operand.
+ return Ptr;
+ }
+ }
+ return V;
}
/// Get the stride of a pointer access in a loop. Looks for symbolic
@@ -2873,7 +2846,7 @@ static const SCEV *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *L
// pointer, otherwise, we are analyzing the index.
Value *OrigPtr = Ptr;
- Ptr = stripGetElementPtr(Ptr, SE, Lp);
+ Ptr = getLoopVariantGEPOperand(Ptr, SE, Lp);
const SCEV *V = SE->getSCEV(Ptr);
if (Ptr != OrigPtr)
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
index 71d04b217ce33..8603417081067 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
@@ -145,9 +145,9 @@ define void @single_stride_array(ptr noalias %A, ptr noalias %B, i64 %N, i64 %st
; CHECK-LABEL: 'single_stride_array'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Backward loop carried data dependence.
; CHECK-NEXT: Dependences:
-; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: Backward:
; CHECK-NEXT: %load = load [2 x i32], ptr %gep.A, align 4 ->
; CHECK-NEXT: store [2 x i32] %ins, ptr %gep.A.next, align 4
; CHECK-EMPTY:
@@ -156,8 +156,12 @@ define void @single_stride_array(ptr noalias %A, ptr noalias %B, i64 %N, i64 %st
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
+; CHECK-NEXT: Equal predicate: %stride == 1
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
+; CHECK-NEXT: [PSE] %gep.A = getelementptr inbounds [2 x i32], ptr %A, i64 %mul, i64 1:
+; CHECK-NEXT: {(4 + %A),+,(8 * %stride)}<%loop>
+; CHECK-NEXT: --> {(4 + %A),+,8}<%loop>
;
entry:
br label %loop
More information about the llvm-commits
mailing list