[llvm] [LAA] Keep track of sink ptr forked by a phi (PR #67223)

via llvm-commits llvm-commits at lists.llvm.org
Sat Sep 23 00:30:14 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-analysis

<details>
<summary>Changes</summary>

Given a function like the following: https://godbolt.org/z/nqW3637Mf
```
void s1161_ptr (int *Preds, double *a, double *b, double *c) {
  for (int i = 0; i < 1000; ++i) {
    if (Preds[i] != 0)
      b[i] = a[i] + 1;
    else
      a[i] = i * i;
  }
}
```
LLVM will optimize the IR to a single store by a phi instruction:
```
for.body:                               ; preds = %entry, %for.inc
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
  ...
if.then:                                ; preds = %for.body
  %arrayidx3 = getelementptr inbounds double, ptr %a, i64 %indvars.iv
  ...
  br label %for.inc
if.else:
  ...
  br label %for.inc
for.inc:                                ; preds = %if.then, %if.else
  %b.sink = phi ptr [ %b, %if.then ], [ %a, %if.else ]
  %add.sink = phi double [ %add, %if.then ], [ %conv, %if.else ]
  %arrayidx5 = getelementptr inbounds double, ptr %b.sink, i64
%indvars.iv
  store double %add.sink, ptr %arrayidx5, align 8
```

Because array A has read and write operations at the same time in the loop body, we need extra runtime checks for this alias set compare to the previous scenario in commit be9211279.

To track the forked pointer, we transform it back to 2 PointerInfo entries with separate value just like the IR generated with option -simplifycfg-sink-common=false.

---
Full diff: https://github.com/llvm/llvm-project/pull/67223.diff


2 Files Affected:

- (modified) llvm/lib/Analysis/LoopAccessAnalysis.cpp (+39-5) 
- (modified) llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll (+87-12) 


``````````diff
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 245cce25a3d5743..6c775bf6bb70666 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -44,6 +44,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
@@ -804,6 +805,7 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE,
 }
 
 static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
+                          PredicatedScalarEvolution &PSE,
                           function_ref<void(Value *)> AddPointer) {
   SmallPtrSet<Value *, 8> Visited;
   SmallVector<Value *> WorkList;
@@ -821,8 +823,40 @@ static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
         PN->getParent() != InnermostLoop.getHeader()) {
       for (const Use &Inc : PN->incoming_values())
         WorkList.push_back(Inc);
-    } else
+    } else {
+      auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+      ScalarEvolution &SE = *PSE.getSE();
+      // Decompose the GEP when its offset is a induction variable.
+      if (GEP && GEP->getNumOperands() == 2 &&
+          isa<SCEVAddExpr>(SE.getSCEV(GEP)) &&
+          !InnermostLoop.isLoopInvariant(GEP->getOperand(0))) {
+        if (auto *PhiI = dyn_cast<PHINode>(GEP->getOperand(0))) {
+          const SCEV *Offset = SE.getSCEV(GEP->getOperand(1));
+          if (PhiI->getNumOperands() == 2 && isa<SCEVAddRecExpr>(Offset) &&
+              InnermostLoop.isLoopInvariant(PhiI->getIncomingValue(0)) &&
+              InnermostLoop.isLoopInvariant(PhiI->getIncomingValue(1))) {
+            // Transform back to 2 PointerInfo entries with separate values to
+            // track the forked pointer.
+            Type *SourceTy = GEP->getResultElementType();
+            bool IsInBounds = GEP->isInBounds();
+            SmallVector<Value *, 4> Index(GEP->indices());
+            IRBuilder<> BuilderA(PhiI->getIncomingBlock(0)->getTerminator());
+            Value *In0 = PhiI->getIncomingValue(0);
+            auto *GEPA = BuilderA.CreateGEP(
+                SourceTy, In0, Index, GEP->getName() + ".in0", IsInBounds);
+            IRBuilder<> BuilderB(PhiI->getIncomingBlock(1)->getTerminator());
+            Value *In1 = PhiI->getIncomingValue(1);
+            auto *GEPB = BuilderA.CreateGEP(
+                SourceTy, In1, Index, GEP->getName() + ".in1", IsInBounds);
+            AddPointer(GEPA);
+            AddPointer(GEPB);
+            continue;
+          }
+        }
+      }
+
       AddPointer(Ptr);
+    }
   }
 }
 
@@ -1634,7 +1668,7 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
 }
 
 void MemoryDepChecker::addAccess(StoreInst *SI) {
-  visitPointers(SI->getPointerOperand(), *InnermostLoop,
+  visitPointers(SI->getPointerOperand(), *InnermostLoop, PSE,
                 [this, SI](Value *Ptr) {
                   Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
                   InstMap.push_back(SI);
@@ -1643,7 +1677,7 @@ void MemoryDepChecker::addAccess(StoreInst *SI) {
 }
 
 void MemoryDepChecker::addAccess(LoadInst *LI) {
-  visitPointers(LI->getPointerOperand(), *InnermostLoop,
+  visitPointers(LI->getPointerOperand(), *InnermostLoop, PSE,
                 [this, LI](Value *Ptr) {
                   Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
                   InstMap.push_back(LI);
@@ -2367,7 +2401,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
       if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
         Loc.AATags.TBAA = nullptr;
 
-      visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
+      visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop, *PSE,
                     [&Accesses, AccessTy, Loc](Value *Ptr) {
                       MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
                       Accesses.addStore(NewLoc, AccessTy);
@@ -2416,7 +2450,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
     if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
       Loc.AATags.TBAA = nullptr;
 
-    visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
+    visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop, *PSE,
                   [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
                     MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
                     Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll b/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll
index 30ffc4cb5e1e85f..3baac4d34e6078f 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll
@@ -944,32 +944,37 @@ define void @forked_ptrs_with_different_base(ptr nocapture readonly %Preds, ptr
 ; CHECK-NEXT:    Run-time memory checks:
 ; CHECK-NEXT:    Check 0:
 ; CHECK-NEXT:      Comparing group ([[G1:.+]]):
-; CHECK-NEXT:        %arrayidx7 = getelementptr inbounds double, ptr %.sink, i64 %indvars.iv
+; CHECK-NEXT:        %arrayidx7.in01 = getelementptr inbounds double, ptr %1, i64 %indvars.iv
 ; CHECK-NEXT:      Against group ([[G2:.+]]):
-; CHECK-NEXT:        %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv
+; CHECK-NEXT:        %arrayidx7.in12 = getelementptr inbounds double, ptr %2, i64 %indvars.iv
 ; CHECK-NEXT:    Check 1:
 ; CHECK-NEXT:      Comparing group ([[G1]]):
-; CHECK-NEXT:        %arrayidx7 = getelementptr inbounds double, ptr %.sink, i64 %indvars.iv
+; CHECK-NEXT:        %arrayidx7.in01 = getelementptr inbounds double, ptr %1, i64 %indvars.iv
+; CHECK-NEXT:      Against group ([[G3:.+]]):
+; CHECK-NEXT:        %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv
+; CHECK-NEXT:    Check 2:
+; CHECK-NEXT:      Comparing group ([[G1]]):
+; CHECK-NEXT:        %arrayidx7.in01 = getelementptr inbounds double, ptr %1, i64 %indvars.iv
 ; CHECK-NEXT:      Against group ([[G4:.+]]):
 ; CHECK-NEXT:        %arrayidx5 = getelementptr inbounds double, ptr %0, i64 %indvars.iv
-; CHECK-NEXT:    Check 2:
-; CHECK-NEXT:      Comparing group ([[G3:.+]]):
-; CHECK-NEXT:        %arrayidx7 = getelementptr inbounds double, ptr %.sink, i64 %indvars.iv
-; CHECK-NEXT:      Against group ([[G2]]):
-; CHECK-NEXT:        %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv
 ; CHECK-NEXT:    Check 3:
-; CHECK-NEXT:      Comparing group ([[G3]]):
-; CHECK-NEXT:        %arrayidx7 = getelementptr inbounds double, ptr %.sink, i64 %indvars.iv
+; CHECK-NEXT:      Comparing group ([[G2]]):
+; CHECK-NEXT:        %arrayidx7.in12 = getelementptr inbounds double, ptr %2, i64 %indvars.iv
+; CHECK-NEXT:      Against group ([[G3]]):
+; CHECK-NEXT:        %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv
+; CHECK-NEXT:    Check 4:
+; CHECK-NEXT:      Comparing group ([[G2]]):
+; CHECK-NEXT:        %arrayidx7.in12 = getelementptr inbounds double, ptr %2, i64 %indvars.iv
 ; CHECK-NEXT:      Against group ([[G4]]):
 ; CHECK-NEXT:        %arrayidx5 = getelementptr inbounds double, ptr %0, i64 %indvars.iv
 ; CHECK-NEXT:    Grouped accesses:
 ; CHECK-NEXT:      Group [[G1]]:
 ; CHECK-NEXT:        (Low: %1 High: (63992 + %1))
 ; CHECK-NEXT:          Member: {%1,+,8}<nw><%for.body>
-; CHECK-NEXT:      Group [[G3]]:
+; CHECK-NEXT:      Group [[G2]]:
 ; CHECK-NEXT:        (Low: %2 High: (63992 + %2))
 ; CHECK-NEXT:          Member: {%2,+,8}<nw><%for.body>
-; CHECK-NEXT:      Group [[G2]]:
+; CHECK-NEXT:      Group [[G3]]:
 ; CHECK-NEXT:        (Low: %Preds High: (31996 + %Preds))
 ; CHECK-NEXT:          Member: {%Preds,+,4}<nuw><%for.body>
 ; CHECK-NEXT:      Group [[G4]]:
@@ -1058,4 +1063,74 @@ for.inc:                                          ; preds = %if.br1, %if.br0
 
 for.cond.cleanup:                                 ; preds = %for.inc
   ret void
+}
+
+define void @forked_write_ptr_with_read_of_same_base(ptr nocapture readonly %Preds, ptr nocapture %a, ptr nocapture writeonly %b, ptr nocapture readnone %c) {
+; CHECK:       for.body:
+; CHECK-NEXT:    Memory dependences are safe with run-time checks
+; CHECK-NEXT:    Dependences:
+; CHECK-NEXT:    Run-time memory checks:
+; CHECK-NEXT:    Check 0:
+; CHECK-NEXT:      Comparing group ([[G1:.+]]):
+; CHECK-NEXT:        %arrayidx5.in01 = getelementptr inbounds double, ptr %b, i64 %indvars.iv
+; CHECK-NEXT:      Against group ([[G2:.+]]):
+; CHECK-NEXT:        %arrayidx3 = getelementptr inbounds double, ptr %a, i64 %indvars.iv
+; CHECK-NEXT:        %arrayidx5.in12 = getelementptr inbounds double, ptr %a, i64 %indvars.iv
+; CHECK-NEXT:    Check 1:
+; CHECK-NEXT:      Comparing group ([[G1]]):
+; CHECK-NEXT:        %arrayidx5.in01 = getelementptr inbounds double, ptr %b, i64 %indvars.iv
+; CHECK-NEXT:      Against group ([[G3:.+]]):
+; CHECK-NEXT:        %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv
+; CHECK-NEXT:    Check 2:
+; CHECK-NEXT:      Comparing group ([[G2]]):
+; CHECK-NEXT:        %arrayidx3 = getelementptr inbounds double, ptr %a, i64 %indvars.iv
+; CHECK-NEXT:        %arrayidx5.in12 = getelementptr inbounds double, ptr %a, i64 %indvars.iv
+; CHECK-NEXT:      Against group ([[G3]]):
+; CHECK-NEXT:        %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv
+; CHECK-NEXT:    Grouped accesses:
+; CHECK-NEXT:      Group [[G1]]:
+; CHECK-NEXT:        (Low: %b High: (8000 + %b))
+; CHECK-NEXT:          Member: {%b,+,8}<nw><%for.body>
+; CHECK-NEXT:      Group [[G2]]:
+; CHECK-NEXT:        (Low: %a High: (8000 + %a))
+; CHECK-NEXT:          Member: {%a,+,8}<nw><%for.body>
+; CHECK-NEXT:          Member: {%a,+,8}<nw><%for.body>
+; CHECK-NEXT:      Group [[G3]]:
+; CHECK-NEXT:        (Low: %Preds High: (4000 + %Preds))
+; CHECK-NEXT:          Member: {%Preds,+,4}<nuw><%for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT:    Non vectorizable stores to invariant address were not found in loop.
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.inc
+  ret void
+
+for.body:                                         ; preds = %entry, %for.inc
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
+  %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %cmp1.not = icmp eq i32 %0, 0
+  br i1 %cmp1.not, label %if.else, label %if.then
+
+if.then:                                          ; preds = %for.body
+  %arrayidx3 = getelementptr inbounds double, ptr %a, i64 %indvars.iv
+  %1 = load double, ptr %arrayidx3, align 8
+  %add = fadd fast double %1, 1.000000e+00
+  br label %for.inc
+
+if.else:                                          ; preds = %for.body
+  %2 = mul nuw nsw i64 %indvars.iv, %indvars.iv
+  %3 = trunc i64 %2 to i32
+  %conv = sitofp i32 %3 to double
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then, %if.else
+  %b.sink = phi ptr [ %b, %if.then ], [ %a, %if.else ]
+  %add.sink = phi double [ %add, %if.then ], [ %conv, %if.else ]
+  %arrayidx5 = getelementptr inbounds double, ptr %b.sink, i64 %indvars.iv
+  store double %add.sink, ptr %arrayidx5, align 8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
 }
\ No newline at end of file

``````````

</details>


https://github.com/llvm/llvm-project/pull/67223


More information about the llvm-commits mailing list