[llvm] [LAA] Keep track of sink ptr forked by a phi (PR #67223)

via llvm-commits llvm-commits at lists.llvm.org
Sat Sep 23 00:29:06 PDT 2023


https://github.com/vfdff created https://github.com/llvm/llvm-project/pull/67223

Given a function like the following: https://godbolt.org/z/nqW3637Mf
```
void s1161_ptr (int *Preds, double *a, double *b, double *c) {
  for (int i = 0; i < 1000; ++i) {
    if (Preds[i] != 0)
      b[i] = a[i] + 1;
    else
      a[i] = i * i;
  }
}
```
LLVM will optimize the IR to a single store by a phi instruction:
```
for.body:                               ; preds = %entry, %for.inc
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
  ...
if.then:                                ; preds = %for.body
  %arrayidx3 = getelementptr inbounds double, ptr %a, i64 %indvars.iv
  ...
  br label %for.inc
if.else:
  ...
  br label %for.inc
for.inc:                                ; preds = %if.then, %if.else
  %b.sink = phi ptr [ %b, %if.then ], [ %a, %if.else ]
  %add.sink = phi double [ %add, %if.then ], [ %conv, %if.else ]
  %arrayidx5 = getelementptr inbounds double, ptr %b.sink, i64
%indvars.iv
  store double %add.sink, ptr %arrayidx5, align 8
```

Because array A has read and write operations at the same time in the loop body, we need extra runtime checks for this alias set compare to the previous scenario in commit be9211279.

To track the forked pointer, we transform it back to 2 PointerInfo entries with separate value just like the IR generated with option -simplifycfg-sink-common=false.

>From 40c8531fc42cf15b3b679e65ed77267d0396542b Mon Sep 17 00:00:00 2001
From: zhongyunde 00443407 <zhongyunde at huawei.com>
Date: Fri, 22 Sep 2023 08:32:08 -0400
Subject: [PATCH] [LAA] Keep track of sink ptr forked by a phi

Given a function like the following: https://godbolt.org/z/nqW3637Mf
```
void s1161_ptr (int *Preds, double *a, double *b, double *c) {
  for (int i = 0; i < 1000; ++i) {
    if (Preds[i] != 0)
      b[i] = a[i] + 1;
    else
      a[i] = i * i;
  }
}
```
LLVM will optimize the IR to a single store by a phi instruction:
```
for.body:                               ; preds = %entry, %for.inc
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
  ...
if.then:                                ; preds = %for.body
  %arrayidx3 = getelementptr inbounds double, ptr %a, i64 %indvars.iv
  ...
  br label %for.inc
if.else:
  ...
  br label %for.inc
for.inc:                                ; preds = %if.then, %if.else
  %b.sink = phi ptr [ %b, %if.then ], [ %a, %if.else ]
  %add.sink = phi double [ %add, %if.then ], [ %conv, %if.else ]
  %arrayidx5 = getelementptr inbounds double, ptr %b.sink, i64
%indvars.iv
  store double %add.sink, ptr %arrayidx5, align 8
```

Because array A has read and write operations at the same time in the
loop body, we need extra runtime checks for this alias set compare to
the previous scenario in commit be9211279.

To track the forked pointer, we transform it back to 2 PointerInfo entries
with separate value just like the IR generated with option
-simplifycfg-sink-common=false.
---
 llvm/lib/Analysis/LoopAccessAnalysis.cpp      | 44 ++++++++-
 .../LoopAccessAnalysis/forked-pointers.ll     | 99 ++++++++++++++++---
 2 files changed, 126 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 245cce25a3d5743..6c775bf6bb70666 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -44,6 +44,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
@@ -804,6 +805,7 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE,
 }
 
 static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
+                          PredicatedScalarEvolution &PSE,
                           function_ref<void(Value *)> AddPointer) {
   SmallPtrSet<Value *, 8> Visited;
   SmallVector<Value *> WorkList;
@@ -821,8 +823,40 @@ static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
         PN->getParent() != InnermostLoop.getHeader()) {
       for (const Use &Inc : PN->incoming_values())
         WorkList.push_back(Inc);
-    } else
+    } else {
+      auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+      ScalarEvolution &SE = *PSE.getSE();
+      // Decompose the GEP when its offset is a induction variable.
+      if (GEP && GEP->getNumOperands() == 2 &&
+          isa<SCEVAddExpr>(SE.getSCEV(GEP)) &&
+          !InnermostLoop.isLoopInvariant(GEP->getOperand(0))) {
+        if (auto *PhiI = dyn_cast<PHINode>(GEP->getOperand(0))) {
+          const SCEV *Offset = SE.getSCEV(GEP->getOperand(1));
+          if (PhiI->getNumOperands() == 2 && isa<SCEVAddRecExpr>(Offset) &&
+              InnermostLoop.isLoopInvariant(PhiI->getIncomingValue(0)) &&
+              InnermostLoop.isLoopInvariant(PhiI->getIncomingValue(1))) {
+            // Transform back to 2 PointerInfo entries with separate values to
+            // track the forked pointer.
+            Type *SourceTy = GEP->getResultElementType();
+            bool IsInBounds = GEP->isInBounds();
+            SmallVector<Value *, 4> Index(GEP->indices());
+            IRBuilder<> BuilderA(PhiI->getIncomingBlock(0)->getTerminator());
+            Value *In0 = PhiI->getIncomingValue(0);
+            auto *GEPA = BuilderA.CreateGEP(
+                SourceTy, In0, Index, GEP->getName() + ".in0", IsInBounds);
+            IRBuilder<> BuilderB(PhiI->getIncomingBlock(1)->getTerminator());
+            Value *In1 = PhiI->getIncomingValue(1);
+            auto *GEPB = BuilderA.CreateGEP(
+                SourceTy, In1, Index, GEP->getName() + ".in1", IsInBounds);
+            AddPointer(GEPA);
+            AddPointer(GEPB);
+            continue;
+          }
+        }
+      }
+
       AddPointer(Ptr);
+    }
   }
 }
 
@@ -1634,7 +1668,7 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
 }
 
 void MemoryDepChecker::addAccess(StoreInst *SI) {
-  visitPointers(SI->getPointerOperand(), *InnermostLoop,
+  visitPointers(SI->getPointerOperand(), *InnermostLoop, PSE,
                 [this, SI](Value *Ptr) {
                   Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
                   InstMap.push_back(SI);
@@ -1643,7 +1677,7 @@ void MemoryDepChecker::addAccess(StoreInst *SI) {
 }
 
 void MemoryDepChecker::addAccess(LoadInst *LI) {
-  visitPointers(LI->getPointerOperand(), *InnermostLoop,
+  visitPointers(LI->getPointerOperand(), *InnermostLoop, PSE,
                 [this, LI](Value *Ptr) {
                   Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
                   InstMap.push_back(LI);
@@ -2367,7 +2401,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
       if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
         Loc.AATags.TBAA = nullptr;
 
-      visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
+      visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop, *PSE,
                     [&Accesses, AccessTy, Loc](Value *Ptr) {
                       MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
                       Accesses.addStore(NewLoc, AccessTy);
@@ -2416,7 +2450,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
     if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
       Loc.AATags.TBAA = nullptr;
 
-    visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
+    visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop, *PSE,
                   [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
                     MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
                     Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll b/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll
index 30ffc4cb5e1e85f..3baac4d34e6078f 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll
@@ -944,32 +944,37 @@ define void @forked_ptrs_with_different_base(ptr nocapture readonly %Preds, ptr
 ; CHECK-NEXT:    Run-time memory checks:
 ; CHECK-NEXT:    Check 0:
 ; CHECK-NEXT:      Comparing group ([[G1:.+]]):
-; CHECK-NEXT:        %arrayidx7 = getelementptr inbounds double, ptr %.sink, i64 %indvars.iv
+; CHECK-NEXT:        %arrayidx7.in01 = getelementptr inbounds double, ptr %1, i64 %indvars.iv
 ; CHECK-NEXT:      Against group ([[G2:.+]]):
-; CHECK-NEXT:        %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv
+; CHECK-NEXT:        %arrayidx7.in12 = getelementptr inbounds double, ptr %2, i64 %indvars.iv
 ; CHECK-NEXT:    Check 1:
 ; CHECK-NEXT:      Comparing group ([[G1]]):
-; CHECK-NEXT:        %arrayidx7 = getelementptr inbounds double, ptr %.sink, i64 %indvars.iv
+; CHECK-NEXT:        %arrayidx7.in01 = getelementptr inbounds double, ptr %1, i64 %indvars.iv
+; CHECK-NEXT:      Against group ([[G3:.+]]):
+; CHECK-NEXT:        %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv
+; CHECK-NEXT:    Check 2:
+; CHECK-NEXT:      Comparing group ([[G1]]):
+; CHECK-NEXT:        %arrayidx7.in01 = getelementptr inbounds double, ptr %1, i64 %indvars.iv
 ; CHECK-NEXT:      Against group ([[G4:.+]]):
 ; CHECK-NEXT:        %arrayidx5 = getelementptr inbounds double, ptr %0, i64 %indvars.iv
-; CHECK-NEXT:    Check 2:
-; CHECK-NEXT:      Comparing group ([[G3:.+]]):
-; CHECK-NEXT:        %arrayidx7 = getelementptr inbounds double, ptr %.sink, i64 %indvars.iv
-; CHECK-NEXT:      Against group ([[G2]]):
-; CHECK-NEXT:        %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv
 ; CHECK-NEXT:    Check 3:
-; CHECK-NEXT:      Comparing group ([[G3]]):
-; CHECK-NEXT:        %arrayidx7 = getelementptr inbounds double, ptr %.sink, i64 %indvars.iv
+; CHECK-NEXT:      Comparing group ([[G2]]):
+; CHECK-NEXT:        %arrayidx7.in12 = getelementptr inbounds double, ptr %2, i64 %indvars.iv
+; CHECK-NEXT:      Against group ([[G3]]):
+; CHECK-NEXT:        %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv
+; CHECK-NEXT:    Check 4:
+; CHECK-NEXT:      Comparing group ([[G2]]):
+; CHECK-NEXT:        %arrayidx7.in12 = getelementptr inbounds double, ptr %2, i64 %indvars.iv
 ; CHECK-NEXT:      Against group ([[G4]]):
 ; CHECK-NEXT:        %arrayidx5 = getelementptr inbounds double, ptr %0, i64 %indvars.iv
 ; CHECK-NEXT:    Grouped accesses:
 ; CHECK-NEXT:      Group [[G1]]:
 ; CHECK-NEXT:        (Low: %1 High: (63992 + %1))
 ; CHECK-NEXT:          Member: {%1,+,8}<nw><%for.body>
-; CHECK-NEXT:      Group [[G3]]:
+; CHECK-NEXT:      Group [[G2]]:
 ; CHECK-NEXT:        (Low: %2 High: (63992 + %2))
 ; CHECK-NEXT:          Member: {%2,+,8}<nw><%for.body>
-; CHECK-NEXT:      Group [[G2]]:
+; CHECK-NEXT:      Group [[G3]]:
 ; CHECK-NEXT:        (Low: %Preds High: (31996 + %Preds))
 ; CHECK-NEXT:          Member: {%Preds,+,4}<nuw><%for.body>
 ; CHECK-NEXT:      Group [[G4]]:
@@ -1058,4 +1063,74 @@ for.inc:                                          ; preds = %if.br1, %if.br0
 
 for.cond.cleanup:                                 ; preds = %for.inc
   ret void
+}
+
+define void @forked_write_ptr_with_read_of_same_base(ptr nocapture readonly %Preds, ptr nocapture %a, ptr nocapture writeonly %b, ptr nocapture readnone %c) {
+; CHECK:       for.body:
+; CHECK-NEXT:    Memory dependences are safe with run-time checks
+; CHECK-NEXT:    Dependences:
+; CHECK-NEXT:    Run-time memory checks:
+; CHECK-NEXT:    Check 0:
+; CHECK-NEXT:      Comparing group ([[G1:.+]]):
+; CHECK-NEXT:        %arrayidx5.in01 = getelementptr inbounds double, ptr %b, i64 %indvars.iv
+; CHECK-NEXT:      Against group ([[G2:.+]]):
+; CHECK-NEXT:        %arrayidx3 = getelementptr inbounds double, ptr %a, i64 %indvars.iv
+; CHECK-NEXT:        %arrayidx5.in12 = getelementptr inbounds double, ptr %a, i64 %indvars.iv
+; CHECK-NEXT:    Check 1:
+; CHECK-NEXT:      Comparing group ([[G1]]):
+; CHECK-NEXT:        %arrayidx5.in01 = getelementptr inbounds double, ptr %b, i64 %indvars.iv
+; CHECK-NEXT:      Against group ([[G3:.+]]):
+; CHECK-NEXT:        %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv
+; CHECK-NEXT:    Check 2:
+; CHECK-NEXT:      Comparing group ([[G2]]):
+; CHECK-NEXT:        %arrayidx3 = getelementptr inbounds double, ptr %a, i64 %indvars.iv
+; CHECK-NEXT:        %arrayidx5.in12 = getelementptr inbounds double, ptr %a, i64 %indvars.iv
+; CHECK-NEXT:      Against group ([[G3]]):
+; CHECK-NEXT:        %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv
+; CHECK-NEXT:    Grouped accesses:
+; CHECK-NEXT:      Group [[G1]]:
+; CHECK-NEXT:        (Low: %b High: (8000 + %b))
+; CHECK-NEXT:          Member: {%b,+,8}<nw><%for.body>
+; CHECK-NEXT:      Group [[G2]]:
+; CHECK-NEXT:        (Low: %a High: (8000 + %a))
+; CHECK-NEXT:          Member: {%a,+,8}<nw><%for.body>
+; CHECK-NEXT:          Member: {%a,+,8}<nw><%for.body>
+; CHECK-NEXT:      Group [[G3]]:
+; CHECK-NEXT:        (Low: %Preds High: (4000 + %Preds))
+; CHECK-NEXT:          Member: {%Preds,+,4}<nuw><%for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT:    Non vectorizable stores to invariant address were not found in loop.
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.inc
+  ret void
+
+for.body:                                         ; preds = %entry, %for.inc
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
+  %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %cmp1.not = icmp eq i32 %0, 0
+  br i1 %cmp1.not, label %if.else, label %if.then
+
+if.then:                                          ; preds = %for.body
+  %arrayidx3 = getelementptr inbounds double, ptr %a, i64 %indvars.iv
+  %1 = load double, ptr %arrayidx3, align 8
+  %add = fadd fast double %1, 1.000000e+00
+  br label %for.inc
+
+if.else:                                          ; preds = %for.body
+  %2 = mul nuw nsw i64 %indvars.iv, %indvars.iv
+  %3 = trunc i64 %2 to i32
+  %conv = sitofp i32 %3 to double
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then, %if.else
+  %b.sink = phi ptr [ %b, %if.then ], [ %a, %if.else ]
+  %add.sink = phi double [ %add, %if.then ], [ %conv, %if.else ]
+  %arrayidx5 = getelementptr inbounds double, ptr %b.sink, i64 %indvars.iv
+  store double %add.sink, ptr %arrayidx5, align 8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
 }
\ No newline at end of file



More information about the llvm-commits mailing list