[llvm] 00d3f7c - [LAA] Make getPointersDiff() API compatible with opaque pointers

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 23 09:46:43 PDT 2021


Author: Nikita Popov
Date: 2021-06-23T18:44:34+02:00
New Revision: 00d3f7cc3c264adc360d0282ba8a27de2a004b94

URL: https://github.com/llvm/llvm-project/commit/00d3f7cc3c264adc360d0282ba8a27de2a004b94
DIFF: https://github.com/llvm/llvm-project/commit/00d3f7cc3c264adc360d0282ba8a27de2a004b94.diff

LOG: [LAA] Make getPointersDiff() API compatible with opaque pointers

Make getPointersDiff() and sortPtrAccesses() compatible with opaque
pointers by explicitly passing in the element type instead of
determining it from the pointer element type.

The SLPVectorizer result is slightly non-optimal in that unnecessary
pointer bitcasts are added.

Differential Revision: https://reviews.llvm.org/D104784

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll

Modified: 
    llvm/include/llvm/Analysis/LoopAccessAnalysis.h
    llvm/lib/Analysis/LoopAccessAnalysis.cpp
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 39acfd5bbbeec..882aee717dc89 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -684,7 +684,8 @@ int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
 /// is a simple API that does not depend on the analysis pass.
 /// \param StrictCheck Ensure that the calculated distance matches the
 /// type-based one after all the bitcasts removal in the provided pointers.
-Optional<int> getPointersDiff(Value *PtrA, Value *PtrB, const DataLayout &DL,
+Optional<int> getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
+                              Value *PtrB, const DataLayout &DL,
                               ScalarEvolution &SE, bool StrictCheck = false,
                               bool CheckType = true);
 
@@ -698,7 +699,7 @@ Optional<int> getPointersDiff(Value *PtrA, Value *PtrB, const DataLayout &DL,
 /// sorted indices in \p SortedIndices as a[i+0], a[i+1], a[i+4], a[i+7] and
 /// saves the mask for actual memory accesses in program order in
 /// \p SortedIndices as <1,2,0,3>
-bool sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
+bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy, const DataLayout &DL,
                      ScalarEvolution &SE,
                      SmallVectorImpl<unsigned> &SortedIndices);
 

diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 3248b2a2ac6b1..9b515508cef59 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1124,16 +1124,22 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
   return Stride;
 }
 
-Optional<int> llvm::getPointersDiff(Value *PtrA, Value *PtrB,
-                                    const DataLayout &DL, ScalarEvolution &SE,
-                                    bool StrictCheck, bool CheckType) {
+Optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
+                                    Value *PtrB, const DataLayout &DL,
+                                    ScalarEvolution &SE, bool StrictCheck,
+                                    bool CheckType) {
   assert(PtrA && PtrB && "Expected non-nullptr pointers.");
+  assert(cast<PointerType>(PtrA->getType())
+             ->isOpaqueOrPointeeTypeMatches(ElemTyA) && "Wrong PtrA type");
+  assert(cast<PointerType>(PtrB->getType())
+             ->isOpaqueOrPointeeTypeMatches(ElemTyB) && "Wrong PtrB type");
+
   // Make sure that A and B are different pointers.
   if (PtrA == PtrB)
     return 0;
 
-  // Make sure that PtrA and PtrB have the same type if required
-  if (CheckType && PtrA->getType() != PtrB->getType())
+  // Make sure that the element types are the same if required.
+  if (CheckType && ElemTyA != ElemTyB)
     return None;
 
   unsigned ASA = PtrA->getType()->getPointerAddressSpace();
@@ -1174,8 +1180,7 @@ Optional<int> llvm::getPointersDiff(Value *PtrA, Value *PtrB,
       return None;
     Val = Diff->getAPInt().getSExtValue();
   }
-  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
-  int Size = DL.getTypeStoreSize(Ty);
+  int Size = DL.getTypeStoreSize(ElemTyA);
   int Dist = Val / Size;
 
   // Ensure that the calculated distance matches the type-based one after all
@@ -1185,8 +1190,8 @@ Optional<int> llvm::getPointersDiff(Value *PtrA, Value *PtrB,
   return None;
 }
 
-bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
-                           ScalarEvolution &SE,
+bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
+                           const DataLayout &DL, ScalarEvolution &SE,
                            SmallVectorImpl<unsigned> &SortedIndices) {
   assert(llvm::all_of(
              VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
@@ -1204,8 +1209,8 @@ bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
   int Cnt = 1;
   bool IsConsecutive = true;
   for (auto *Ptr : VL.drop_front()) {
-    Optional<int> Diff =
-        getPointersDiff(Ptr0, Ptr, DL, SE, /*StrictCheck=*/true);
+    Optional<int> Diff = getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
+                                         /*StrictCheck=*/true);
     if (!Diff)
       return false;
 
@@ -1238,8 +1243,10 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
   Value *PtrB = getLoadStorePointerOperand(B);
   if (!PtrA || !PtrB)
     return false;
-  Optional<int> Diff =
-      getPointersDiff(PtrA, PtrB, DL, SE, /*StrictCheck=*/true, CheckType);
+  Type *ElemTyA = getLoadStoreType(A);
+  Type *ElemTyB = getLoadStoreType(B);
+  Optional<int> Diff = getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE,
+                                       /*StrictCheck=*/true, CheckType);
   return Diff && *Diff == 1;
 }
 

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0ffa210ffa62d..47424541ff5bf 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -987,9 +987,9 @@ class BoUpSLP {
         if (LI1->getParent() != LI2->getParent())
           return VLOperands::ScoreFail;
 
-        Optional<int> Dist =
-            getPointersDiff(LI1->getPointerOperand(), LI2->getPointerOperand(),
-                            DL, SE, /*StrictCheck=*/true);
+        Optional<int> Dist = getPointersDiff(
+            LI1->getType(), LI1->getPointerOperand(), LI2->getType(),
+            LI2->getPointerOperand(), DL, SE, /*StrictCheck=*/true);
         return (Dist && *Dist == 1) ? VLOperands::ScoreConsecutiveLoads
                                     : VLOperands::ScoreFail;
       }
@@ -2968,7 +2968,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
 
       OrdersType CurrentOrder;
       // Check the order of pointer operands.
-      if (llvm::sortPtrAccesses(PointerOps, *DL, *SE, CurrentOrder)) {
+      if (llvm::sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, CurrentOrder)) {
         Value *Ptr0;
         Value *PtrN;
         if (CurrentOrder.empty()) {
@@ -2978,7 +2978,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
           Ptr0 = PointerOps[CurrentOrder.front()];
           PtrN = PointerOps[CurrentOrder.back()];
         }
-        Optional<int> Diff = getPointersDiff(Ptr0, PtrN, *DL, *SE);
+        Optional<int> Diff = getPointersDiff(
+            ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE);
         // Check that the sorted loads are consecutive.
         if (static_cast<unsigned>(*Diff) == VL.size() - 1) {
           if (CurrentOrder.empty()) {
@@ -3243,7 +3244,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
 
       OrdersType CurrentOrder;
       // Check the order of pointer operands.
-      if (llvm::sortPtrAccesses(PointerOps, *DL, *SE, CurrentOrder)) {
+      if (llvm::sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, CurrentOrder)) {
         Value *Ptr0;
         Value *PtrN;
         if (CurrentOrder.empty()) {
@@ -3253,7 +3254,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
           Ptr0 = PointerOps[CurrentOrder.front()];
           PtrN = PointerOps[CurrentOrder.back()];
         }
-        Optional<int> Dist = getPointersDiff(Ptr0, PtrN, *DL, *SE);
+        Optional<int> Dist =
+            getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE);
         // Check that the sorted pointer operands are consecutive.
         if (static_cast<unsigned>(*Dist) == VL.size() - 1) {
           if (CurrentOrder.empty()) {
@@ -6893,9 +6895,10 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
     ++IterCnt;
     CheckedPairs[Idx].set(K);
     CheckedPairs[K].set(Idx);
-    Optional<int> Diff = getPointersDiff(Stores[K]->getPointerOperand(),
-                                         Stores[Idx]->getPointerOperand(), *DL,
-                                         *SE, /*StrictCheck=*/true);
+    Optional<int> Diff = getPointersDiff(
+        Stores[K]->getValueOperand()->getType(), Stores[K]->getPointerOperand(),
+        Stores[Idx]->getValueOperand()->getType(),
+        Stores[Idx]->getPointerOperand(), *DL, *SE, /*StrictCheck=*/true);
     if (!Diff || *Diff == 0)
       return false;
     int Val = *Diff;

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll b/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll
new file mode 100644
index 0000000000000..e8b3f094bd4e2
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -slp-vectorizer -mtriple=x86_64-apple-macosx -mcpu=haswell < %s | FileCheck %s
+
+define void @test(ptr %r, ptr %p, ptr %q) #0 {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[P0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 0
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 1
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 2
+; CHECK-NEXT:    [[P3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 3
+; CHECK-NEXT:    [[Q0:%.*]] = getelementptr inbounds i64, ptr [[Q:%.*]], i64 0
+; CHECK-NEXT:    [[Q1:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 1
+; CHECK-NEXT:    [[Q2:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 2
+; CHECK-NEXT:    [[Q3:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast ptr [[P0]] to <4 x i64>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* [[TMP1]], align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast ptr [[Q0]] to <4 x i64>*
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP3]], align 2
+; CHECK-NEXT:    [[TMP5:%.*]] = sub nsw <4 x i64> [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
+; CHECK-NEXT:    [[G0:%.*]] = getelementptr inbounds i32, ptr [[R:%.*]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1
+; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
+; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
+; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP9]]
+; CHECK-NEXT:    ret void
+;
+  %p0 = getelementptr inbounds i64, ptr %p, i64 0
+  %p1 = getelementptr inbounds i64, ptr %p, i64 1
+  %p2 = getelementptr inbounds i64, ptr %p, i64 2
+  %p3 = getelementptr inbounds i64, ptr %p, i64 3
+
+  %q0 = getelementptr inbounds i64, ptr %q, i64 0
+  %q1 = getelementptr inbounds i64, ptr %q, i64 1
+  %q2 = getelementptr inbounds i64, ptr %q, i64 2
+  %q3 = getelementptr inbounds i64, ptr %q, i64 3
+
+  %x0 = load i64, ptr %p0, align 2
+  %x1 = load i64, ptr %p1, align 2
+  %x2 = load i64, ptr %p2, align 2
+  %x3 = load i64, ptr %p3, align 2
+
+  %y0 = load i64, ptr %q0, align 2
+  %y1 = load i64, ptr %q1, align 2
+  %y2 = load i64, ptr %q2, align 2
+  %y3 = load i64, ptr %q3, align 2
+
+  %sub0 = sub nsw i64 %x0, %y0
+  %sub1 = sub nsw i64 %x1, %y1
+  %sub2 = sub nsw i64 %x2, %y2
+  %sub3 = sub nsw i64 %x3, %y3
+
+  %g0 = getelementptr inbounds i32, ptr %r, i64 %sub0
+  %g1 = getelementptr inbounds i32, ptr %r, i64 %sub1
+  %g2 = getelementptr inbounds i32, ptr %r, i64 %sub2
+  %g3 = getelementptr inbounds i32, ptr %r, i64 %sub3
+  ret void
+}


        


More information about the llvm-commits mailing list