[llvm] 6176f04 - [LAA] Do not set CanDoRT to false for AS that do not need RT checks.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Sun Jun 14 12:56:42 PDT 2020


Author: Florian Hahn
Date: 2020-06-14T20:55:59+01:00
New Revision: 6176f04436bb71825bb8ad51d13e4349daa3b506

URL: https://github.com/llvm/llvm-project/commit/6176f04436bb71825bb8ad51d13e4349daa3b506
DIFF: https://github.com/llvm/llvm-project/commit/6176f04436bb71825bb8ad51d13e4349daa3b506.diff

LOG: [LAA] Do not set CanDoRT to false for AS that do not need RT checks.

Alternative approach to D80570.

canCheckPtrAtRT already contains checks to figure out for which alias
sets runtime checks are needed. But it currently sets CanDoRT to false
for alias sets for which we cannot do RT checks but also do not need
any.

If we know that we do not need RT checks based on the number of
reads/writes in the alias set, we can skip processing the AS.

This patch also adds an assertion to ensure that DepCands does not
contain more than one write from the alias set.

Reviewers: Ayal, anemet, hfinkel, dmgreen

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D80622

Added: 
    llvm/test/Transforms/LoopVectorize/alias-set-with-uncomputable-bounds.ll

Modified: 
    llvm/lib/Analysis/LoopAccessAnalysis.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 4cb2db58100b..3789be757aca 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -712,11 +712,12 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
 
   // We assign a consecutive id to access from different alias sets.
   // Accesses between different groups doesn't need to be checked.
-  unsigned ASId = 1;
+  unsigned ASId = 0;
   for (auto &AS : AST) {
     int NumReadPtrChecks = 0;
     int NumWritePtrChecks = 0;
     bool CanDoAliasSetRT = true;
+    ++ASId;
 
     // We assign consecutive id to access from different dependence sets.
     // Accesses within the same set don't need a runtime check.
@@ -752,9 +753,25 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
     // we have a pointer for which we couldn't find the bounds but we don't
     // actually need to emit any checks so it does not matter.
     bool NeedsAliasSetRTCheck = false;
-    if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2))
+    if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2)) {
       NeedsAliasSetRTCheck = (NumWritePtrChecks >= 2 ||
                              (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1));
+      // For alias sets without at least 2 writes or 1 write and 1 read, there
+      // is no need to generate RT checks and CanDoAliasSetRT for this alias set
+      // does not impact whether runtime checks can be generated.
+      if (!NeedsAliasSetRTCheck) {
+        assert((AS.size() <= 1 ||
+                all_of(AS,
+                       [this](auto AC) {
+                         MemAccessInfo AccessWrite(AC.getValue(), true);
+                         return DepCands.findValue(AccessWrite) ==
+                                DepCands.end();
+                       })) &&
+               "Can only skip updating CanDoRT below, if all entries in AS "
+               "are reads or there is at most 1 entry");
+        continue;
+      }
+    }
 
     // We need to perform run-time alias checks, but some pointers had bounds
     // that couldn't be checked.

diff  --git a/llvm/test/Transforms/LoopVectorize/alias-set-with-uncomputable-bounds.ll b/llvm/test/Transforms/LoopVectorize/alias-set-with-uncomputable-bounds.ll
new file mode 100644
index 000000000000..5f48283f52a4
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/alias-set-with-uncomputable-bounds.ll
@@ -0,0 +1,150 @@
+; RUN: opt  -loop-vectorize -force-vector-width=2 -S %s | FileCheck %s
+
+; Tests with alias sets that contain pointers with uncomputable bounds because
+; they include %offset.1, which is loaded in each loop iteration.
+
+; Alias set with uncomputable bounds contains a single load. We do not need
+; runtime checks for that group and it should not block vectorization.
+define void @test1_uncomputable_bounds_single_load(i32* noalias %ptr.1, i32* noalias %ptr.2, i32* noalias %ptr.3, i64 %N, i64 %X) {
+; CHECK-LABEL: define void @test1_uncomputable_bounds_single_load
+; CHECK:       vector.body
+; CHECK:         ret void
+
+entry:
+  %cond = icmp sgt i64 %N, 0
+  br i1 %cond, label %ph, label %exit
+
+ph:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ]
+  %gep.1 = getelementptr inbounds i32, i32* %ptr.3, i64 %iv
+  %offset.1 = load i32, i32* %gep.1, align 4
+  %gep.2 = getelementptr inbounds i32, i32* %ptr.2, i32 %offset.1
+  %lv = load i32, i32* %gep.2, align 4
+  %gep.3 = getelementptr inbounds i32, i32* %ptr.1, i64 %iv
+  store i32 %lv , i32* %gep.3, align 4
+  %offset.2 = add nsw i64 %iv, %X
+  %gep.4 = getelementptr inbounds i32, i32* %ptr.1, i64 %offset.2
+  store i32 %lv, i32* %gep.4, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %N
+  br i1 %exitcond, label %loop.exit, label %loop
+
+loop.exit:
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Alias set with uncomputable bounds contains a single store. We do not need
+; runtime checks for that group and it should not block vectorization.
+define void @test2_uncomputable_bounds_single_store(i32* noalias %ptr.1, i32* noalias %ptr.2, i32* noalias %ptr.3, i64 %N, i64 %X) {
+; CHECK-LABEL: define void @test2_uncomputable_bounds_single_store
+; CHECK:       vector.body
+; CHECK:         ret void
+
+entry:
+  %cond = icmp sgt i64 %N, 0
+  br i1 %cond, label %ph, label %exit
+
+ph:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ]
+  %gep.1 = getelementptr inbounds i32, i32* %ptr.3, i64 %iv
+  %offset.1 = load i32, i32* %gep.1, align 4
+  %gep.2 = getelementptr inbounds i32, i32* %ptr.2, i32 %offset.1
+  store i32 20, i32* %gep.2, align 4
+  %gep.3 = getelementptr inbounds i32, i32* %ptr.1, i64 %iv
+  store i32 0 , i32* %gep.3, align 4
+  %offset.2 = add nsw i64 %iv, %X
+  %gep.4 = getelementptr inbounds i32, i32* %ptr.1, i64 %offset.2
+  store i32 10, i32* %gep.4, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %N
+  br i1 %exitcond, label %loop.exit, label %loop
+
+loop.exit:
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Alias set with uncomputable bounds contains a load and a store. This blocks
+; vectorization, as we cannot generate runtime-checks for the set.
+define void @test3_uncomputable_bounds_load_store(i32* noalias %ptr.1, i32* noalias %ptr.2, i32* noalias %ptr.3, i64 %N, i64 %X) {
+; CHECK-LABEL: define void @test3_uncomputable_bounds_load_store
+; CHECK-NOT: vector.body
+
+entry:
+  %cond = icmp sgt i64 %N, 0
+  br i1 %cond, label %ph, label %exit
+
+ph:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ]
+  %gep.1 = getelementptr inbounds i32, i32* %ptr.3, i64 %iv
+  %offset.1 = load i32, i32* %gep.1, align 4
+  %gep.2 = getelementptr inbounds i32, i32* %ptr.2, i32 %offset.1
+  store i32 20, i32* %gep.2, align 4
+  %gep.22 = getelementptr inbounds i32, i32* %ptr.2, i64 %iv
+  %lv = load i32, i32* %gep.22, align 4
+  %gep.3 = getelementptr inbounds i32, i32* %ptr.1, i64 %iv
+  store i32 %lv , i32* %gep.3, align 4
+  %offset.2 = add nsw i64 %iv, %X
+  %gep.4 = getelementptr inbounds i32, i32* %ptr.1, i64 %offset.2
+  store i32 %lv, i32* %gep.4, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %N
+  br i1 %exitcond, label %loop.exit, label %loop
+
+loop.exit:
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Alias set with uncomputable bounds contains two stores. This blocks
+; vectorization, as we cannot generate runtime-checks for the set.
+define void @test4_uncomputable_bounds_store_store(i32* noalias %ptr.1, i32* noalias %ptr.2, i32* noalias %ptr.3, i64 %N, i64 %X) {
+; CHECK-LABEL: define void @test4_uncomputable_bounds_store_store
+; CHECK-NOT: vector.body
+
+entry:
+  %cond = icmp sgt i64 %N, 0
+  br i1 %cond, label %ph, label %exit
+
+ph:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ]
+  %gep.1 = getelementptr inbounds i32, i32* %ptr.3, i64 %iv
+  %offset.1 = load i32, i32* %gep.1, align 4
+  %gep.2 = getelementptr inbounds i32, i32* %ptr.2, i32 %offset.1
+  store i32 20, i32* %gep.2, align 4
+  %gep.22 = getelementptr inbounds i32, i32* %ptr.2, i64 %iv
+  store i32 30, i32* %gep.22, align 4
+  %gep.3 = getelementptr inbounds i32, i32* %ptr.1, i64 %iv
+  store i32 0 , i32* %gep.3, align 4
+  %offset.2 = add nsw i64 %iv, %X
+  %gep.4 = getelementptr inbounds i32, i32* %ptr.1, i64 %offset.2
+  store i32 10, i32* %gep.4, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %N
+  br i1 %exitcond, label %loop.exit, label %loop
+
+loop.exit:
+  br label %exit
+
+exit:
+  ret void
+}


        


More information about the llvm-commits mailing list