[llvm] 0658bab - [SCEV] Infer flags from add/gep in any block

Wed Oct 6 11:12:00 PDT 2021

Author: Philip Reames
Date: 2021-10-06T11:11:54-07:00
New Revision: 0658bab870c89d81678f1f37aac0396ddd0913b3

URL: https://github.com/llvm/llvm-project/commit/0658bab870c89d81678f1f37aac0396ddd0913b3
DIFF: https://github.com/llvm/llvm-project/commit/0658bab870c89d81678f1f37aac0396ddd0913b3.diff

LOG: [SCEV] Infer flags from add/gep in any block

This patch removes a compile time restriction from isSCEVExprNeverPoison. We've strengthened our ability to reason about flags on scopes other than addrecs, and this bailout prevents us from using it. The comment is also suspect as well in that we're in the middle of constructing a SCEV for I. As such, we're going to visit all operands *anyways*.

Differential Revision: https://reviews.llvm.org/D111186

Added: 
    

Modified: 
    llvm/lib/Analysis/ScalarEvolution.cpp
    llvm/test/Analysis/DependenceAnalysis/Preliminary.ll
    llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll
    llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 6683b1a5205c..4fb2266b6e89 100644

--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -6645,16 +6645,6 @@ bool ScalarEvolution::isGuaranteedToTransferExecutionTo(const Instruction *A,
 
 
 bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) {
-  // Here we check that I is in the header of the innermost loop containing I,
-  // since we only deal with instructions in the loop header. The actual loop we
-  // need to check later will come from an add recurrence, but getting that
-  // requires computing the SCEV of the operands, which can be expensive. This
-  // check we can do cheaply to rule out some cases early.
-  Loop *InnermostContainingLoop = LI.getLoopFor(I->getParent());
-  if (InnermostContainingLoop == nullptr ||
-      InnermostContainingLoop->getHeader() != I->getParent())
-    return false;
-
   // Only proceed if we can prove that I does not yield poison.
   if (!programUndefinedIfPoison(I))
     return false;

diff  --git a/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll b/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll
index 0899f67d6914..91827f3231ba 100644
--- a/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll
@@ -623,7 +623,7 @@ entry:
 
 ; CHECK-LABEL: p9
 ; CHECK: da analyze - none!
-; CHECK: da analyze - flow [|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!

diff  --git a/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll b/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll
index f0bda26edb38..0423854bbc3b 100644
--- a/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll
+++ b/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll
@@ -1628,9 +1628,9 @@ define noundef i32 @add-basic(i32 %a, i32 %b) {
 ; CHECK-LABEL: 'add-basic'
 ; CHECK-NEXT:  Classifying expressions for: @add-basic
 ; CHECK-NEXT:    %res = add nuw nsw i32 %a, %b
-; CHECK-NEXT:    --> (%a + %b) U: full-set S: full-set
+; CHECK-NEXT:    --> (%a + %b)<nuw><nsw> U: full-set S: full-set
 ; CHECK-NEXT:    %res2 = udiv i32 255, %res
-; CHECK-NEXT:    --> (255 /u (%a + %b)) U: [0,256) S: [0,256)
+; CHECK-NEXT:    --> (255 /u (%a + %b)<nuw><nsw>) U: [0,256) S: [0,256)
 ; CHECK-NEXT:  Determining loop execution counts for: @add-basic
 ;
   %res = add nuw nsw i32 %a, %b
@@ -1656,9 +1656,9 @@ define noundef i32 @mul-basic(i32 %a, i32 %b) {
 ; CHECK-LABEL: 'mul-basic'
 ; CHECK-NEXT:  Classifying expressions for: @mul-basic
 ; CHECK-NEXT:    %res = mul nuw nsw i32 %a, %b
-; CHECK-NEXT:    --> (%a * %b) U: full-set S: full-set
+; CHECK-NEXT:    --> (%a * %b)<nuw><nsw> U: full-set S: full-set
 ; CHECK-NEXT:    %res2 = udiv i32 255, %res
-; CHECK-NEXT:    --> (255 /u (%a * %b)) U: [0,256) S: [0,256)
+; CHECK-NEXT:    --> (255 /u (%a * %b)<nuw><nsw>) U: [0,256) S: [0,256)
 ; CHECK-NEXT:  Determining loop execution counts for: @mul-basic
 ;
   %res = mul nuw nsw i32 %a, %b

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll b/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll
index e4000b52c4a9..8f57fe6866bd 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll
@@ -8,10 +8,6 @@ target triple = "x86_64-apple-macosx10.9.0"
 @C = common global [2000 x float] zeroinitializer, align 16
 @D = common global [2000 x float] zeroinitializer, align 16
 
-; Currently SCEV isn't smart enough to figure out that accesses
-; A[3*i], A[3*i+1] and A[3*i+2] are consecutive, but in future
-; that would hopefully be fixed. For now, check that this isn't
-; vectorized.
 ; Function Attrs: nounwind ssp uwtable
 define void @foo_3double(i32 %u) #0 {
 ; CHECK-LABEL: @foo_3double(
@@ -21,26 +17,25 @@ define void @foo_3double(i32 %u) #0 {
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[U]], 3
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[MUL]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load double, double* [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP1:%.*]] = load double, double* [[ARRAYIDX4]], align 8
-; CHECK-NEXT:    [[ADD5:%.*]] = fadd double [[TMP0]], [[TMP1]]
-; CHECK-NEXT:    store double [[ADD5]], double* [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[ADD11:%.*]] = add nsw i32 [[MUL]], 1
 ; CHECK-NEXT:    [[IDXPROM12:%.*]] = sext i32 [[ADD11]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM12]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load double, double* [[ARRAYIDX13]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
 ; CHECK-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM12]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load double, double* [[ARRAYIDX17]], align 8
-; CHECK-NEXT:    [[ADD18:%.*]] = fadd double [[TMP2]], [[TMP3]]
-; CHECK-NEXT:    store double [[ADD18]], double* [[ARRAYIDX13]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[ARRAYIDX4]] to <2 x double>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
 ; CHECK-NEXT:    [[ADD24:%.*]] = add nsw i32 [[MUL]], 2
 ; CHECK-NEXT:    [[IDXPROM25:%.*]] = sext i32 [[ADD24]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM25]]
-; CHECK-NEXT:    [[TMP4:%.*]] = load double, double* [[ARRAYIDX26]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = load double, double* [[ARRAYIDX26]], align 8
 ; CHECK-NEXT:    [[ARRAYIDX30:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM25]]
-; CHECK-NEXT:    [[TMP5:%.*]] = load double, double* [[ARRAYIDX30]], align 8
-; CHECK-NEXT:    [[ADD31:%.*]] = fadd double [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load double, double* [[ARRAYIDX30]], align 8
+; CHECK-NEXT:    [[ADD31:%.*]] = fadd double [[TMP6]], [[TMP7]]
 ; CHECK-NEXT:    store double [[ADD31]], double* [[ARRAYIDX26]], align 8
 ; CHECK-NEXT:    ret void
 ;