[llvm] 9a4dd4d - [SLP]No need to mark scatter load pointer as scalar as it gets vectorized.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 22 11:59:21 PST 2021


Author: Alexey Bataev
Date: 2021-02-22T11:58:28-08:00
New Revision: 9a4dd4de9d2f80744a0729d7f5064e465027853b

URL: https://github.com/llvm/llvm-project/commit/9a4dd4de9d2f80744a0729d7f5064e465027853b
DIFF: https://github.com/llvm/llvm-project/commit/9a4dd4de9d2f80744a0729d7f5064e465027853b.diff

LOG: [SLP]No need to mark scatter load pointer as scalar as it gets vectorized.

Pointer operand of scatter loads does not remain scalar in the tree (it
gest vectorized) and thus must not be marked as the scalar that remains
scalar in vectorized form.

Differential Revision: https://reviews.llvm.org/D96818

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0b630197911a..1184706dcfde 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2575,6 +2575,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
           // instructions. If that is the case, the one in Lane 0 will
           // be used.
           if (UseScalar != U ||
+              UseEntry->State == TreeEntry::ScatterVectorize ||
               !InTreeUserNeedToExtract(Scalar, UserInst, TLI)) {
             LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U
                               << ".\n");

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
index bfaa24fec252..123e498f1d96 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
@@ -243,20 +243,19 @@ define void @lookahead_external_uses(double* %A, double *%B, double *%C, double
 ; CHECK-NEXT:    [[A1:%.*]] = load double, double* [[IDXA1]], align 8
 ; CHECK-NEXT:    [[B2:%.*]] = load double, double* [[IDXB2]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[TMP2]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x double> undef)
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double*> [[TMP2]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[IDXB0]] to <2 x double>*
-; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[TMP5]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[A1]], i32 1
-; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x double> poison, double [[D0]], i32 0
-; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x double> [[TMP9]], double [[B2]], i32 1
-; CHECK-NEXT:    [[TMP11:%.*]] = fsub fast <2 x double> [[TMP8]], [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = fsub fast <2 x double> [[TMP3]], [[TMP6]]
-; CHECK-NEXT:    [[TMP13:%.*]] = fadd fast <2 x double> [[TMP12]], [[TMP11]]
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[IDXB0]] to <2 x double>*
+; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[A1]], i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> poison, double [[D0]], i32 0
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[B2]], i32 1
+; CHECK-NEXT:    [[TMP10:%.*]] = fsub fast <2 x double> [[TMP7]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = fsub fast <2 x double> [[TMP3]], [[TMP5]]
+; CHECK-NEXT:    [[TMP12:%.*]] = fadd fast <2 x double> [[TMP11]], [[TMP10]]
 ; CHECK-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0
 ; CHECK-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[S]], i64 1
-; CHECK-NEXT:    [[TMP14:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP13]], <2 x double>* [[TMP14]], align 8
+; CHECK-NEXT:    [[TMP13:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP12]], <2 x double>* [[TMP13]], align 8
 ; CHECK-NEXT:    store double [[A1]], double* [[EXT1:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -320,34 +319,36 @@ define void @lookahead_limit_users_budget(double* %A, double *%B, double *%C, do
 ; CHECK-LABEL: @lookahead_limit_users_budget(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
-; CHECK-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[IDXC0:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 0
 ; CHECK-NEXT:    [[IDXD0:%.*]] = getelementptr inbounds double, double* [[D:%.*]], i64 0
 ; CHECK-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
-; CHECK-NEXT:    [[IDXB2:%.*]] = getelementptr inbounds double, double* [[B]], i64 2
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double*> poison, double* [[B:%.*]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double*> [[TMP0]], double* [[B]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr double, <2 x double*> [[TMP1]], <2 x i64> <i64 0, i64 2>
 ; CHECK-NEXT:    [[IDXA2:%.*]] = getelementptr inbounds double, double* [[A]], i64 2
 ; CHECK-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
-; CHECK-NEXT:    [[A0:%.*]] = load double, double* [[IDXA0]], align 8
-; CHECK-NEXT:    [[B0:%.*]] = load double, double* [[IDXB0]], align 8
 ; CHECK-NEXT:    [[C0:%.*]] = load double, double* [[IDXC0]], align 8
 ; CHECK-NEXT:    [[D0:%.*]] = load double, double* [[IDXD0]], align 8
-; CHECK-NEXT:    [[A1:%.*]] = load double, double* [[IDXA1]], align 8
-; CHECK-NEXT:    [[B2:%.*]] = load double, double* [[IDXB2]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[IDXA0]] to <2 x double>*
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[TMP2]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x double> undef)
 ; CHECK-NEXT:    [[A2:%.*]] = load double, double* [[IDXA2]], align 8
 ; CHECK-NEXT:    [[B1:%.*]] = load double, double* [[IDXB1]], align 8
-; CHECK-NEXT:    [[SUBA0B0:%.*]] = fsub fast double [[A0]], [[B0]]
-; CHECK-NEXT:    [[SUBC0D0:%.*]] = fsub fast double [[C0]], [[D0]]
-; CHECK-NEXT:    [[SUBA1B2:%.*]] = fsub fast double [[A1]], [[B2]]
-; CHECK-NEXT:    [[SUBA2B1:%.*]] = fsub fast double [[A2]], [[B1]]
-; CHECK-NEXT:    [[ADD0:%.*]] = fadd fast double [[SUBA0B0]], [[SUBC0D0]]
-; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast double [[SUBA1B2]], [[SUBA2B1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = fsub fast <2 x double> [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[A2]], i32 1
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x double> poison, double [[D0]], i32 0
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x double> [[TMP9]], double [[B1]], i32 1
+; CHECK-NEXT:    [[TMP11:%.*]] = fsub fast <2 x double> [[TMP8]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = fadd fast <2 x double> [[TMP6]], [[TMP11]]
 ; CHECK-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0
 ; CHECK-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[S]], i64 1
-; CHECK-NEXT:    store double [[ADD0]], double* [[IDXS0]], align 8
-; CHECK-NEXT:    store double [[ADD1]], double* [[IDXS1]], align 8
-; CHECK-NEXT:    store double [[A1]], double* [[EXT1:%.*]], align 8
-; CHECK-NEXT:    store double [[A1]], double* [[EXT2:%.*]], align 8
-; CHECK-NEXT:    store double [[A1]], double* [[EXT3:%.*]], align 8
+; CHECK-NEXT:    [[TMP13:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP12]], <2 x double>* [[TMP13]], align 8
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
+; CHECK-NEXT:    store double [[TMP14]], double* [[EXT1:%.*]], align 8
+; CHECK-NEXT:    store double [[TMP14]], double* [[EXT2:%.*]], align 8
+; CHECK-NEXT:    store double [[TMP14]], double* [[EXT3:%.*]], align 8
 ; CHECK-NEXT:    store double [[B1]], double* [[EXT4:%.*]], align 8
 ; CHECK-NEXT:    store double [[B1]], double* [[EXT5:%.*]], align 8
 ; CHECK-NEXT:    ret void


        


More information about the llvm-commits mailing list