[llvm] ab401a8 - [PGO][PGSO][LV] Fix loop not vectorized issue under profile guided size opts.

Hiroshi Yamauchi via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 19 12:33:52 PDT 2020


Author: Hiroshi Yamauchi
Date: 2020-08-19T12:13:34-07:00
New Revision: ab401a8c8a9cf2b9e81eb6864aa647d96751ae84

URL: https://github.com/llvm/llvm-project/commit/ab401a8c8a9cf2b9e81eb6864aa647d96751ae84
DIFF: https://github.com/llvm/llvm-project/commit/ab401a8c8a9cf2b9e81eb6864aa647d96751ae84.diff

LOG: [PGO][PGSO][LV] Fix loop not vectorized issue under profile guided size opts.

D81345 appears to accidentally disables vectorization when explicitly
enabled. As PGSO isn't currently accessible from LoopAccessInfo, revert back to
the vectorization with versioning-for-unit-stride for PGSO.

Differential Revision: https://reviews.llvm.org/D85784

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/test/Transforms/LoopVectorize/optsize.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 90a8d4703d65..e1a82f5891ae 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2822,7 +2822,8 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
       return;
 
   assert(!(SCEVCheckBlock->getParent()->hasOptSize() ||
-           OptForSizeBasedOnProfile) &&
+           (OptForSizeBasedOnProfile &&
+            Cost->Hints->getForce() != LoopVectorizeHints::FK_Enabled)) &&
          "Cannot SCEV check stride or overflow when optimizing for size");
 
   SCEVCheckBlock->setName("vector.scevcheck");
@@ -7914,12 +7915,17 @@ static ScalarEpilogueLowering getScalarEpilogueLowering(
     BlockFrequencyInfo *BFI, TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
     AssumptionCache *AC, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
     LoopVectorizationLegality &LVL) {
-  bool OptSize =
-      F->hasOptSize() || llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
-                                                     PGSOQueryType::IRPass);
   // 1) OptSize takes precedence over all other options, i.e. if this is set,
   // don't look at hints or options, and don't request a scalar epilogue.
-  if (OptSize)
+  // (For PGSO, as shouldOptimizeForSize isn't currently accessible from
+  // LoopAccessInfo (due to code dependency and not being able to reliably get
+  // PSI/BFI from a loop analysis under NPM), we cannot suppress the collection
+  // of strides in LoopAccessInfo::analyzeLoop() and vectorize without
+  // versioning when the vectorization is forced, unlike hasOptSize. So revert
+  // back to the old way and vectorize with versioning when forced. See D81345.)
+  if (F->hasOptSize() || (llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
+                                                      PGSOQueryType::IRPass) &&
+                          Hints.getForce() != LoopVectorizeHints::FK_Enabled))
     return CM_ScalarEpilogueNotAllowedOptSize;
 
   bool PredicateOptDisabled = PreferPredicateOverEpilog.getNumOccurrences() &&

diff  --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll
index b4233e6751cb..9b24bb4e51ed 100644
--- a/llvm/test/Transforms/LoopVectorize/optsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/optsize.ll
@@ -284,6 +284,34 @@ for.end:
   ret void
 }
 
+; Vectorize with versioning for unit stride for PGSO and enabled vectorization.
+;
+define void @stride1_pgso(i16* noalias %B, i32 %BStride) !prof !14 {
+; CHECK-LABEL: @stride1_pgso(
+; CHECK: vector.body
+;
+; PGSO-LABEL: @stride1_pgso(
+; PGSO: vector.body
+;
+; NPGSO-LABEL: @stride1_pgso(
+; NPGSO: vector.body
+
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
+  %mulB = mul nsw i32 %iv, %BStride
+  %gepOfB = getelementptr inbounds i16, i16* %B, i32 %mulB
+  store i16 42, i16* %gepOfB, align 4
+  %iv.next = add nuw nsw i32 %iv, 1
+  %exitcond = icmp eq i32 %iv.next, 1025
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15
+
+for.end:
+  ret void
+}
+
 ; PR46652: Check that the need for stride==1 check prevents vectorizing a loop
 ; having tiny trip count, when compiling w/o -Os/-Oz.
 ; CHECK-LABEL: @pr46652


        


More information about the llvm-commits mailing list