[llvm] ddb3b26 - [LV] Consider Loop Unroll Hints When Making Interleave Decisions

Bardia Mahjour via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 28 14:28:10 PDT 2021


Author: Bardia Mahjour
Date: 2021-04-28T17:27:52-04:00
New Revision: ddb3b26a12694a61611223eb3a84532762cbe4b8

URL: https://github.com/llvm/llvm-project/commit/ddb3b26a12694a61611223eb3a84532762cbe4b8
DIFF: https://github.com/llvm/llvm-project/commit/ddb3b26a12694a61611223eb3a84532762cbe4b8.diff

LOG: [LV] Consider Loop Unroll Hints When Making Interleave Decisions

This patch causes the loop vectorizer to not interleave loops that have
nounroll loop hints (llvm.loop.unroll.disable and llvm.loop.unroll_count(1)).
Note that if a particular interleave count is being requested
(through llvm.loop.interleave_count), it will still be honoured, regardless
of the presence of nounroll hints.

Reviewed By: Meinersbur

Differential Revision: https://reviews.llvm.org/D101374

Added: 
    llvm/test/Transforms/LoopVectorize/nounroll.ll

Modified: 
    llvm/include/llvm/Transforms/Utils/LoopUtils.h
    llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
    llvm/lib/Transforms/Utils/LoopUtils.cpp
    llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/test/Transforms/LoopVectorize/X86/no_fpmath.ll
    llvm/test/Transforms/LoopVectorize/explicit_outer_detection.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 067da68bd8de4..8cea4cc93e40c 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -219,14 +219,15 @@ Optional<const MDOperand *> findStringMetadataForLoop(const Loop *TheLoop,
                                                       StringRef Name);
 
 /// Find named metadata for a loop with an integer value.
-llvm::Optional<int> getOptionalIntLoopAttribute(Loop *TheLoop, StringRef Name);
+llvm::Optional<int> getOptionalIntLoopAttribute(const Loop *TheLoop,
+                                                StringRef Name);
 
 /// Find a combination of metadata ("llvm.loop.vectorize.width" and
 /// "llvm.loop.vectorize.scalable.enable") for a loop and use it to construct a
 /// ElementCount. If the metadata "llvm.loop.vectorize.width" cannot be found
 /// then None is returned.
 Optional<ElementCount>
-getOptionalElementCountLoopAttribute(Loop *TheLoop);
+getOptionalElementCountLoopAttribute(const Loop *TheLoop);
 
 /// Create a new loop identifier for a loop created from a loop transformation.
 ///
@@ -295,11 +296,11 @@ enum TransformationMode {
 
 /// @{
 /// Get the mode for LLVM's supported loop transformations.
-TransformationMode hasUnrollTransformation(Loop *L);
-TransformationMode hasUnrollAndJamTransformation(Loop *L);
-TransformationMode hasVectorizeTransformation(Loop *L);
-TransformationMode hasDistributeTransformation(Loop *L);
-TransformationMode hasLICMVersioningTransformation(Loop *L);
+TransformationMode hasUnrollTransformation(const Loop *L);
+TransformationMode hasUnrollAndJamTransformation(const Loop *L);
+TransformationMode hasVectorizeTransformation(const Loop *L);
+TransformationMode hasDistributeTransformation(const Loop *L);
+TransformationMode hasLICMVersioningTransformation(const Loop *L);
 /// @}
 
 /// Set input string into loop metadata by keeping other values intact.

diff  --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index bfaad81771ecc..d92fec8977c80 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -46,7 +46,7 @@ namespace llvm {
 class LoopVectorizeHints {
   enum HintKind {
     HK_WIDTH,
-    HK_UNROLL,
+    HK_INTERLEAVE,
     HK_FORCE,
     HK_ISVECTORIZED,
     HK_PREDICATE,
@@ -111,7 +111,15 @@ class LoopVectorizeHints {
   ElementCount getWidth() const {
     return ElementCount::get(Width.Value, isScalable());
   }
-  unsigned getInterleave() const { return Interleave.Value; }
+  unsigned getInterleave() const {
+    if (Interleave.Value)
+      return Interleave.Value;
+    // If interleaving is not explicitly set, assume that if we do not want
+    // unrolling, we also don't want any interleaving.
+    if (llvm::hasUnrollTransformation(TheLoop) & TM_Disable)
+      return 1;
+    return 0;
+  }
   unsigned getIsVectorized() const { return IsVectorized.Value; }
   unsigned getPredicate() const { return Predicate.Value; }
   enum ForceKind getForce() const {

diff  --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 19d9e82dbf033..252063a0212ad 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -298,7 +298,7 @@ bool llvm::getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) {
 }
 
 Optional<ElementCount>
-llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) {
+llvm::getOptionalElementCountLoopAttribute(const Loop *TheLoop) {
   Optional<int> Width =
       getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");
 
@@ -311,7 +311,7 @@ llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) {
   return None;
 }
 
-llvm::Optional<int> llvm::getOptionalIntLoopAttribute(Loop *TheLoop,
+llvm::Optional<int> llvm::getOptionalIntLoopAttribute(const Loop *TheLoop,
                                                       StringRef Name) {
   const MDOperand *AttrMD =
       findStringMetadataForLoop(TheLoop, Name).getValueOr(nullptr);
@@ -418,7 +418,7 @@ bool llvm::hasMustProgress(const Loop *L) {
   return getBooleanLoopAttribute(L, LLVMLoopMustProgress);
 }
 
-TransformationMode llvm::hasUnrollTransformation(Loop *L) {
+TransformationMode llvm::hasUnrollTransformation(const Loop *L) {
   if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
     return TM_SuppressedByUser;
 
@@ -439,7 +439,7 @@ TransformationMode llvm::hasUnrollTransformation(Loop *L) {
   return TM_Unspecified;
 }
 
-TransformationMode llvm::hasUnrollAndJamTransformation(Loop *L) {
+TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) {
   if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable"))
     return TM_SuppressedByUser;
 
@@ -457,7 +457,7 @@ TransformationMode llvm::hasUnrollAndJamTransformation(Loop *L) {
   return TM_Unspecified;
 }
 
-TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
+TransformationMode llvm::hasVectorizeTransformation(const Loop *L) {
   Optional<bool> Enable =
       getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable");
 
@@ -493,7 +493,7 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
   return TM_Unspecified;
 }
 
-TransformationMode llvm::hasDistributeTransformation(Loop *L) {
+TransformationMode llvm::hasDistributeTransformation(const Loop *L) {
   if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable"))
     return TM_ForcedByUser;
 
@@ -503,7 +503,7 @@ TransformationMode llvm::hasDistributeTransformation(Loop *L) {
   return TM_Unspecified;
 }
 
-TransformationMode llvm::hasLICMVersioningTransformation(Loop *L) {
+TransformationMode llvm::hasLICMVersioningTransformation(const Loop *L) {
   if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable"))
     return TM_SuppressedByUser;
 

diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 016f61af791d7..2cb4e4f824dd2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -57,7 +57,7 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) {
   switch (Kind) {
   case HK_WIDTH:
     return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
-  case HK_UNROLL:
+  case HK_INTERLEAVE:
     return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
   case HK_FORCE:
     return (Val <= 1);
@@ -73,7 +73,7 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
                                        bool InterleaveOnlyWhenForced,
                                        OptimizationRemarkEmitter &ORE)
     : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
-      Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
+      Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
       Force("vectorize.enable", FK_Undefined, HK_FORCE),
       IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
       Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
@@ -91,8 +91,8 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
     // consider the loop to have been already vectorized because there's
     // nothing more that we can do.
     IsVectorized.Value =
-        getWidth() == ElementCount::getFixed(1) && Interleave.Value == 1;
-  LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
+        getWidth() == ElementCount::getFixed(1) && getInterleave() == 1;
+  LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
              << "LV: Interleaving disabled by the pass manager\n");
 }
 
@@ -165,8 +165,8 @@ void LoopVectorizeHints::emitRemarkWithHints() const {
         R << " (Force=" << NV("Force", true);
         if (Width.Value != 0)
           R << ", Vector Width=" << NV("VectorWidth", getWidth());
-        if (Interleave.Value != 0)
-          R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
+        if (getInterleave() != 0)
+          R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
         R << ")";
       }
       return R;

diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 2e249d778f49c..7a430f533961e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9735,7 +9735,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
                             ? "enabled"
                             : "?"))
              << " width=" << Hints.getWidth()
-             << " unroll=" << Hints.getInterleave() << "\n");
+             << " interleave=" << Hints.getInterleave() << "\n");
 
   // Function containing loop
   Function *F = L->getHeader()->getParent();

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/no_fpmath.ll b/llvm/test/Transforms/LoopVectorize/X86/no_fpmath.ll
index 177c120b3df82..8618ed90d7f6c 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/no_fpmath.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/no_fpmath.ll
@@ -35,7 +35,7 @@ for.body:                                         ; preds = %for.body.preheader,
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !8
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !8
   %exitcond = icmp eq i32 %lftr.wideiv, %n, !dbg !8
-  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !dbg !8, !llvm.loop !17
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !dbg !8
 }
 
 ; Function Attrs: nounwind readonly ssp uwtable
@@ -92,8 +92,6 @@ attributes #0 = { nounwind }
 !14 = !{!"Simple C/C++ TBAA"}
 !15 = !DILocation(line: 6, column: 19, scope: !4)
 !16 = !DILocation(line: 6, column: 11, scope: !4)
-!17 = distinct !{!17, !18}
-!18 = !{!"llvm.loop.unroll.disable"}
 !19 = !DILocation(line: 16, column: 20, scope: !20)
 !20 = distinct !DISubprogram(name: "cond_sum_loop_hint", scope: !5, file: !5, line: 12, type: !6, isLocal: false, isDefinition: true, scopeLine: 12, flags: DIFlagPrototyped, isOptimized: true, unit: !28, retainedNodes: !7)
 !21 = !DILocation(line: 16, column: 3, scope: !20)
@@ -101,7 +99,7 @@ attributes #0 = { nounwind }
 !23 = !DILocation(line: 20, column: 3, scope: !20)
 !24 = !DILocation(line: 17, column: 19, scope: !20)
 !25 = !DILocation(line: 17, column: 11, scope: !20)
-!26 = distinct !{!26, !27, !18}
+!26 = distinct !{!26, !27}
 !27 = !{!"llvm.loop.vectorize.enable", i1 true}
 !28 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang",
                              file: !5,

diff  --git a/llvm/test/Transforms/LoopVectorize/explicit_outer_detection.ll b/llvm/test/Transforms/LoopVectorize/explicit_outer_detection.ll
index 33527b307dae3..e07222718579e 100644
--- a/llvm/test/Transforms/LoopVectorize/explicit_outer_detection.ll
+++ b/llvm/test/Transforms/LoopVectorize/explicit_outer_detection.ll
@@ -19,7 +19,7 @@
 ; Case 1: Annotated outer loop WITH vector width information must be collected.
 
 ; CHECK-LABEL: vector_width
-; CHECK: LV: Loop hints: force=enabled width=4 unroll=0
+; CHECK: LV: Loop hints: force=enabled width=4 interleave=0
 ; CHECK: LV: We can vectorize this outer loop!
 ; CHECK: LV: Using user VF 4 to build VPlans.
 ; CHECK-NOT: LV: Loop hints: force=?
@@ -71,7 +71,7 @@ for.end15:                                        ; preds = %outer.inc, %entry
 ; Case 2: Annotated outer loop WITHOUT vector width information must be collected.
 
 ; CHECK-LABEL: case2
-; CHECK: LV: Loop hints: force=enabled width=0 unroll=0
+; CHECK: LV: Loop hints: force=enabled width=0 interleave=0
 ; CHECK: LV: We can vectorize this outer loop!
 ; CHECK: LV: Using VF 1 to build VPlans.
 

diff  --git a/llvm/test/Transforms/LoopVectorize/nounroll.ll b/llvm/test/Transforms/LoopVectorize/nounroll.ll
new file mode 100644
index 0000000000000..52d82c88a4899
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/nounroll.ll
@@ -0,0 +1,93 @@
+; RUN: opt < %s -passes='loop-vectorize' -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+
+; CHECK: LV: Checking a loop in "f1"
+; CHECK: LV: Loop hints: force=? width=0 interleave=1
+define dso_local void @f1(i32 signext %n, i32* %A) {
+entry:
+  %cmp1 = icmp sgt i32 %n, 0
+  br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %0 = trunc i64 %indvars.iv to i32
+  store i32 %0, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.body, label %for.end.loopexit, !llvm.loop !1
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+}
+
+; CHECK: LV: Checking a loop in "f2"
+; CHECK: LV: Loop hints: force=? width=0 interleave=4
+define dso_local void @f2(i32 signext %n, i32* %A) {
+entry:
+  %cmp1 = icmp sgt i32 %n, 0
+  br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %0 = trunc i64 %indvars.iv to i32
+  store i32 %0, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.body, label %for.end.loopexit, !llvm.loop !3
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+}
+
+; CHECK: LV: Checking a loop in "f3"
+; CHECK: LV: Loop hints: force=? width=0 interleave=1
+define dso_local void @f3(i32 signext %n, i32* %A) {
+entry:
+  %cmp1 = icmp sgt i32 %n, 0
+  br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %0 = trunc i64 %indvars.iv to i32
+  store i32 %0, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.body, label %for.end.loopexit, !llvm.loop !6
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+}
+
+!1 = distinct !{!1, !2}
+!2 = !{!"llvm.loop.unroll.disable"}
+!3 = distinct !{!3, !4, !5}
+!4 = !{!"llvm.loop.unroll.disable"}
+!5 = !{!"llvm.loop.interleave.count", i32 4}
+!6 = distinct !{!6, !7, !8}
+!7 = !{!"llvm.loop.mustprogress"}
+!8 = !{!"llvm.loop.unroll.count", i32 1}


        


More information about the llvm-commits mailing list