[llvm-branch-commits] [llvm] 71bd59f - [SVE] Add support for scalable vectors with vectorize.scalable.enable loop attribute

Wed Dec 2 05:29:06 PST 2020

Author: David Sherwood
Date: 2020-12-02T13:23:43Z
New Revision: 71bd59f0cb6db0211418b07127e8b311d944e2c2

URL: https://github.com/llvm/llvm-project/commit/71bd59f0cb6db0211418b07127e8b311d944e2c2
DIFF: https://github.com/llvm/llvm-project/commit/71bd59f0cb6db0211418b07127e8b311d944e2c2.diff

LOG: [SVE] Add support for scalable vectors with vectorize.scalable.enable loop attribute

In this patch I have added support for a new loop hint called
vectorize.scalable.enable that says whether we should enable scalable
vectorization or not. If a user wants to instruct the compiler to
vectorize a loop with scalable vectors they can now do this as
follows:

  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2
  ...
  !2 = !{!2, !3, !4}
  !3 = !{!"llvm.loop.vectorize.width", i32 8}
  !4 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}

Setting the hint to false simply reverts the behaviour back to the
default, using fixed width vectors.

Differential Revision: https://reviews.llvm.org/D88962

Added: 
    llvm/test/Transforms/LoopVectorize/no_array_bounds_scalable.ll

Modified: 
    llvm/docs/LangRef.rst
    llvm/include/llvm/Transforms/Utils/LoopUtils.h
    llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
    llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
    llvm/lib/Transforms/Utils/LoopUtils.cpp
    llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/test/Transforms/LoopVectorize/metadata-width.ll

Removed: 
    


################################################################################
diff  --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 1964f2416b8f..03a8387d9d1f 100644

--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -5956,6 +5956,21 @@ vectorization:
    !0 = !{!"llvm.loop.vectorize.predicate.enable", i1 0}
    !1 = !{!"llvm.loop.vectorize.predicate.enable", i1 1}
 
+'``llvm.loop.vectorize.scalable.enable``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This metadata selectively enables or disables scalable vectorization for the
+loop, and only has any effect if vectorization for the loop is already enabled.
+The first operand is the string ``llvm.loop.vectorize.scalable.enable``
+and the second operand is a bit. If the bit operand value is 1 scalable
+vectorization is enabled, whereas a value of 0 reverts to the default fixed
+width vectorization:
+
+.. code-block:: llvm
+
+   !0 = !{!"llvm.loop.vectorize.scalable.enable", i1 0}
+   !1 = !{!"llvm.loop.vectorize.scalable.enable", i1 1}
+
 '``llvm.loop.vectorize.width``' Metadata
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 

diff  --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 360e262e8ae0..ef348ed56129 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -213,6 +213,13 @@ Optional<const MDOperand *> findStringMetadataForLoop(const Loop *TheLoop,
 /// Find named metadata for a loop with an integer value.
 llvm::Optional<int> getOptionalIntLoopAttribute(Loop *TheLoop, StringRef Name);
 
+/// Find a combination of metadata ("llvm.loop.vectorize.width" and
+/// "llvm.loop.vectorize.scalable.enable") for a loop and use it to construct a
+/// ElementCount. If the metadata "llvm.loop.vectorize.width" cannot be found
+/// then None is returned.
+Optional<ElementCount>
+getOptionalElementCountLoopAttribute(Loop *TheLoop);
+
 /// Create a new loop identifier for a loop created from a loop transformation.
 ///
 /// @param OrigLoopID The loop ID of the loop before the transformation.

diff  --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 2be9ef10ac4f..f701e08961a0 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -29,6 +29,7 @@
 #include "llvm/ADT/MapVector.h"
 #include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Support/TypeSize.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 
 namespace llvm {
@@ -43,8 +44,14 @@ namespace llvm {
 /// for example 'force', means a decision has been made. So, we need to be
 /// careful NOT to add them if the user hasn't specifically asked so.
 class LoopVectorizeHints {
-  enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED,
-                  HK_PREDICATE };
+  enum HintKind {
+    HK_WIDTH,
+    HK_UNROLL,
+    HK_FORCE,
+    HK_ISVECTORIZED,
+    HK_PREDICATE,
+    HK_SCALABLE
+  };
 
   /// Hint - associates name and validation with the hint value.
   struct Hint {
@@ -73,6 +80,9 @@ class LoopVectorizeHints {
   /// Vector Predicate
   Hint Predicate;
 
+  /// Says whether we should use fixed width or scalable vectorization.
+  Hint Scalable;
+
   /// Return the loop metadata prefix.
   static StringRef Prefix() { return "llvm.loop."; }
 
@@ -98,7 +108,9 @@ class LoopVectorizeHints {
   /// Dumps all the hint information.
   void emitRemarkWithHints() const;
 
-  unsigned getWidth() const { return Width.Value; }
+  ElementCount getWidth() const {
+    return ElementCount::get(Width.Value, isScalable());
+  }
   unsigned getInterleave() const { return Interleave.Value; }
   unsigned getIsVectorized() const { return IsVectorized.Value; }
   unsigned getPredicate() const { return Predicate.Value; }
@@ -109,6 +121,8 @@ class LoopVectorizeHints {
     return (ForceKind)Force.Value;
   }
 
+  bool isScalable() const { return Scalable.Value; }
+
   /// If hints are provided that force vectorization, use the AlwaysPrint
   /// pass name to force the frontend to print the diagnostic.
   const char *vectorizeAnalysisPassName() const;
@@ -119,7 +133,9 @@ class LoopVectorizeHints {
     // enabled by default because can be unsafe or inefficient. For example,
     // reordering floating-point operations will change the way round-off
     // error accumulates in the loop.
-    return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1;
+    ElementCount EC = getWidth();
+    return getForce() == LoopVectorizeHints::FK_Enabled ||
+           EC.getKnownMinValue() > 1;
   }
 
   bool isPotentiallyUnsafe() const {

diff  --git a/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp b/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
index 7c81e6352dec..80a7d3a43ad6 100644
--- a/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
+++ b/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
@@ -48,12 +48,12 @@ static void warnAboutLeftoverTransformations(Loop *L,
 
   if (hasVectorizeTransformation(L) == TM_ForcedByUser) {
     LLVM_DEBUG(dbgs() << "Leftover vectorization transformation\n");
-    Optional<int> VectorizeWidth =
-        getOptionalIntLoopAttribute(L, "llvm.loop.vectorize.width");
+    Optional<ElementCount> VectorizeWidth =
+        getOptionalElementCountLoopAttribute(L);
     Optional<int> InterleaveCount =
         getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
 
-    if (VectorizeWidth.getValueOr(0) != 1)
+    if (!VectorizeWidth || VectorizeWidth->isVector())
       ORE->emit(
           DiagnosticInfoOptimizationFailure(DEBUG_TYPE,
                                             "FailedRequestedVectorization",

diff  --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index c2c57137cf43..1ac270814b00 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -301,6 +301,21 @@ bool llvm::getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) {
   return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false);
 }
 
+Optional<ElementCount>
+llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) {
+  Optional<int> Width =
+      getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");
+
+  if (Width.hasValue()) {
+    Optional<int> IsScalable = getOptionalIntLoopAttribute(
+        TheLoop, "llvm.loop.vectorize.scalable.enable");
+    return ElementCount::get(*Width,
+                             IsScalable.hasValue() ? *IsScalable : false);
+  }
+
+  return None;
+}
+
 llvm::Optional<int> llvm::getOptionalIntLoopAttribute(Loop *TheLoop,
                                                       StringRef Name) {
   const MDOperand *AttrMD =
@@ -450,14 +465,15 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
   if (Enable == false)
     return TM_SuppressedByUser;
 
-  Optional<int> VectorizeWidth =
-      getOptionalIntLoopAttribute(L, "llvm.loop.vectorize.width");
+  Optional<ElementCount> VectorizeWidth =
+      getOptionalElementCountLoopAttribute(L);
   Optional<int> InterleaveCount =
       getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
 
   // 'Forcing' vector width and interleave count to one effectively disables
   // this tranformation.
-  if (Enable == true && VectorizeWidth == 1 && InterleaveCount == 1)
+  if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() &&
+      InterleaveCount == 1)
     return TM_SuppressedByUser;
 
   if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
@@ -466,10 +482,10 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
   if (Enable == true)
     return TM_ForcedByUser;
 
-  if (VectorizeWidth == 1 && InterleaveCount == 1)
+  if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1)
     return TM_Disable;
 
-  if (VectorizeWidth > 1 || InterleaveCount > 1)
+  if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1)
     return TM_Enable;
 
   if (hasDisableAllTransformsHint(L))

diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 622f5b6466e4..60e1cc9a4a59 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -66,6 +66,7 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) {
     return (Val <= 1);
   case HK_ISVECTORIZED:
   case HK_PREDICATE:
+  case HK_SCALABLE:
     return (Val == 0 || Val == 1);
   }
   return false;
@@ -78,7 +79,8 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
       Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
       Force("vectorize.enable", FK_Undefined, HK_FORCE),
       IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
-      Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), TheLoop(L),
+      Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
+      Scalable("vectorize.scalable.enable", false, HK_SCALABLE), TheLoop(L),
       ORE(ORE) {
   // Populate values with existing loop metadata.
   getHintsFromMetadata();
@@ -91,7 +93,8 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
     // If the vectorization width and interleaving count are both 1 then
     // consider the loop to have been already vectorized because there's
     // nothing more that we can do.
-    IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
+    IsVectorized.Value =
+        getWidth() == ElementCount::getFixed(1) && Interleave.Value == 1;
   LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
              << "LV: Interleaving disabled by the pass manager\n");
 }
@@ -164,7 +167,7 @@ void LoopVectorizeHints::emitRemarkWithHints() const {
       if (Force.Value == LoopVectorizeHints::FK_Enabled) {
         R << " (Force=" << NV("Force", true);
         if (Width.Value != 0)
-          R << ", Vector Width=" << NV("VectorWidth", Width.Value);
+          R << ", Vector Width=" << NV("VectorWidth", getWidth());
         if (Interleave.Value != 0)
           R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
         R << ")";
@@ -175,11 +178,11 @@ void LoopVectorizeHints::emitRemarkWithHints() const {
 }
 
 const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
-  if (getWidth() == 1)
+  if (getWidth() == ElementCount::getFixed(1))
     return LV_NAME;
   if (getForce() == LoopVectorizeHints::FK_Disabled)
     return LV_NAME;
-  if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
+  if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth().isZero())
     return LV_NAME;
   return OptimizationRemarkAnalysis::AlwaysPrint;
 }
@@ -230,7 +233,8 @@ void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
     return;
   unsigned Val = C->getZExtValue();
 
-  Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
+  Hint *Hints[] = {&Width,        &Interleave, &Force,
+                   &IsVectorized, &Predicate,  &Scalable};
   for (auto H : Hints) {
     if (Name == H->Name) {
       if (H->validate(Val))

diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index a3efb7026043..19e0ba54826a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8314,11 +8314,16 @@ static bool processLoopInVPlanNativePath(
   LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI, PSE);
 
   // Get user vectorization factor.
-  const unsigned UserVF = Hints.getWidth();
+  ElementCount UserVF = Hints.getWidth();
+  if (UserVF.isScalable()) {
+    // TODO: Use scalable UserVF once we've added initial support for scalable
+    // vectorization. For now we convert it to fixed width, but this will be
+    // removed in a later patch.
+    UserVF = ElementCount::getFixed(UserVF.getKnownMinValue());
+  }
 
   // Plan how to best vectorize, return the best VF and its cost.
-  const VectorizationFactor VF =
-      LVP.planInVPlanNativePath(ElementCount::getFixed(UserVF));
+  const VectorizationFactor VF = LVP.planInVPlanNativePath(UserVF);
 
   // If we are stress testing VPlan builds, do not attempt to generate vector
   // code. Masked vector code generation support will follow soon.
@@ -8480,12 +8485,18 @@ bool LoopVectorizePass::processLoop(Loop *L) {
   LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI, PSE);
 
   // Get user vectorization factor and interleave count.
-  unsigned UserVF = Hints.getWidth();
+  ElementCount UserVF = Hints.getWidth();
+  if (UserVF.isScalable()) {
+    // TODO: Use scalable UserVF once we've added initial support for scalable
+    // vectorization. For now we convert it to fixed width, but this will be
+    // removed in a later patch.
+    UserVF = ElementCount::getFixed(UserVF.getKnownMinValue());
+  }
+
   unsigned UserIC = Hints.getInterleave();
 
   // Plan how to best vectorize, return the best VF and its cost.
-  Optional<VectorizationFactor> MaybeVF =
-      LVP.plan(ElementCount::getFixed(UserVF), UserIC);
+  Optional<VectorizationFactor> MaybeVF = LVP.plan(UserVF, UserIC);
 
   VectorizationFactor VF = VectorizationFactor::Disabled();
   unsigned IC = 1;

diff  --git a/llvm/test/Transforms/LoopVectorize/metadata-width.ll b/llvm/test/Transforms/LoopVectorize/metadata-width.ll
index 7c7973247ef7..db9c6c9d862a 100644
--- a/llvm/test/Transforms/LoopVectorize/metadata-width.ll
+++ b/llvm/test/Transforms/LoopVectorize/metadata-width.ll
@@ -24,7 +24,55 @@ for.end:                                          ; preds = %for.body, %entry
   ret void
 }
 
+; CHECK-LABEL: @test2(
+; CHECK: store <8 x i32>
+; CHECK: ret void
+define void @test2(i32* nocapture %a, i32 %n) #0 {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = trunc i64 %indvars.iv to i32
+  store i32 %0, i32* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; CHECK-LABEL: @test3(
+; CHECK: store <8 x i32>
+; CHECK: ret void
+define void @test3(i32* nocapture %a, i32 %n) #0 {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = trunc i64 %indvars.iv to i32
+  store i32 %0, i32* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
 !0 = !{!0, !1}
 !1 = !{!"llvm.loop.vectorize.width", i32 8}
+!2 = !{!2, !1, !3}
+!3 = !{!"llvm.loop.vectorize.scalable.enable", i32 1}
+!4 = !{!4, !1, !5}
+!5 = !{!"llvm.loop.vectorize.scalable.enable", i32 0}

diff  --git a/llvm/test/Transforms/LoopVectorize/no_array_bounds_scalable.ll b/llvm/test/Transforms/LoopVectorize/no_array_bounds_scalable.ll
new file mode 100644
index 000000000000..d182e9c38cf2
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/no_array_bounds_scalable.ll
@@ -0,0 +1,69 @@
+; RUN: opt < %s -loop-vectorize -transform-warning -S 2>&1 | FileCheck %s
+
+; Like no_array_bounds.ll we verify warnings are generated when vectorization/interleaving is
+; explicitly specified and fails to occur for both fixed and scalable vectorize.width loop hints.
+
+;  #pragma clang loop vectorize(enable)
+;  for (int i = 0; i < number; i++) {
+;    A[B[i]]++;
+;  }
+
+; CHECK: warning: <unknown>:0:0: loop not interleaved: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
+define dso_local void @foo(i32* nocapture %A, i32* nocapture readonly %B, i32 %N) {
+entry:
+  %cmp7 = icmp sgt i32 %N, 0
+  br i1 %cmp7, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %N to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %idxprom1 = sext i32 %0 to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1
+  %1 = load i32, i32* %arrayidx2, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* %arrayidx2, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; CHECK: warning: <unknown>:0:0: loop not vectorized: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
+define dso_local void @foo2(i32* nocapture %A, i32* nocapture readonly %B, i32 %N) {
+entry:
+  %cmp7 = icmp sgt i32 %N, 0
+  br i1 %cmp7, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %N to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %idxprom1 = sext i32 %0 to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1
+  %1 = load i32, i32* %arrayidx2, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* %arrayidx2, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !3
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+!0 = distinct !{!0, !1, !2}
+!1 = !{!"llvm.loop.vectorize.enable", i1 true}
+!2 = !{!"llvm.loop.vectorize.width", i32 1}
+!3 = distinct !{!3, !1, !2, !4}
+!4 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}