[llvm] [SCEV] Don't add predicates already implied by UnionPredicate. (PR #93397)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Sun May 26 17:33:53 PDT 2024


https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/93397

>From 7d6ae5bf946d5263c1d60e6aadae18a357a56a91 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 25 May 2024 14:32:04 +0100
Subject: [PATCH 1/3] [LV] Add test showing missed removal of implied
 predicate.

---
 .../LoopVectorize/scev-predicate-reasoning.ll | 108 ++++++++++++++++++
 1 file changed, 108 insertions(+)

diff --git a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
index 6ae6645378b32..63b3bb14e550d 100644
--- a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
+++ b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
@@ -149,3 +149,111 @@ for.cond:                                         ; preds = %for.body, %entry
 for.end:                                          ; preds = %for.cond
   ret void
 }
+
+ at h = global i64 0
+
+; TODO: Currently we generate SCEV check code for the same predicate twice.
+define void @implied_wrap_predicate(ptr %A, ptr %B, ptr %C) {
+; CHECK-LABEL: define void @implied_wrap_predicate
+; CHECK-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A3:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT:    [[C2:%.*]] = ptrtoint ptr [[C]] to i64
+; CHECK-NEXT:    [[A1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[A3]], 16
+; CHECK-NEXT:    [[UMAX4:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 add (i64 ptrtoint (ptr @h to i64), i64 1))
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[UMAX4]], -9
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 [[TMP1]], [[A3]]
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
+; CHECK-NEXT:    [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; CHECK:       vector.scevcheck:
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[A1]], 16
+; CHECK-NEXT:    [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP5]], i64 add (i64 ptrtoint (ptr @h to i64), i64 1))
+; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[UMAX]], -9
+; CHECK-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], [[A1]]
+; CHECK-NEXT:    [[TMP8:%.*]] = lshr i64 [[TMP7]], 3
+; CHECK-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i16
+; CHECK-NEXT:    [[TMP10:%.*]] = add i16 1, [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult i16 [[TMP10]], 1
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ugt i64 [[TMP8]], 65535
+; CHECK-NEXT:    [[TMP13:%.*]] = or i1 [[TMP11]], [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = trunc i64 [[TMP8]] to i16
+; CHECK-NEXT:    [[TMP15:%.*]] = add i16 2, [[TMP14]]
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ult i16 [[TMP15]], 2
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp ugt i64 [[TMP8]], 65535
+; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
+; CHECK-NEXT:    [[TMP19:%.*]] = trunc i64 [[TMP8]] to i16
+; CHECK-NEXT:    [[TMP20:%.*]] = add i16 1, [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = icmp ult i16 [[TMP20]], 1
+; CHECK-NEXT:    [[TMP22:%.*]] = icmp ugt i64 [[TMP8]], 65535
+; CHECK-NEXT:    [[TMP23:%.*]] = or i1 [[TMP21]], [[TMP22]]
+; CHECK-NEXT:    [[TMP24:%.*]] = or i1 [[TMP13]], [[TMP18]]
+; CHECK-NEXT:    [[TMP25:%.*]] = or i1 [[TMP24]], [[TMP23]]
+; CHECK-NEXT:    br i1 [[TMP25]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK:       vector.memcheck:
+; CHECK-NEXT:    [[TMP26:%.*]] = sub i64 [[C2]], [[A3]]
+; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP26]], 32
+; CHECK-NEXT:    br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i16
+; CHECK-NEXT:    [[IND_END:%.*]] = add i16 1, [[DOTCAST]]
+; CHECK-NEXT:    [[IND_END5:%.*]] = add i64 1, [[N_VEC]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
+; CHECK-NEXT:    [[TMP27:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP27]]
+; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr i64, ptr [[TMP28]], i32 0
+; CHECK-NEXT:    store <4 x i64> zeroinitializer, ptr [[TMP29]], align 4
+; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP27]]
+; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr i64, ptr [[TMP30]], i32 0
+; CHECK-NEXT:    store <4 x i64> zeroinitializer, ptr [[TMP31]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ], [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ], [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_EXT:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ], [ [[IV_EXT_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[GEP_A:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV_EXT]]
+; CHECK-NEXT:    store i64 0, ptr [[GEP_A]], align 4
+; CHECK-NEXT:    [[GEP_C:%.*]] = getelementptr i64, ptr [[C]], i64 [[IV_EXT]]
+; CHECK-NEXT:    store i64 0, ptr [[GEP_C]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add i16 [[IV]], 1
+; CHECK-NEXT:    [[IV_EXT_NEXT]] = zext i16 [[IV_NEXT]] to i64
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV_EXT_NEXT]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt ptr [[GEP]], @h
+; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i16 [ 1, %entry ], [ %iv.next, %loop ]
+  %iv.ext = phi i64 [ 1, %entry ], [ %iv.ext.next, %loop ]
+  %gep.A = getelementptr i64, ptr %A, i64 %iv.ext
+  store i64 0, ptr %gep.A
+  %gep.C = getelementptr i64, ptr %C, i64 %iv.ext
+  store i64 0, ptr %gep.C
+  %iv.next = add i16 %iv, 1
+  %iv.ext.next = zext i16 %iv.next to i64
+  %gep = getelementptr i64, ptr %A, i64 %iv.ext.next
+  %cmp = icmp ugt ptr %gep, @h
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  ret void
+}

>From fbf7d4b71e7cfa7327b32fc61f431c90fcfd900a Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 25 May 2024 13:14:34 +0100
Subject: [PATCH 2/3] [SCEV] Don't add predicates already implied by
 UnionPredicate.

Update SCEVUnionPredicate::add to only add predicates from another union
predicate, if they aren't alread implied by the union predicate we add
them to.

Note that there exists logic elsewhere to avoid adding predicates if
they are already implied, but this logic misses cases when only some
predicates of a union predicate are implied by the current set of
predicates.
---
 llvm/lib/Analysis/ScalarEvolution.cpp         |  6 +++-
 .../LoopVectorize/scev-predicate-reasoning.ll | 32 ++++++++-----------
 2 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index b83e2b435f5d7..206dfac0379ea 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -14746,8 +14746,12 @@ void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const {
 
 void SCEVUnionPredicate::add(const SCEVPredicate *N) {
   if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N)) {
-    for (const auto *Pred : Set->Preds)
+    for (const auto *Pred : Set->Preds) {
+      // Skip predicates already implied by this union predicate.
+      if (implies(Pred))
+        continue;
       add(Pred);
+    }
     return;
   }
 
diff --git a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
index 63b3bb14e550d..f44087ff632b4 100644
--- a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
+++ b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
@@ -184,17 +184,11 @@ define void @implied_wrap_predicate(ptr %A, ptr %B, ptr %C) {
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp ult i16 [[TMP15]], 2
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp ugt i64 [[TMP8]], 65535
 ; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
-; CHECK-NEXT:    [[TMP19:%.*]] = trunc i64 [[TMP8]] to i16
-; CHECK-NEXT:    [[TMP20:%.*]] = add i16 1, [[TMP19]]
-; CHECK-NEXT:    [[TMP21:%.*]] = icmp ult i16 [[TMP20]], 1
-; CHECK-NEXT:    [[TMP22:%.*]] = icmp ugt i64 [[TMP8]], 65535
-; CHECK-NEXT:    [[TMP23:%.*]] = or i1 [[TMP21]], [[TMP22]]
-; CHECK-NEXT:    [[TMP24:%.*]] = or i1 [[TMP13]], [[TMP18]]
-; CHECK-NEXT:    [[TMP25:%.*]] = or i1 [[TMP24]], [[TMP23]]
-; CHECK-NEXT:    br i1 [[TMP25]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK-NEXT:    [[TMP19:%.*]] = or i1 [[TMP13]], [[TMP18]]
+; CHECK-NEXT:    br i1 [[TMP19]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
 ; CHECK:       vector.memcheck:
-; CHECK-NEXT:    [[TMP26:%.*]] = sub i64 [[C2]], [[A3]]
-; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP26]], 32
+; CHECK-NEXT:    [[TMP20:%.*]] = sub i64 [[C2]], [[A3]]
+; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP20]], 32
 ; CHECK-NEXT:    br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 4
@@ -206,16 +200,16 @@ define void @implied_wrap_predicate(ptr %A, ptr %B, ptr %C) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
-; CHECK-NEXT:    [[TMP27:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP27]]
-; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr i64, ptr [[TMP28]], i32 0
-; CHECK-NEXT:    store <4 x i64> zeroinitializer, ptr [[TMP29]], align 4
-; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP27]]
-; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr i64, ptr [[TMP30]], i32 0
-; CHECK-NEXT:    store <4 x i64> zeroinitializer, ptr [[TMP31]], align 4
+; CHECK-NEXT:    [[TMP21:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr i64, ptr [[TMP22]], i32 0
+; CHECK-NEXT:    store <4 x i64> zeroinitializer, ptr [[TMP23]], align 4
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr i64, ptr [[TMP24]], i32 0
+; CHECK-NEXT:    store <4 x i64> zeroinitializer, ptr [[TMP25]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]

>From 8c2589a0823d8b777db3be54a99418624c974804 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 26 May 2024 17:26:41 -0700
Subject: [PATCH 3/3] !fixup address comments, thanks!

---
 llvm/lib/Analysis/ScalarEvolution.cpp         | 10 ++++-----
 .../LoopVectorize/scev-predicate-reasoning.ll | 22 ++++++++++++++++++-
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 206dfac0379ea..8d971e6a78e42 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -14746,16 +14746,14 @@ void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const {
 
 void SCEVUnionPredicate::add(const SCEVPredicate *N) {
   if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N)) {
-    for (const auto *Pred : Set->Preds) {
-      // Skip predicates already implied by this union predicate.
-      if (implies(Pred))
-        continue;
+    for (const auto *Pred : Set->Preds)
       add(Pred);
-    }
     return;
   }
 
-  Preds.push_back(N);
+  // Only add predicate if it is not already implied by this union predicate.
+  if (!implies(N))
+    Preds.push_back(N);
 }
 
 PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE,
diff --git a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
index 8bdeeee64e167..e58c99dc4bc58 100644
--- a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
+++ b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
@@ -152,7 +152,6 @@ for.end:                                          ; preds = %for.cond
 
 @h = global i64 0
 
-; TODO: Currently we generate SCEV check code for the same predicate twice.
 define void @implied_wrap_predicate(ptr %A, ptr %B, ptr %C) {
 ; CHECK-LABEL: define void @implied_wrap_predicate
 ; CHECK-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) {
@@ -184,6 +183,11 @@ define void @implied_wrap_predicate(ptr %A, ptr %B, ptr %C) {
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp ult i16 [[TMP15]], 2
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp ugt i64 [[TMP8]], 65535
 ; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
+; CHECK-NEXT:    [[TMP19:%.*]] = or i1 [[TMP13]], [[TMP18]]
+; CHECK-NEXT:    br i1 [[TMP19]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK:       vector.memcheck:
+; CHECK-NEXT:    [[TMP20:%.*]] = sub i64 [[C2]], [[A3]]
+; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP20]], 32
 ; CHECK-NEXT:    br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 4
@@ -195,6 +199,22 @@ define void @implied_wrap_predicate(ptr %A, ptr %B, ptr %C) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
+; CHECK-NEXT:    [[TMP21:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr i64, ptr [[TMP22]], i32 0
+; CHECK-NEXT:    store <4 x i64> zeroinitializer, ptr [[TMP23]], align 4
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr i64, ptr [[TMP24]], i32 0
+; CHECK-NEXT:    store <4 x i64> zeroinitializer, ptr [[TMP25]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ], [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ], [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[VECTOR_MEMCHECK]] ]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]



More information about the llvm-commits mailing list