[llvm] [LoopVectorize] Amend check for IV increments in collectUsersInExitBlock (PR #108020)

Hari Limaye via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 11 06:40:11 PDT 2024


https://github.com/hazzlim updated https://github.com/llvm/llvm-project/pull/108020

>From c3a5eb31e2ac4d2c75fb023be33ab8d65393df2e Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Tue, 10 Sep 2024 11:09:30 +0000
Subject: [PATCH 1/3] [LV] Amend check for IV increments in
 collectUsersInExitBlock

The check for IV increments in collectUsersInExitBlock currently also
triggers for exit-block PHIs that use the IV start value, which results
in us failing to add the incoming value for the middle block to these
PHIs.

Fix this by amending the check for IV increments to only include incoming
values that are instructions inside the loop.
---
 .../Transforms/Vectorize/LoopVectorize.cpp    |  1 +
 .../LoopVectorize/use-iv-start-value.ll       | 84 +++++++++++++++++++
 2 files changed, 85 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/use-iv-start-value.ll

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 2be3b577529258..054a170c36863c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8662,6 +8662,7 @@ static MapVector<PHINode *, VPValue *> collectUsersInExitBlock(
          !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst()) ||
         isa<VPWidenPointerInductionRecipe>(V) ||
         (isa<Instruction>(IncomingValue) &&
+         OrigLoop->contains(cast<Instruction>(IncomingValue)) &&
          any_of(IncomingValue->users(), [&Inductions](User *U) {
            auto *P = dyn_cast<PHINode>(U);
            return P && Inductions.contains(P);
diff --git a/llvm/test/Transforms/LoopVectorize/use-iv-start-value.ll b/llvm/test/Transforms/LoopVectorize/use-iv-start-value.ll
new file mode 100644
index 00000000000000..c7ea86533570b0
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/use-iv-start-value.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s
+
+; Check that we correctly handle the use of %start2 in the exit block, and do
+; not crash.
+
+define i64 @foo(i64 %start, i64 %end) {
+; CHECK-LABEL: define i64 @foo(
+; CHECK-SAME: i64 [[START:%.*]], i64 [[END:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[P1:%.*]] = alloca [1024 x i32], align 4
+; CHECK-NEXT:    [[P2:%.*]] = alloca [1024 x i32], align 4
+; CHECK-NEXT:    call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT:    call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT:    [[START2:%.*]] = and i64 [[START]], 12345
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[END]], [[START2]]
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[IND_END:%.*]] = add i64 [[START2]], [[N_VEC]]
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 [[START2]], [[INDEX]]
+; CHECK-NEXT:    [[IND:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[IND]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX1]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[IND]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START2]], %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[IND1:%.*]] = phi i64 [ [[IND_NEXT1:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[IND1]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[IND1]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[IND_NEXT1]] = add i64 [[IND1]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i64 [[IND_NEXT1]], [[END]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[USE:%.*]] = phi i64 [ [[START2]], %[[FOR_BODY]] ], [ [[START2]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    ret i64 [[USE]]
+;
+entry:
+  %p1 = alloca [1024 x i32]
+  %p2 = alloca [1024 x i32]
+  call void @init_mem(ptr %p1, i64 1024)
+  call void @init_mem(ptr %p2, i64 1024)
+  %start2 = and i64 %start, 12345
+  br label %for.body
+
+for.body:
+  %ind = phi i64 [ %ind.next, %for.body ], [ %start2, %entry ]
+  %arrayidx1 = getelementptr inbounds i32, ptr %p1, i64 %ind
+  %0 = load i32, ptr %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %p2, i64 %ind
+  %1 = load i32, ptr %arrayidx2, align 4
+  %ind.next = add i64 %ind, 1
+  %cmp = icmp ne i64 %ind.next, %end
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  %use =  phi i64 [ %start2, %for.body ]
+  ret i64 %use
+}
+
+declare void @init_mem(ptr, i64)
+
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.

>From 149f7e61a65870fc3efeb1513e5310d0106d8e24 Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Tue, 10 Sep 2024 16:32:41 +0000
Subject: [PATCH 2/3] Review: Pass pointers as arguments in lit test

---
 .../Transforms/LoopVectorize/use-iv-start-value.ll | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/llvm/test/Transforms/LoopVectorize/use-iv-start-value.ll b/llvm/test/Transforms/LoopVectorize/use-iv-start-value.ll
index c7ea86533570b0..d462b8d3a25ff0 100644
--- a/llvm/test/Transforms/LoopVectorize/use-iv-start-value.ll
+++ b/llvm/test/Transforms/LoopVectorize/use-iv-start-value.ll
@@ -4,14 +4,10 @@
 ; Check that we correctly handle the use of %start2 in the exit block, and do
 ; not crash.
 
-define i64 @foo(i64 %start, i64 %end) {
+define i64 @foo(ptr %p1, ptr %p2, i64 %start, i64 %end) {
 ; CHECK-LABEL: define i64 @foo(
-; CHECK-SAME: i64 [[START:%.*]], i64 [[END:%.*]]) {
+; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]], i64 [[START:%.*]], i64 [[END:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    [[P1:%.*]] = alloca [1024 x i32], align 4
-; CHECK-NEXT:    [[P2:%.*]] = alloca [1024 x i32], align 4
-; CHECK-NEXT:    call void @init_mem(ptr [[P1]], i64 1024)
-; CHECK-NEXT:    call void @init_mem(ptr [[P2]], i64 1024)
 ; CHECK-NEXT:    [[START2:%.*]] = and i64 [[START]], 12345
 ; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[END]], [[START2]]
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
@@ -52,10 +48,6 @@ define i64 @foo(i64 %start, i64 %end) {
 ; CHECK-NEXT:    ret i64 [[USE]]
 ;
 entry:
-  %p1 = alloca [1024 x i32]
-  %p2 = alloca [1024 x i32]
-  call void @init_mem(ptr %p1, i64 1024)
-  call void @init_mem(ptr %p2, i64 1024)
   %start2 = and i64 %start, 12345
   br label %for.body
 
@@ -74,8 +66,6 @@ exit:
   ret i64 %use
 }
 
-declare void @init_mem(ptr, i64)
-
 ;.
 ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
 ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}

>From 65774f5bfee748b25da25d27ecc14e7dcd0f7041 Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Wed, 11 Sep 2024 13:31:37 +0000
Subject: [PATCH 3/3] Update: reorder phi entries in %for.body

---
 llvm/test/Transforms/LoopVectorize/use-iv-start-value.ll | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Transforms/LoopVectorize/use-iv-start-value.ll b/llvm/test/Transforms/LoopVectorize/use-iv-start-value.ll
index d462b8d3a25ff0..3d2c2e5e9b5761 100644
--- a/llvm/test/Transforms/LoopVectorize/use-iv-start-value.ll
+++ b/llvm/test/Transforms/LoopVectorize/use-iv-start-value.ll
@@ -35,7 +35,7 @@ define i64 @foo(ptr %p1, ptr %p2, i64 %start, i64 %end) {
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START2]], %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
 ; CHECK:       [[FOR_BODY]]:
-; CHECK-NEXT:    [[IND1:%.*]] = phi i64 [ [[IND_NEXT1:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
+; CHECK-NEXT:    [[IND1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IND_NEXT1:%.*]], %[[FOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[IND1]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[IND1]]
@@ -52,7 +52,7 @@ entry:
   br label %for.body
 
 for.body:
-  %ind = phi i64 [ %ind.next, %for.body ], [ %start2, %entry ]
+  %ind = phi i64 [ %start2, %entry ], [ %ind.next, %for.body ]
   %arrayidx1 = getelementptr inbounds i32, ptr %p1, i64 %ind
   %0 = load i32, ptr %arrayidx1, align 4
   %arrayidx2 = getelementptr inbounds i32, ptr %p2, i64 %ind



More information about the llvm-commits mailing list