[llvm] [LoopVectorize] Amend check for IV increments in collectUsersInEntryB… (PR #108020)

via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 10 06:04:57 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Hari Limaye (hazzlim)

<details>
<summary>Changes</summary>

…lock

The check for IV increments in collectUsersInEntryBlock currently triggers for exit-block PHIs which use the IV start value, resulting in us failing to add the input value for the middle block to these PHIs.

Fix this by amending the check for IV increments to only include incoming values that are instructions inside the loop.

Fixes #<!-- -->108004

---
Full diff: https://github.com/llvm/llvm-project/pull/108020.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+1) 
- (added) llvm/test/Transforms/LoopVectorize/use-iv-start-value.ll (+84) 


``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 2be3b577529258..054a170c36863c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8662,6 +8662,7 @@ static MapVector<PHINode *, VPValue *> collectUsersInExitBlock(
          !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst()) ||
         isa<VPWidenPointerInductionRecipe>(V) ||
         (isa<Instruction>(IncomingValue) &&
+         OrigLoop->contains(cast<Instruction>(IncomingValue)) &&
          any_of(IncomingValue->users(), [&Inductions](User *U) {
            auto *P = dyn_cast<PHINode>(U);
            return P && Inductions.contains(P);
diff --git a/llvm/test/Transforms/LoopVectorize/use-iv-start-value.ll b/llvm/test/Transforms/LoopVectorize/use-iv-start-value.ll
new file mode 100644
index 00000000000000..c7ea86533570b0
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/use-iv-start-value.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s
+
+; Check that we correctly handle the use of %start2 in the exit block, and do
+; not crash.
+
+define i64 @foo(i64 %start, i64 %end) {
+; CHECK-LABEL: define i64 @foo(
+; CHECK-SAME: i64 [[START:%.*]], i64 [[END:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[P1:%.*]] = alloca [1024 x i32], align 4
+; CHECK-NEXT:    [[P2:%.*]] = alloca [1024 x i32], align 4
+; CHECK-NEXT:    call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT:    call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT:    [[START2:%.*]] = and i64 [[START]], 12345
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[END]], [[START2]]
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[IND_END:%.*]] = add i64 [[START2]], [[N_VEC]]
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 [[START2]], [[INDEX]]
+; CHECK-NEXT:    [[IND:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[IND]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX1]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[IND]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START2]], %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[IND1:%.*]] = phi i64 [ [[IND_NEXT1:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[IND1]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[IND1]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[IND_NEXT1]] = add i64 [[IND1]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i64 [[IND_NEXT1]], [[END]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[USE:%.*]] = phi i64 [ [[START2]], %[[FOR_BODY]] ], [ [[START2]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    ret i64 [[USE]]
+;
+entry:
+  %p1 = alloca [1024 x i32]
+  %p2 = alloca [1024 x i32]
+  call void @init_mem(ptr %p1, i64 1024)
+  call void @init_mem(ptr %p2, i64 1024)
+  %start2 = and i64 %start, 12345
+  br label %for.body
+
+for.body:
+  %ind = phi i64 [ %ind.next, %for.body ], [ %start2, %entry ]
+  %arrayidx1 = getelementptr inbounds i32, ptr %p1, i64 %ind
+  %0 = load i32, ptr %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %p2, i64 %ind
+  %1 = load i32, ptr %arrayidx2, align 4
+  %ind.next = add i64 %ind, 1
+  %cmp = icmp ne i64 %ind.next, %end
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  %use =  phi i64 [ %start2, %for.body ]
+  ret i64 %use
+}
+
+declare void @init_mem(ptr, i64)
+
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.

``````````

</details>


https://github.com/llvm/llvm-project/pull/108020


More information about the llvm-commits mailing list