[llvm] 2ab910c - [LV] Check pointer users are in loop when checking for uniform pointers.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 13 01:24:31 PDT 2024
Author: Florian Hahn
Date: 2024-08-13T09:23:44+01:00
New Revision: 2ab910c08c4ed43d6fd9eb2ef9b3ff52e0293cf9
URL: https://github.com/llvm/llvm-project/commit/2ab910c08c4ed43d6fd9eb2ef9b3ff52e0293cf9
DIFF: https://github.com/llvm/llvm-project/commit/2ab910c08c4ed43d6fd9eb2ef9b3ff52e0293cf9.diff
LOG: [LV] Check pointer users are in loop when checking for uniform pointers.
Widening decisions are not set for users outside the loop. Avoid
crashing by only calling isVectorizedMemAccessUse for users in the loop.
Fixes https://github.com/llvm/llvm-project/issues/102934.
Added:
llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5547116133e8da..0d1262fa187298 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3705,7 +3705,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
auto *I = cast<Instruction>(V);
auto UsersAreMemAccesses =
llvm::all_of(I->users(), [&](User *U) -> bool {
- return isVectorizedMemAccessUse(cast<Instruction>(U), V);
+ auto *UI = cast<Instruction>(U);
+ return TheLoop->contains(UI) && isVectorizedMemAccessUse(UI, V);
});
if (UsersAreMemAccesses)
addToWorklistIfAllowed(I);
diff --git a/llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll b/llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll
new file mode 100644
index 00000000000000..637b985b4562ed
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll
@@ -0,0 +1,165 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p loop-vectorize -mtriple=x86_64-apple-macosx -mcpu=skylake-avx512 -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s
+
+; Test case for https://github.com/llvm/llvm-project/issues/102934.
+define void @gep_use_in_dead_block(ptr noalias %dst, ptr %src) {
+; CHECK-LABEL: define void @gep_use_in_dead_block(
+; CHECK-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[TMP4]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP5]], align 2
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], <i16 10, i16 10, i16 10, i16 10>
+; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[TMP8]], i32 0
+; CHECK-NEXT: call void @llvm.masked.store.v4i16.p0(<4 x i16> zeroinitializer, ptr [[TMP12]], i32 2, <4 x i1> [[TMP7]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
+; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 96, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP_SRC]], align 2
+; CHECK-NEXT: [[C:%.*]] = icmp eq i16 [[L]], 10
+; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[THEN:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i16, ptr [[DST]], i64 [[IV]]
+; CHECK-NEXT: store i16 0, ptr [[GEP_DST]], align 2
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[DEAD:.*]]:
+; CHECK-NEXT: store i16 0, ptr [[GEP_DST]], align 2
+; CHECK-NEXT: br label %[[DEAD]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 99
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %gep.src = getelementptr i16, ptr %src, i64 %iv
+ %l = load i16, ptr %gep.src
+ %c = icmp eq i16 %l, 10
+ br i1 %c, label %loop.latch, label %then
+
+then:
+ %gep.dst = getelementptr i16, ptr %dst, i64 %iv
+ store i16 0, ptr %gep.dst, align 2
+ br label %loop.latch
+
+dead:
+ store i16 0, ptr %gep.dst, align 2
+ br label %dead
+
+loop.latch:
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, 99
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret void
+}
+
+define void @gep_use_outside_loop(ptr noalias %dst, ptr %src) {
+; CHECK-LABEL: define void @gep_use_outside_loop(
+; CHECK-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[DST]], <4 x i64> [[VEC_IND]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[TMP2]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP3]], align 2
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], <i16 10, i16 10, i16 10, i16 10>
+; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[TMP6]], i32 0
+; CHECK-NEXT: call void @llvm.masked.store.v4i16.p0(<4 x i16> zeroinitializer, ptr [[TMP7]], i32 2, <4 x i1> [[TMP5]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
+; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x ptr> [[TMP1]], i32 3
+; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 96, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i16, ptr [[DST]], i64 [[IV]]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP_SRC]], align 2
+; CHECK-NEXT: [[C:%.*]] = icmp eq i16 [[L]], 10
+; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[THEN:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: store i16 0, ptr [[GEP_DST]], align 2
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 99
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[GEP_DST_LCSSA:%.*]] = phi ptr [ [[GEP_DST]], %[[LOOP_LATCH]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: store i16 0, ptr [[GEP_DST_LCSSA]], align 2
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %gep.dst = getelementptr i16, ptr %dst, i64 %iv
+ %gep.src = getelementptr i16, ptr %src, i64 %iv
+ %l = load i16, ptr %gep.src
+ %c = icmp eq i16 %l, 10
+ br i1 %c, label %loop.latch, label %then
+
+then:
+ store i16 0, ptr %gep.dst, align 2
+ br label %loop.latch
+
+loop.latch:
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, 99
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ store i16 0, ptr %gep.dst, align 2
+ ret void
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
+;.
More information about the llvm-commits
mailing list