[llvm] [SLP][REVEC] Fix insertelement legality checks (PR #146921)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 3 09:31:11 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers
@llvm/pr-subscribers-llvm-transforms
Author: Gaƫtan Bossu (gbossu)
<details>
<summary>Changes</summary>
The current code assumes that all the values in VL are valid instructions, while it is possible to get poison.
---
Full diff: https://github.com/llvm/llvm-project/pull/146921.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+4)
- (added) llvm/test/Transforms/SLPVectorizer/revec-insertelement.ll (+88)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0941bf61953f1..921668d0d9c43 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9060,6 +9060,10 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
// different vectors.
ValueSet SourceVectors;
for (Value *V : VL) {
+ if (isa<PoisonValue>(V)) {
+ LLVM_DEBUG(dbgs() << "SLP: Gather of insertelement/poison vector.\n");
+ return TreeEntry::NeedToGather;
+ }
SourceVectors.insert(cast<Instruction>(V)->getOperand(0));
assert(getElementIndex(V) != std::nullopt &&
"Non-constant or undef index?");
diff --git a/llvm/test/Transforms/SLPVectorizer/revec-insertelement.ll b/llvm/test/Transforms/SLPVectorizer/revec-insertelement.ll
new file mode 100644
index 0000000000000..374d9ed6efe98
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/revec-insertelement.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-1000 --debug %s | FileCheck %s
+
+; The 4 stores can be re-vectorised, make sure the poison sources
+; are safely handled when trying to vectorise [ %0, poison, poison, %1 ]
+define void @test_missing_lanes_1_2(ptr %ptr, i32 %val0, i32 %val1) {
+; CHECK-LABEL: @test_missing_lanes_1_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[VAL0:%.*]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[VAL1:%.*]], i32 0
+; CHECK-NEXT: [[GETELEMENTPTR0:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 0
+; CHECK-NEXT: store <4 x i32> [[TMP0]], ptr [[GETELEMENTPTR0]], align 4
+; CHECK-NEXT: [[GETELEMENTPTR1:%.*]] = getelementptr i32, ptr [[PTR]], i64 4
+; CHECK-NEXT: store <8 x i32> poison, ptr [[GETELEMENTPTR1]], align 4
+; CHECK-NEXT: [[GETELEMENTPTR3:%.*]] = getelementptr i32, ptr [[PTR]], i64 12
+; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[GETELEMENTPTR3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %val0, i32 0
+ %1 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %val1, i32 0
+
+ %getelementptr0 = getelementptr i32, ptr %ptr, i64 0
+ store <4 x i32> %0, ptr %getelementptr0, align 4
+ %getelementptr1 = getelementptr i32, ptr %ptr, i64 4
+ store <4 x i32> poison, ptr %getelementptr1, align 4
+ %getelementptr2 = getelementptr i32, ptr %ptr, i64 8
+ store <4 x i32> poison, ptr %getelementptr2, align 4
+ %getelementptr3 = getelementptr i32, ptr %ptr, i64 12
+ store <4 x i32> %1, ptr %getelementptr3, align 4
+
+ ret void
+}
+
+; The 4 stores can be re-vectorised, make sure the poison sources
+; are safely handled when trying to vectorise [ %0, poison, %1, poison ]
+define void @test_missing_lanes_1_3(ptr %ptr, i32 %val0, i32 %val1) {
+; CHECK-LABEL: @test_missing_lanes_1_3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[VAL0:%.*]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[VAL1:%.*]], i32 0
+; CHECK-NEXT: [[GETELEMENTPTR0:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 0
+; CHECK-NEXT: store <4 x i32> [[TMP0]], ptr [[GETELEMENTPTR0]], align 4
+; CHECK-NEXT: [[GETELEMENTPTR1:%.*]] = getelementptr i32, ptr [[PTR]], i64 4
+; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP1]], i64 4)
+; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[GETELEMENTPTR1]], align 4
+; CHECK-NEXT: [[GETELEMENTPTR3:%.*]] = getelementptr i32, ptr [[PTR]], i64 12
+; CHECK-NEXT: store <4 x i32> poison, ptr [[GETELEMENTPTR3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %val0, i32 0
+ %1 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %val1, i32 0
+
+ %getelementptr0 = getelementptr i32, ptr %ptr, i64 0
+ store <4 x i32> %0, ptr %getelementptr0, align 4
+ %getelementptr1 = getelementptr i32, ptr %ptr, i64 4
+ store <4 x i32> poison, ptr %getelementptr1, align 4
+ %getelementptr2 = getelementptr i32, ptr %ptr, i64 8
+ store <4 x i32> %1, ptr %getelementptr2, align 4
+ %getelementptr3 = getelementptr i32, ptr %ptr, i64 12
+ store <4 x i32> poison, ptr %getelementptr3, align 4
+
+ ret void
+}
+
+; This could be re-vectorised to use a store <8 x i32> instruction.
+define void @test_valid_value_operands(ptr %ptr, i32 %val0, i32 %val1) {
+; CHECK-LABEL: @test_valid_value_operands(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[VAL0:%.*]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[VAL1:%.*]], i32 0
+; CHECK-NEXT: [[GETELEMENTPTR0:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 0
+; CHECK-NEXT: store <4 x i32> [[TMP0]], ptr [[GETELEMENTPTR0]], align 4
+; CHECK-NEXT: [[GETELEMENTPTR1:%.*]] = getelementptr i32, ptr [[PTR]], i64 4
+; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[GETELEMENTPTR1]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %val0, i32 0
+ %1 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %val1, i32 0
+
+ %getelementptr0 = getelementptr i32, ptr %ptr, i64 0
+ store <4 x i32> %0, ptr %getelementptr0, align 4
+ %getelementptr1 = getelementptr i32, ptr %ptr, i64 4
+ store <4 x i32> %1, ptr %getelementptr1, align 4
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/146921
More information about the llvm-commits
mailing list