[llvm] [SLP][REVEC] Fix insertelement legality checks (PR #146921)
Gaëtan Bossu via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 4 00:55:01 PDT 2025
https://github.com/gbossu updated https://github.com/llvm/llvm-project/pull/146921
>From b2bbc9d6a596b960da9bb2de7726d21c3a2b0232 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= <gaetan.bossu at arm.com>
Date: Thu, 3 Jul 2025 16:21:16 +0000
Subject: [PATCH] [SLP][REVEC] Fix insertelement legality checks
The current code assumes that all the values in VL are valid
instructions, while it is possible to get poison.
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 4 +
.../SLPVectorizer/revec-insertelement.ll | 88 +++++++++++++++++++
2 files changed, 92 insertions(+)
create mode 100644 llvm/test/Transforms/SLPVectorizer/revec-insertelement.ll
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0941bf61953f1..921668d0d9c43 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9060,6 +9060,10 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
// different vectors.
ValueSet SourceVectors;
for (Value *V : VL) {
+ if (isa<PoisonValue>(V)) {
+ LLVM_DEBUG(dbgs() << "SLP: Gather of insertelement/poison vector.\n");
+ return TreeEntry::NeedToGather;
+ }
SourceVectors.insert(cast<Instruction>(V)->getOperand(0));
assert(getElementIndex(V) != std::nullopt &&
"Non-constant or undef index?");
diff --git a/llvm/test/Transforms/SLPVectorizer/revec-insertelement.ll b/llvm/test/Transforms/SLPVectorizer/revec-insertelement.ll
new file mode 100644
index 0000000000000..9dbaadeca1f41
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/revec-insertelement.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-1000 %s | FileCheck %s
+
+; The 4 stores can be re-vectorised, make sure the poison sources
+; are safely handled when trying to vectorise [ %0, poison, poison, %1 ]
+define void @test_missing_lanes_1_2(ptr %ptr, i32 %val0, i32 %val1) {
+; CHECK-LABEL: @test_missing_lanes_1_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[VAL0:%.*]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[VAL1:%.*]], i32 0
+; CHECK-NEXT: [[GETELEMENTPTR0:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 0
+; CHECK-NEXT: store <4 x i32> [[TMP0]], ptr [[GETELEMENTPTR0]], align 4
+; CHECK-NEXT: [[GETELEMENTPTR1:%.*]] = getelementptr i32, ptr [[PTR]], i64 4
+; CHECK-NEXT: store <8 x i32> poison, ptr [[GETELEMENTPTR1]], align 4
+; CHECK-NEXT: [[GETELEMENTPTR3:%.*]] = getelementptr i32, ptr [[PTR]], i64 12
+; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[GETELEMENTPTR3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %val0, i32 0
+ %1 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %val1, i32 0
+
+ %getelementptr0 = getelementptr i32, ptr %ptr, i64 0
+ store <4 x i32> %0, ptr %getelementptr0, align 4
+ %getelementptr1 = getelementptr i32, ptr %ptr, i64 4
+ store <4 x i32> poison, ptr %getelementptr1, align 4
+ %getelementptr2 = getelementptr i32, ptr %ptr, i64 8
+ store <4 x i32> poison, ptr %getelementptr2, align 4
+ %getelementptr3 = getelementptr i32, ptr %ptr, i64 12
+ store <4 x i32> %1, ptr %getelementptr3, align 4
+
+ ret void
+}
+
+; The 4 stores can be re-vectorised, make sure the poison sources
+; are safely handled when trying to vectorise [ %0, poison, %1, poison ]
+define void @test_missing_lanes_1_3(ptr %ptr, i32 %val0, i32 %val1) {
+; CHECK-LABEL: @test_missing_lanes_1_3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[VAL0:%.*]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[VAL1:%.*]], i32 0
+; CHECK-NEXT: [[GETELEMENTPTR0:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 0
+; CHECK-NEXT: store <4 x i32> [[TMP0]], ptr [[GETELEMENTPTR0]], align 4
+; CHECK-NEXT: [[GETELEMENTPTR1:%.*]] = getelementptr i32, ptr [[PTR]], i64 4
+; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP1]], i64 4)
+; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[GETELEMENTPTR1]], align 4
+; CHECK-NEXT: [[GETELEMENTPTR3:%.*]] = getelementptr i32, ptr [[PTR]], i64 12
+; CHECK-NEXT: store <4 x i32> poison, ptr [[GETELEMENTPTR3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %val0, i32 0
+ %1 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %val1, i32 0
+
+ %getelementptr0 = getelementptr i32, ptr %ptr, i64 0
+ store <4 x i32> %0, ptr %getelementptr0, align 4
+ %getelementptr1 = getelementptr i32, ptr %ptr, i64 4
+ store <4 x i32> poison, ptr %getelementptr1, align 4
+ %getelementptr2 = getelementptr i32, ptr %ptr, i64 8
+ store <4 x i32> %1, ptr %getelementptr2, align 4
+ %getelementptr3 = getelementptr i32, ptr %ptr, i64 12
+ store <4 x i32> poison, ptr %getelementptr3, align 4
+
+ ret void
+}
+
+; This could be re-vectorised to use a store <8 x i32> instruction.
+define void @test_valid_value_operands(ptr %ptr, i32 %val0, i32 %val1) {
+; CHECK-LABEL: @test_valid_value_operands(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[VAL0:%.*]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[VAL1:%.*]], i32 0
+; CHECK-NEXT: [[GETELEMENTPTR0:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 0
+; CHECK-NEXT: store <4 x i32> [[TMP0]], ptr [[GETELEMENTPTR0]], align 4
+; CHECK-NEXT: [[GETELEMENTPTR1:%.*]] = getelementptr i32, ptr [[PTR]], i64 4
+; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[GETELEMENTPTR1]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %val0, i32 0
+ %1 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %val1, i32 0
+
+ %getelementptr0 = getelementptr i32, ptr %ptr, i64 0
+ store <4 x i32> %0, ptr %getelementptr0, align 4
+ %getelementptr1 = getelementptr i32, ptr %ptr, i64 4
+ store <4 x i32> %1, ptr %getelementptr1, align 4
+ ret void
+}
More information about the llvm-commits mailing list