[llvm] 97c4cb4 - [SLP][REVEC] getNumElements should not be used as VF when REVEC is enabled. (#134763)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 8 07:29:07 PDT 2025
Author: Han-Kuan Chen
Date: 2025-04-08T22:29:03+08:00
New Revision: 97c4cb4d13b2049cdfe884f4816aeaf6770d0c32
URL: https://github.com/llvm/llvm-project/commit/97c4cb4d13b2049cdfe884f4816aeaf6770d0c32
DIFF: https://github.com/llvm/llvm-project/commit/97c4cb4d13b2049cdfe884f4816aeaf6770d0c32.diff
LOG: [SLP][REVEC] getNumElements should not be used as VF when REVEC is enabled. (#134763)
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/revec.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e6559f26be8c2..dbc4c895109e0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -16080,11 +16080,9 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
unsigned VF = std::max(CommonMask.size(), Mask.size());
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
if (CommonMask[Idx] == PoisonMaskElem && Mask[Idx] != PoisonMaskElem)
- CommonMask[Idx] =
- V->getType() != V1->getType()
- ? Idx + VF
- : Mask[Idx] + cast<FixedVectorType>(V1->getType())
- ->getNumElements();
+ CommonMask[Idx] = V->getType() != V1->getType()
+ ? Idx + VF
+ : Mask[Idx] + getVF(V1);
if (V->getType() != V1->getType())
V1 = createShuffle(V1, nullptr, Mask);
InVectors.front() = V;
diff --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll
index ce13f478d3811..10f52c7c341cb 100644
--- a/llvm/test/Transforms/SLPVectorizer/revec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/revec.ll
@@ -481,3 +481,44 @@ for.end.loopexit:
%or0 = or <4 x i16> %phi1, zeroinitializer
ret void
}
+
+define i32 @test15() {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr null, i64 480
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr null, i64 160
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, ptr [[TMP1]], align 16
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[TMP1]], align 16
+; CHECK-NEXT: store <4 x float> [[TMP3]], ptr null, align 16
+; CHECK-NEXT: [[TMP4:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> poison, <4 x float> zeroinitializer, i64 0)
+; CHECK-NEXT: [[TMP5:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP4]], <4 x float> zeroinitializer, i64 4)
+; CHECK-NEXT: [[TMP6:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP5]], <4 x float> zeroinitializer, i64 8)
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP6]], <4 x float> zeroinitializer, i64 12)
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> poison, <4 x float> zeroinitializer, i64 8)
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP8]], <4 x float> zeroinitializer, i64 12)
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x float> [[TMP10]], <16 x float> [[TMP9]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT: [[TMP12:%.*]] = fadd <16 x float> [[TMP7]], [[TMP11]]
+; CHECK-NEXT: store <16 x float> [[TMP12]], ptr [[TMP0]], align 16
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %0 = getelementptr i8, ptr null, i64 512
+ %1 = getelementptr i8, ptr null, i64 528
+ %2 = getelementptr i8, ptr null, i64 480
+ %3 = getelementptr i8, ptr null, i64 496
+ %4 = getelementptr i8, ptr null, i64 160
+ %5 = load <4 x float>, ptr %4, align 16
+ %6 = getelementptr i8, ptr null, i64 176
+ %7 = load <4 x float>, ptr %6, align 16
+ store <4 x float> %5, ptr null, align 16
+ %8 = fadd <4 x float> zeroinitializer, %5
+ %9 = fadd <4 x float> zeroinitializer, %7
+ store <4 x float> %8, ptr %2, align 16
+ store <4 x float> %9, ptr %3, align 16
+ %10 = fadd <4 x float> zeroinitializer, zeroinitializer
+ %11 = fadd <4 x float> zeroinitializer, zeroinitializer
+ store <4 x float> %10, ptr %0, align 16
+ store <4 x float> %11, ptr %1, align 16
+ ret i32 0
+}
More information about the llvm-commits mailing list