[llvm] [SLP][REVEC] getNumElements should not be used as VF when REVEC is enabled. (PR #134031)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 1 23:04:08 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Han-Kuan Chen (HanKuanChen)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/134031.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+1-2)
- (added) llvm/test/Transforms/SLPVectorizer/X86/revec-estimateNodesPermuteCost.ll (+71)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 18c896767b6d2..6201541d905bd 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -11298,8 +11298,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
if (!E2 && InVectors.size() == 1) {
unsigned VF = E1.getVectorFactor();
if (Value *V1 = dyn_cast<Value *>(InVectors.front())) {
- VF = std::max(VF,
- cast<FixedVectorType>(V1->getType())->getNumElements());
+ VF = std::max(VF, getVF(V1));
} else {
const auto *E = cast<const TreeEntry *>(InVectors.front());
VF = std::max(VF, E->getVectorFactor());
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-estimateNodesPermuteCost.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-estimateNodesPermuteCost.ll
new file mode 100644
index 0000000000000..c69d9eaa572b0
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-estimateNodesPermuteCost.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -passes=slp-vectorizer -S -slp-revec < %s | FileCheck %s
+
+define i32 @test1(<4 x float> %0, <4 x float> %1) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr null, i64 288
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr null, i64 304
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr null, i64 416
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr null, i64 432
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr null, i64 256
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr null, i64 272
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr null, i64 288
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr null, i64 304
+; CHECK-NEXT: [[TMP10:%.*]] = load <4 x float>, ptr [[TMP2]], align 16
+; CHECK-NEXT: [[TMP11:%.*]] = load <4 x float>, ptr [[TMP3]], align 16
+; CHECK-NEXT: [[TMP12:%.*]] = load <4 x float>, ptr [[TMP4]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = load <4 x float>, ptr [[TMP5]], align 16
+; CHECK-NEXT: [[TMP14:%.*]] = fmul <4 x float> [[TMP10]], [[TMP0:%.*]]
+; CHECK-NEXT: [[TMP15:%.*]] = fmul <4 x float> [[TMP11]], [[TMP0]]
+; CHECK-NEXT: [[TMP16:%.*]] = fmul <4 x float> [[TMP12]], [[TMP0]]
+; CHECK-NEXT: [[TMP17:%.*]] = fmul <4 x float> [[TMP13]], [[TMP0]]
+; CHECK-NEXT: [[TMP18:%.*]] = fsub <4 x float> [[TMP14]], [[TMP1:%.*]]
+; CHECK-NEXT: [[TMP19:%.*]] = fsub <4 x float> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP20:%.*]] = fsub <4 x float> [[TMP16]], [[TMP1]]
+; CHECK-NEXT: [[TMP21:%.*]] = fsub <4 x float> [[TMP17]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = fmul <4 x float> [[TMP11]], zeroinitializer
+; CHECK-NEXT: [[TMP23:%.*]] = fmul <4 x float> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP24:%.*]] = fadd <4 x float> [[TMP18]], [[TMP0]]
+; CHECK-NEXT: [[TMP25:%.*]] = fadd <4 x float> [[TMP19]], zeroinitializer
+; CHECK-NEXT: [[TMP26:%.*]] = fadd <4 x float> [[TMP20]], [[TMP0]]
+; CHECK-NEXT: [[TMP27:%.*]] = fadd <4 x float> [[TMP21]], zeroinitializer
+; CHECK-NEXT: store <4 x float> [[TMP24]], ptr [[TMP6]], align 16
+; CHECK-NEXT: store <4 x float> [[TMP25]], ptr [[TMP7]], align 16
+; CHECK-NEXT: store <4 x float> [[TMP26]], ptr [[TMP8]], align 16
+; CHECK-NEXT: store <4 x float> [[TMP27]], ptr [[TMP9]], align 16
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %2 = getelementptr i8, ptr null, i64 288
+ %3 = getelementptr i8, ptr null, i64 304
+ %4 = getelementptr i8, ptr null, i64 416
+ %5 = getelementptr i8, ptr null, i64 432
+ %6 = getelementptr i8, ptr null, i64 256
+ %7 = getelementptr i8, ptr null, i64 272
+ %8 = getelementptr i8, ptr null, i64 288
+ %9 = getelementptr i8, ptr null, i64 304
+ %10 = load <4 x float>, ptr %2, align 16
+ %11 = load <4 x float>, ptr %3, align 16
+ %12 = load <4 x float>, ptr %4, align 16
+ %13 = load <4 x float>, ptr %5, align 16
+ %14 = fmul <4 x float> %10, %0
+ %15 = fmul <4 x float> %11, %0
+ %16 = fmul <4 x float> %12, %0
+ %17 = fmul <4 x float> %13, %0
+ %18 = fsub <4 x float> %14, %1
+ %19 = fsub <4 x float> %15, zeroinitializer
+ %20 = fsub <4 x float> %16, %1
+ %21 = fsub <4 x float> %17, zeroinitializer
+ %22 = fmul <4 x float> %11, zeroinitializer
+ %23 = fmul <4 x float> %13, zeroinitializer
+ %24 = fadd <4 x float> %18, %0
+ %25 = fadd <4 x float> %19, zeroinitializer
+ %26 = fadd <4 x float> %20, %0
+ %27 = fadd <4 x float> %21, zeroinitializer
+ store <4 x float> %24, ptr %6, align 16
+ store <4 x float> %25, ptr %7, align 16
+ store <4 x float> %26, ptr %8, align 16
+ store <4 x float> %27, ptr %9, align 16
+ ret i32 0
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/134031
More information about the llvm-commits mailing list