[llvm] 0617629 - [SLP][REVEC] Fix cost model for getBuildVectorCost with FixedVectorType ScalarTy. (#110073)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 30 06:51:16 PDT 2024
Author: Han-Kuan Chen
Date: 2024-09-30T21:51:12+08:00
New Revision: 061762933b7a51452a9425b0f66e448a67d40157
URL: https://github.com/llvm/llvm-project/commit/061762933b7a51452a9425b0f66e448a67d40157
DIFF: https://github.com/llvm/llvm-project/commit/061762933b7a51452a9425b0f66e448a67d40157.diff
LOG: [SLP][REVEC] Fix cost model for getBuildVectorCost with FixedVectorType ScalarTy. (#110073)
BoUpSLP::gather always uses CreateInsertVector for FixedVectorType
ScalarTy.
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e45fcb2b5c790c..6b8ec55b30426c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9369,10 +9369,18 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
bool NeedShuffle =
count(VL, *It) > 1 &&
(VL.front() != *It || !all_of(VL.drop_front(), IsaPred<UndefValue>));
- if (!NeedShuffle)
+ if (!NeedShuffle) {
+ if (isa<FixedVectorType>(ScalarTy)) {
+ assert(SLPReVec && "FixedVectorType is not expected.");
+ return TTI.getShuffleCost(
+ TTI::SK_InsertSubvector, VecTy, {}, CostKind,
+ std::distance(VL.begin(), It) * getNumElements(ScalarTy),
+ cast<FixedVectorType>(ScalarTy));
+ }
return TTI.getVectorInstrCost(Instruction::InsertElement, VecTy,
CostKind, std::distance(VL.begin(), It),
PoisonValue::get(VecTy), *It);
+ }
SmallVector<int> ShuffleMask(VL.size(), PoisonMaskElem);
transform(VL, ShuffleMask.begin(), [](Value *V) {
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
index 887f59bbda94d6..995cd7cfbc880b 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
@@ -1,19 +1,19 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-10 -pass-remarks-output=%t %s | FileCheck %s
+; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-20 -pass-remarks-output=%t %s | FileCheck %s
; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
; YAML: --- !Passed
; YAML: Pass: slp-vectorizer
; YAML: Name: StoresVectorized
-; YAML: Function: test
+; YAML: Function: test1
; YAML: Args:
; YAML: - String: 'Stores SLP vectorized with cost '
; YAML: - Cost: '6'
; YAML: - String: ' and with tree size '
; YAML: - TreeSize: '5'
-define void @test(<4 x float> %load6, <4 x float> %load7, <4 x float> %load8, <4 x float> %load17, <4 x float> %fmuladd7, <4 x float> %fmuladd16, ptr %out_ptr) {
-; CHECK-LABEL: @test(
+define void @test1(<4 x float> %load6, <4 x float> %load7, <4 x float> %load8, <4 x float> %load17, <4 x float> %fmuladd7, <4 x float> %fmuladd16, ptr %out_ptr) {
+; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[VEXT165_I:%.*]] = shufflevector <4 x float> [[LOAD6:%.*]], <4 x float> [[LOAD7:%.*]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT: [[VEXT309_I:%.*]] = shufflevector <4 x float> [[LOAD7]], <4 x float> [[LOAD8:%.*]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
@@ -40,3 +40,42 @@ entry:
}
declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)
+
+; YAML: --- !Passed
+; YAML: Pass: slp-vectorizer
+; YAML: Name: StoresVectorized
+; YAML: Function: test2
+; YAML: Args:
+; YAML: - String: 'Stores SLP vectorized with cost '
+; YAML: - Cost: '16'
+; YAML: - String: ' and with tree size '
+; YAML: - TreeSize: '5'
+
+define void @test2(<8 x float> %load6, <8 x float> %load7, <8 x float> %load8, <8 x float> %load17, <8 x float> %fmuladd7, <8 x float> %fmuladd16, ptr %out_ptr) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[VEXT165_I:%.*]] = shufflevector <8 x float> [[LOAD6:%.*]], <8 x float> [[LOAD7:%.*]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT: [[VEXT309_I:%.*]] = shufflevector <8 x float> [[LOAD7]], <8 x float> [[LOAD8:%.*]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT: [[TMP0:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[VEXT165_I]], i64 0)
+; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP0]], <8 x float> [[VEXT309_I]], i64 8)
+; CHECK-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> poison, i64 8)
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP2]], <8 x float> [[LOAD17:%.*]], i64 0)
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[TMP3]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP5:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[FMULADD7:%.*]], i64 0)
+; CHECK-NEXT: [[TMP6:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP5]], <8 x float> [[FMULADD16:%.*]], i64 8)
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> [[TMP1]], <16 x float> [[TMP4]], <16 x float> [[TMP6]])
+; CHECK-NEXT: store <16 x float> [[TMP7]], ptr [[OUT_PTR:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %vext165.i = shufflevector <8 x float> %load6, <8 x float> %load7, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+ %vext309.i = shufflevector <8 x float> %load7, <8 x float> %load8, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+ %fmuladd8 = tail call noundef <8 x float> @llvm.fmuladd.v8f32(<8 x float> %vext165.i, <8 x float> %load17, <8 x float> %fmuladd7)
+ %fmuladd17 = tail call noundef <8 x float> @llvm.fmuladd.v8f32(<8 x float> %vext309.i, <8 x float> %load17, <8 x float> %fmuladd16)
+ %add.ptr.i.i = getelementptr inbounds i8, ptr %out_ptr, i64 32
+ store <8 x float> %fmuladd8, ptr %out_ptr, align 4
+ store <8 x float> %fmuladd17, ptr %add.ptr.i.i, align 4
+ ret void
+}
+
+declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>)
More information about the llvm-commits
mailing list