[llvm] 2682a94 - [SLP][REVEC] Add ExtractSubvector cost for ExternalUses. (#132761)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 25 03:58:58 PDT 2025
Author: Han-Kuan Chen
Date: 2025-03-25T18:58:54+08:00
New Revision: 2682a9433bb5df0efd9e926947ae289f6e8aecde
URL: https://github.com/llvm/llvm-project/commit/2682a9433bb5df0efd9e926947ae289f6e8aecde
DIFF: https://github.com/llvm/llvm-project/commit/2682a9433bb5df0efd9e926947ae289f6e8aecde.diff
LOG: [SLP][REVEC] Add ExtractSubvector cost for ExternalUses. (#132761)
For llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll,
ScalarCost and ExtraCost are both 1, so the original scalar will be kept.
Added:
llvm/test/Transforms/SLPVectorizer/X86/revec-ExtractSubvector.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 95d697bbd734a..4dc398f716b30 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5360,6 +5360,25 @@ getScalarizationOverhead(const TargetTransformInfo &TTI, Type *ScalarTy,
return Cost;
}
+/// Wrapper over TargetTransformInfo::getVectorInstrCost: for an ExtractElement
+/// whose ScalarTy is a FixedVectorType (REVEC), cost an SK_ExtractSubvector.
+static InstructionCost getVectorInstrCost(
+ const TargetTransformInfo &TTI, Type *ScalarTy, unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind, unsigned Index, Value *Scalar,
+ ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) {
+ if (Opcode == Instruction::ExtractElement) {
+ if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy)) {
+ assert(SLPReVec && "Only supported by REVEC.");
+ assert(isa<VectorType>(Val) && "Val must be a vector type.");
+ return getShuffleCost(TTI, TTI::SK_ExtractSubvector,
+ cast<VectorType>(Val), {}, CostKind,
+ Index * VecTy->getNumElements(), VecTy); // Index scaled by subvector width.
+ }
+ }
+ return TTI.getVectorInstrCost(Opcode, Val, CostKind, Index, Scalar, // Otherwise defer to TTI.
+ ScalarUserAndIdx);
+}
+
/// Correctly creates insert_subvector, checking that the index is multiple of
/// the subvectors length. Otherwise, generates shuffle using \p Generator or
/// using default shuffle.
@@ -13649,12 +13668,13 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
!ExtractCostCalculated.insert(EU.Scalar).second)
continue;
- // No extract cost for vector "scalar"
- if (isa<FixedVectorType>(EU.Scalar->getType()))
+ // No extract cost for vector "scalar" if REVEC is disabled
+ if (!SLPReVec && isa<FixedVectorType>(EU.Scalar->getType()))
continue;
// If found user is an insertelement, do not calculate extract cost but try
// to detect it as a final shuffled/identity match.
+ // TODO: what if a user is insertvalue when REVEC is enabled?
if (auto *VU = dyn_cast_or_null<InsertElementInst>(EU.User);
VU && VU->getOperand(1) == EU.Scalar) {
if (auto *FTy = dyn_cast<FixedVectorType>(VU->getType())) {
@@ -13728,7 +13748,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
// extend the extracted value back to the original type. Here, we account
// for the extract and the added cost of the sign extend if needed.
InstructionCost ExtraCost = TTI::TCC_Free;
- auto *VecTy = getWidenedType(EU.Scalar->getType(), BundleWidth);
+ auto *ScalarTy = EU.Scalar->getType();
+ auto *VecTy = getWidenedType(ScalarTy, BundleWidth);
const TreeEntry *Entry = &EU.E;
auto It = MinBWs.find(Entry);
if (It != MinBWs.end()) {
@@ -13741,8 +13762,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
VecTy, EU.Lane);
} else {
ExtraCost =
- TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, CostKind,
- EU.Lane, EU.Scalar, ScalarUserAndIdx);
+ getVectorInstrCost(*TTI, ScalarTy, Instruction::ExtractElement, VecTy,
+ CostKind, EU.Lane, EU.Scalar, ScalarUserAndIdx);
}
// Leave the scalar instructions as is if they are cheaper than extracts.
if (Entry->Idx != 0 || Entry->getOpcode() == Instruction::GetElementPtr ||
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-ExtractSubvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-ExtractSubvector.ll
new file mode 100644
index 0000000000000..459751424ec13
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-ExtractSubvector.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -slp-revec -pass-remarks-output=%t < %s | FileCheck %s
+; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
+
+; See https://reviews.llvm.org/D70068 and https://reviews.llvm.org/D70587 for context
+
+; YAML: --- !Passed
+; YAML: Pass: slp-vectorizer
+; YAML: Name: VectorizedList
+; YAML: Function: StructOfVectors
+; YAML: Args:
+; YAML: - String: 'SLP vectorized with cost '
+; YAML: - Cost: '-10'
+; YAML: - String: ' and with tree size '
+; YAML: - TreeSize: '3'
+
+; YAML: --- !Missed
+; YAML: Pass: slp-vectorizer
+; YAML: Name: NotBeneficial
+; YAML: Function: StructOfVectors
+; YAML: Args:
+; YAML: - String: 'List vectorization was possible but not beneficial with cost '
+; YAML: - Cost: '0'
+; YAML: - String: ' >= '
+; YAML: - Treshold: '0'
+
+; Checks that the <2 x float> insertvalue operands become SLP seeds and are split out of the <4 x float> result with shufflevector.
+define { <2 x float>, <2 x float> } @StructOfVectors(ptr %Ptr) {
+; CHECK-LABEL: @StructOfVectors(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[PTR:%.*]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> [[TMP1]], <float 1.100000e+01, float 1.200000e+01, float 1.300000e+01, float 1.400000e+01>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> [[TMP6]], 0
+; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[TMP7]], 1
+; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[RET1]]
+;
+ %L0 = load float, ptr %Ptr
+ %GEP1 = getelementptr inbounds float, ptr %Ptr, i64 1
+ %L1 = load float, ptr %GEP1
+ %GEP2 = getelementptr inbounds float, ptr %Ptr, i64 2
+ %L2 = load float, ptr %GEP2
+ %GEP3 = getelementptr inbounds float, ptr %Ptr, i64 3
+ %L3 = load float, ptr %GEP3
+
+ %Fadd0 = fadd fast float %L0, 1.1e+01
+ %Fadd1 = fadd fast float %L1, 1.2e+01
+ %Fadd2 = fadd fast float %L2, 1.3e+01
+ %Fadd3 = fadd fast float %L3, 1.4e+01
+
+ %VecIn0 = insertelement <2 x float> poison, float %Fadd0, i64 0
+ %VecIn1 = insertelement <2 x float> %VecIn0, float %Fadd1, i64 1
+
+ %VecIn2 = insertelement <2 x float> poison, float %Fadd2, i64 0
+ %VecIn3 = insertelement <2 x float> %VecIn2, float %Fadd3, i64 1
+
+ %Ret0 = insertvalue {<2 x float>, <2 x float>} poison, <2 x float> %VecIn1, 0
+ %Ret1 = insertvalue {<2 x float>, <2 x float>} %Ret0, <2 x float> %VecIn3, 1
+ ret {<2 x float>, <2 x float>} %Ret1
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll b/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll
index d6c09bc224a7d..f11a0a9c024a2 100644
--- a/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll
@@ -135,15 +135,16 @@ define void @test6(ptr %in0, ptr %in1, ptr %in2) {
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x float> [[TMP14]], <16 x float> [[TMP15]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x float> [[TMP16]], <16 x float> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP4:%.*]] = fmul <32 x float> [[TMP3]], [[TMP2]]
-; CHECK-NEXT: store <32 x float> [[TMP4]], ptr [[IN2:%.*]], align 16
; CHECK-NEXT: [[GEP10:%.*]] = getelementptr inbounds nuw i8, ptr [[IN1]], i64 32
+; CHECK-NEXT: [[GEP11:%.*]] = getelementptr inbounds nuw i8, ptr [[IN2:%.*]], i64 128
+; CHECK-NEXT: [[TMP17:%.*]] = load <8 x float>, ptr [[IN0]], align 16
+; CHECK-NEXT: store <32 x float> [[TMP4]], ptr [[IN2]], align 16
; CHECK-NEXT: [[LOAD5:%.*]] = load <16 x i8>, ptr [[GEP10]], align 1
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[GEP11:%.*]] = getelementptr inbounds nuw i8, ptr [[IN2]], i64 128
; CHECK-NEXT: [[TMP6:%.*]] = uitofp <16 x i8> [[LOAD5]] to <16 x float>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[LOAD2]], <4 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP17]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> [[TMP8]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP17]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x float> [[TMP9]], <16 x float> [[TMP10]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
More information about the llvm-commits
mailing list