[llvm] [SLP][REVEC] Add ExtractSubvector for ExternalUses. (PR #132761)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 24 08:43:56 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Han-Kuan Chen (HanKuanChen)
<details>
<summary>Changes</summary>
For llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll,
ScalarCost and ExtraCost are both 1, so the original scalar will be kept.
---
Full diff: https://github.com/llvm/llvm-project/pull/132761.diff
3 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+15-6)
- (added) llvm/test/Transforms/SLPVectorizer/X86/revec-ExtractSubvector.ll (+60)
- (modified) llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll (+5-4)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0201955b8b559..7272ca6242fa8 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13623,12 +13623,13 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
!ExtractCostCalculated.insert(EU.Scalar).second)
continue;
- // No extract cost for vector "scalar"
- if (isa<FixedVectorType>(EU.Scalar->getType()))
+ // No extract cost for vector "scalar" if REVEC is disabled
+ if (!SLPReVec && isa<FixedVectorType>(EU.Scalar->getType()))
continue;
// If found user is an insertelement, do not calculate extract cost but try
// to detect it as a final shuffled/identity match.
+ // TODO: what if a user is insertvalue when REVEC is enabled?
if (auto *VU = dyn_cast_or_null<InsertElementInst>(EU.User);
VU && VU->getOperand(1) == EU.Scalar) {
if (auto *FTy = dyn_cast<FixedVectorType>(VU->getType())) {
@@ -13702,7 +13703,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
// extend the extracted value back to the original type. Here, we account
// for the extract and the added cost of the sign extend if needed.
InstructionCost ExtraCost = TTI::TCC_Free;
- auto *VecTy = getWidenedType(EU.Scalar->getType(), BundleWidth);
+ auto *ScalarTy = EU.Scalar->getType();
+ auto *VecTy = getWidenedType(ScalarTy, BundleWidth);
const TreeEntry *Entry = &EU.E;
auto It = MinBWs.find(Entry);
if (It != MinBWs.end()) {
@@ -13714,9 +13716,16 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
ExtraCost = TTI->getExtractWithExtendCost(Extend, EU.Scalar->getType(),
VecTy, EU.Lane);
} else {
- ExtraCost =
- TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, CostKind,
- EU.Lane, EU.Scalar, ScalarUserAndIdx);
+ if (auto *FixedVecTy = dyn_cast<FixedVectorType>(ScalarTy)) {
+ assert(SLPReVec && "Only supported by REVEC.");
+ ExtraCost =
+ getShuffleCost(*TTI, TTI::SK_ExtractSubvector, VecTy, {}, CostKind,
+ EU.Lane * FixedVecTy->getNumElements(), FixedVecTy);
+ } else {
+ ExtraCost = TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy,
+ CostKind, EU.Lane, EU.Scalar,
+ ScalarUserAndIdx);
+ }
}
// Leave the scalar instructions as is if they are cheaper than extracts.
if (Entry->Idx != 0 || Entry->getOpcode() == Instruction::GetElementPtr ||
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-ExtractSubvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-ExtractSubvector.ll
new file mode 100644
index 0000000000000..6c378ac2e583d
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-ExtractSubvector.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -slp-revec -pass-remarks-output=%t < %s | FileCheck %s
+; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
+
+; See https://reviews.llvm.org/D70068 and https://reviews.llvm.org/D70587 for context
+
+; YAML: --- !Passed
+; YAML: Pass: slp-vectorizer
+; YAML: Name: VectorizedList
+; YAML: Function: StructOfVectors
+; YAML: Args:
+; YAML: - String: 'SLP vectorized with cost '
+; YAML: - Cost: '-10'
+; YAML: - String: ' and with tree size '
+; YAML: - TreeSize: '3'
+
+; YAML: --- !Missed
+; YAML: Pass: slp-vectorizer
+; YAML: Name: NotBeneficial
+; YAML: Function: StructOfVectors
+; YAML: Args:
+; YAML: - String: 'List vectorization was possible but not beneficial with cost '
+; YAML: - Cost: '0'
+; YAML: - String: ' >= '
+; YAML: - Treshold: '0'
+
+; Checks that vector insertvalues into the struct become SLP seeds.
+define { <2 x float>, <2 x float> } @StructOfVectors(ptr %Ptr) {
+; CHECK-LABEL: @StructOfVectors(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[PTR:%.*]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> [[TMP1]], <float 1.100000e+01, float 1.200000e+01, float 1.300000e+01, float 1.400000e+01>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP6]], 0
+; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[TMP7]], 1
+; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[RET1]]
+;
+ %L0 = load float, ptr %Ptr
+ %GEP1 = getelementptr inbounds float, ptr %Ptr, i64 1
+ %L1 = load float, ptr %GEP1
+ %GEP2 = getelementptr inbounds float, ptr %Ptr, i64 2
+ %L2 = load float, ptr %GEP2
+ %GEP3 = getelementptr inbounds float, ptr %Ptr, i64 3
+ %L3 = load float, ptr %GEP3
+
+ %Fadd0 = fadd fast float %L0, 1.1e+01
+ %Fadd1 = fadd fast float %L1, 1.2e+01
+ %Fadd2 = fadd fast float %L2, 1.3e+01
+ %Fadd3 = fadd fast float %L3, 1.4e+01
+
+ %VecIn0 = insertelement <2 x float> undef, float %Fadd0, i64 0
+ %VecIn1 = insertelement <2 x float> %VecIn0, float %Fadd1, i64 1
+
+ %VecIn2 = insertelement <2 x float> undef, float %Fadd2, i64 0
+ %VecIn3 = insertelement <2 x float> %VecIn2, float %Fadd3, i64 1
+
+ %Ret0 = insertvalue {<2 x float>, <2 x float>} undef, <2 x float> %VecIn1, 0
+ %Ret1 = insertvalue {<2 x float>, <2 x float>} %Ret0, <2 x float> %VecIn3, 1
+ ret {<2 x float>, <2 x float>} %Ret1
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll b/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll
index d6c09bc224a7d..f11a0a9c024a2 100644
--- a/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll
@@ -135,15 +135,16 @@ define void @test6(ptr %in0, ptr %in1, ptr %in2) {
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x float> [[TMP14]], <16 x float> [[TMP15]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x float> [[TMP16]], <16 x float> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP4:%.*]] = fmul <32 x float> [[TMP3]], [[TMP2]]
-; CHECK-NEXT: store <32 x float> [[TMP4]], ptr [[IN2:%.*]], align 16
; CHECK-NEXT: [[GEP10:%.*]] = getelementptr inbounds nuw i8, ptr [[IN1]], i64 32
+; CHECK-NEXT: [[GEP11:%.*]] = getelementptr inbounds nuw i8, ptr [[IN2:%.*]], i64 128
+; CHECK-NEXT: [[TMP17:%.*]] = load <8 x float>, ptr [[IN0]], align 16
+; CHECK-NEXT: store <32 x float> [[TMP4]], ptr [[IN2]], align 16
; CHECK-NEXT: [[LOAD5:%.*]] = load <16 x i8>, ptr [[GEP10]], align 1
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[GEP11:%.*]] = getelementptr inbounds nuw i8, ptr [[IN2]], i64 128
; CHECK-NEXT: [[TMP6:%.*]] = uitofp <16 x i8> [[LOAD5]] to <16 x float>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[LOAD2]], <4 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP17]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> [[TMP8]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP17]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x float> [[TMP9]], <16 x float> [[TMP10]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
``````````
</details>
https://github.com/llvm/llvm-project/pull/132761
More information about the llvm-commits mailing list