[llvm] d02a704 - [SLP][REVEC] Make getExtractWithExtendCost support FixedVectorType as Dst. (#134822)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 10 03:54:49 PDT 2025
Author: Han-Kuan Chen
Date: 2025-04-10T18:54:45+08:00
New Revision: d02a704ec952f01ab258e8c4cbb3c01c8f768e15
URL: https://github.com/llvm/llvm-project/commit/d02a704ec952f01ab258e8c4cbb3c01c8f768e15
DIFF: https://github.com/llvm/llvm-project/commit/d02a704ec952f01ab258e8c4cbb3c01c8f768e15.diff
LOG: [SLP][REVEC] Make getExtractWithExtendCost support FixedVectorType as Dst. (#134822)
Added:
llvm/test/Transforms/SLPVectorizer/X86/revec-getExtractWithExtendCost.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b13afa8ef876c..4611d162edbe2 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5410,6 +5410,24 @@ static InstructionCost getVectorInstrCost(
ScalarUserAndIdx);
}
+/// This is similar to TargetTransformInfo::getExtractWithExtendCost, but if Dst
+/// is a FixedVectorType, a vector will be extracted instead of a scalar.
+///
+/// \p Opcode is passed through as the cast opcode and \p Index as the lane to
+/// extract. When \p Dst is not a FixedVectorType the query is delegated to
+/// TTI.getExtractWithExtendCost as-is. Note that \p CostKind is only used on
+/// the FixedVectorType path; it is not forwarded to the delegated call.
+static InstructionCost getExtractWithExtendCost(
+ const TargetTransformInfo &TTI, unsigned Opcode, Type *Dst,
+ VectorType *VecTy, unsigned Index,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) {
+ // A FixedVectorType destination is only expected when SLPReVec is enabled;
+ // the local is still called ScalarTy even though it is a vector here.
+ if (auto *ScalarTy = dyn_cast<FixedVectorType>(Dst)) {
+ assert(SLPReVec && "Only supported by REVEC.");
+ // Type of the piece taken out of VecTy, before the extend.
+ // NOTE(review): getWidenedType is assumed to build a vector of the given
+ // element type and element count -- confirm against its definition.
+ auto *SubTp =
+ getWidenedType(VecTy->getElementType(), ScalarTy->getNumElements());
+ // Total cost = extract-subvector shuffle at offset
+ // Index * ScalarTy->getNumElements(), plus a cast from SubTp to Dst.
+ return getShuffleCost(TTI, TTI::SK_ExtractSubvector, VecTy, {}, CostKind,
+ Index * ScalarTy->getNumElements(), SubTp) +
+ TTI.getCastInstrCost(Opcode, Dst, SubTp, TTI::CastContextHint::None,
+ CostKind);
+ }
+ // Dst is not a FixedVectorType: delegate to the TTI query unchanged.
+ return TTI.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
+}
+
/// Correctly creates insert_subvector, checking that the index is multiple of
/// the subvectors length. Otherwise, generates shuffle using \p Generator or
/// using default shuffle.
@@ -14155,13 +14173,15 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
const TreeEntry *Entry = &EU.E;
auto It = MinBWs.find(Entry);
if (It != MinBWs.end()) {
- auto *MinTy = IntegerType::get(F->getContext(), It->second.first);
+ Type *MinTy = IntegerType::get(F->getContext(), It->second.first);
+ if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy))
+ MinTy = getWidenedType(MinTy, VecTy->getNumElements());
unsigned Extend = isKnownNonNegative(EU.Scalar, SimplifyQuery(*DL))
? Instruction::ZExt
: Instruction::SExt;
VecTy = getWidenedType(MinTy, BundleWidth);
- ExtraCost = TTI->getExtractWithExtendCost(Extend, EU.Scalar->getType(),
- VecTy, EU.Lane);
+ ExtraCost =
+ getExtractWithExtendCost(*TTI, Extend, ScalarTy, VecTy, EU.Lane);
} else {
ExtraCost =
getVectorInstrCost(*TTI, ScalarTy, Instruction::ExtractElement, VecTy,
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-getExtractWithExtendCost.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-getExtractWithExtendCost.ll
new file mode 100644
index 0000000000000..8d3d9f2979298
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-getExtractWithExtendCost.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx10.2-512 -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s
+
+; The FileCheck assertions inside this function match the input body
+; instruction for instruction, i.e. the expected output of the pass for this
+; function is the unchanged IR.
+; NOTE(review): presumably %2 (the `or` of %0) is the extra user of a
+; vector-typed value that triggers the extract-with-extend cost query when
+; run with -slp-revec -- confirm against the SLP cost-model trace.
+define void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = sub <8 x i64> zeroinitializer, splat (i64 1)
+; CHECK-NEXT: [[TMP1:%.*]] = sub <8 x i64> zeroinitializer, zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i64> [[TMP0]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i64> [[TMP0]] to <8 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr null, i64 32
+; CHECK-NEXT: store <8 x i32> [[TMP3]], ptr null, align 4
+; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr [[TMP5]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = sub <8 x i64> zeroinitializer, splat (i64 1)
+ %1 = sub <8 x i64> zeroinitializer, zeroinitializer
+ %2 = or <8 x i64> %0, zeroinitializer
+ %3 = trunc <8 x i64> %0 to <8 x i32>
+ %4 = trunc <8 x i64> %1 to <8 x i32>
+ %5 = getelementptr i8, ptr null, i64 32
+ store <8 x i32> %3, ptr null, align 4
+ store <8 x i32> %4, ptr %5, align 4
+ ret void
+}
More information about the llvm-commits
mailing list