[llvm] d02a704 - [SLP][REVEC] Make getExtractWithExtendCost support FixedVectorType as Dst. (#134822)

via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 10 03:54:49 PDT 2025


Author: Han-Kuan Chen
Date: 2025-04-10T18:54:45+08:00
New Revision: d02a704ec952f01ab258e8c4cbb3c01c8f768e15

URL: https://github.com/llvm/llvm-project/commit/d02a704ec952f01ab258e8c4cbb3c01c8f768e15
DIFF: https://github.com/llvm/llvm-project/commit/d02a704ec952f01ab258e8c4cbb3c01c8f768e15.diff

LOG: [SLP][REVEC] Make getExtractWithExtendCost support FixedVectorType as Dst. (#134822)

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/revec-getExtractWithExtendCost.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b13afa8ef876c..4611d162edbe2 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5410,6 +5410,24 @@ static InstructionCost getVectorInstrCost(
                                 ScalarUserAndIdx);
 }
 
+/// Like TargetTransformInfo::getExtractWithExtendCost, but a FixedVectorType
+/// \p Dst (REVEC only) is costed as an extract-subvector shuffle plus a cast.
+static InstructionCost getExtractWithExtendCost(
+    const TargetTransformInfo &TTI, unsigned Opcode, Type *Dst,
+    VectorType *VecTy, unsigned Index,
+    TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) {
+  if (auto *ScalarTy = dyn_cast<FixedVectorType>(Dst)) {
+    assert(SLPReVec && "Only supported by REVEC.");
+    auto *SubTp =
+        getWidenedType(VecTy->getElementType(), ScalarTy->getNumElements());
+    return getShuffleCost(TTI, TTI::SK_ExtractSubvector, VecTy, {}, CostKind,
+                          Index * ScalarTy->getNumElements(), SubTp) +
+           TTI.getCastInstrCost(Opcode, Dst, SubTp, TTI::CastContextHint::None,
+                                CostKind);
+  }
+  return TTI.getExtractWithExtendCost(Opcode, Dst, VecTy, Index); // NOTE(review): CostKind is not forwarded on this scalar path - confirm intended.
+}
+
 /// Correctly creates insert_subvector, checking that the index is multiple of
 /// the subvectors length. Otherwise, generates shuffle using \p Generator or
 /// using default shuffle.
@@ -14155,13 +14173,15 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
     const TreeEntry *Entry = &EU.E;
     auto It = MinBWs.find(Entry);
     if (It != MinBWs.end()) {
-      auto *MinTy = IntegerType::get(F->getContext(), It->second.first);
+      Type *MinTy = IntegerType::get(F->getContext(), It->second.first);
+      if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy))
+        MinTy = getWidenedType(MinTy, VecTy->getNumElements());
       unsigned Extend = isKnownNonNegative(EU.Scalar, SimplifyQuery(*DL))
                             ? Instruction::ZExt
                             : Instruction::SExt;
       VecTy = getWidenedType(MinTy, BundleWidth);
-      ExtraCost = TTI->getExtractWithExtendCost(Extend, EU.Scalar->getType(),
-                                                VecTy, EU.Lane);
+      ExtraCost =
+          getExtractWithExtendCost(*TTI, Extend, ScalarTy, VecTy, EU.Lane);
     } else {
       ExtraCost =
           getVectorInstrCost(*TTI, ScalarTy, Instruction::ExtractElement, VecTy,

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-getExtractWithExtendCost.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-getExtractWithExtendCost.ll
new file mode 100644
index 0000000000000..8d3d9f2979298
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-getExtractWithExtendCost.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx10.2-512 -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = sub <8 x i64> zeroinitializer, splat (i64 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = sub <8 x i64> zeroinitializer, zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = or <8 x i64> [[TMP0]], zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc <8 x i64> [[TMP0]] to <8 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32>
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr null, i64 32
+; CHECK-NEXT:    store <8 x i32> [[TMP3]], ptr null, align 4
+; CHECK-NEXT:    store <8 x i32> [[TMP4]], ptr [[TMP5]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = sub <8 x i64> zeroinitializer, splat (i64 1)
+  %1 = sub <8 x i64> zeroinitializer, zeroinitializer
+  %2 = or <8 x i64> %0, zeroinitializer
+  %3 = trunc <8 x i64> %0 to <8 x i32>
+  %4 = trunc <8 x i64> %1 to <8 x i32>
+  %5 = getelementptr i8, ptr null, i64 32
+  store <8 x i32> %3, ptr null, align 4
+  store <8 x i32> %4, ptr %5, align 4
+  ret void
+}


        


More information about the llvm-commits mailing list