[llvm] [SLP][REVEC] Make getExtractWithExtendCost support FixedVectorType as Dst. (PR #134822)

Han-Kuan Chen via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 8 06:29:52 PDT 2025


https://github.com/HanKuanChen updated https://github.com/llvm/llvm-project/pull/134822

>From 35a1a410d31cbcf93d75d79d81c302ecd585d18c Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Tue, 8 Apr 2025 02:27:59 -0700
Subject: [PATCH 1/3] [SLP][REVEC] Pre-commit test.

---
 .../X86/revec-getExtractWithExtendCost.ll         | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/revec-getExtractWithExtendCost.ll

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-getExtractWithExtendCost.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-getExtractWithExtendCost.ll
new file mode 100644
index 0000000000000..a3664c6d2ab99
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-getExtractWithExtendCost.ll
@@ -0,0 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx10.2-512 -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s
+
+define void @test() {
+entry:
+  %0 = sub <8 x i64> zeroinitializer, splat (i64 1)
+  %1 = sub <8 x i64> zeroinitializer, zeroinitializer
+  %2 = or <8 x i64> %0, zeroinitializer
+  %3 = trunc <8 x i64> %0 to <8 x i32>
+  %4 = trunc <8 x i64> %1 to <8 x i32>
+  %5 = getelementptr i8, ptr null, i64 32
+  store <8 x i32> %3, ptr null, align 4
+  store <8 x i32> %4, ptr %5, align 4
+  ret void
+}

>From 7aae441a88b97dff6819c37c680d9d662e38587e Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Tue, 8 Apr 2025 02:25:39 -0700
Subject: [PATCH 2/3] [SLP][REVEC] Make getExtractWithExtendCost support
 FixedVectorType as Dst.

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 27 ++++++++++++++++---
 .../X86/revec-getExtractWithExtendCost.ll     | 12 +++++++++
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e6559f26be8c2..6a96d6e40674c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5399,6 +5399,25 @@ static InstructionCost getVectorInstrCost(
                                 ScalarUserAndIdx);
 }
 
+/// This is similar to TargetTransformInfo::getExtractWithExtendCost, but if Dst
+/// is a FixedVectorType, a vector will be extracted instead of a scalar.
+static InstructionCost getExtractWithExtendCost(const TargetTransformInfo &TTI,
+                                                unsigned Opcode, Type *Dst,
+                                                VectorType *VecTy,
+                                                unsigned Index) {
+  if (auto *ScalarTy = dyn_cast<FixedVectorType>(Dst)) {
+    assert(SLPReVec && "Only supported by REVEC.");
+    TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+    auto *SubTp =
+        getWidenedType(VecTy->getElementType(), ScalarTy->getNumElements());
+    return getShuffleCost(TTI, TTI::SK_ExtractSubvector, VecTy, {}, CostKind,
+                          Index * ScalarTy->getNumElements(), SubTp) +
+           TTI.getCastInstrCost(Opcode, Dst, SubTp, TTI::CastContextHint::None,
+                                CostKind);
+  }
+  return TTI.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
+}
+
 /// Correctly creates insert_subvector, checking that the index is multiple of
 /// the subvectors length. Otherwise, generates shuffle using \p Generator or
 /// using default shuffle.
@@ -14088,13 +14107,15 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
     const TreeEntry *Entry = &EU.E;
     auto It = MinBWs.find(Entry);
     if (It != MinBWs.end()) {
-      auto *MinTy = IntegerType::get(F->getContext(), It->second.first);
+      Type *MinTy = IntegerType::get(F->getContext(), It->second.first);
+      if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy))
+        MinTy = getWidenedType(MinTy, VecTy->getNumElements());
       unsigned Extend = isKnownNonNegative(EU.Scalar, SimplifyQuery(*DL))
                             ? Instruction::ZExt
                             : Instruction::SExt;
       VecTy = getWidenedType(MinTy, BundleWidth);
-      ExtraCost = TTI->getExtractWithExtendCost(Extend, EU.Scalar->getType(),
-                                                VecTy, EU.Lane);
+      ExtraCost =
+          getExtractWithExtendCost(*TTI, Extend, ScalarTy, VecTy, EU.Lane);
     } else {
       ExtraCost =
           getVectorInstrCost(*TTI, ScalarTy, Instruction::ExtractElement, VecTy,
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-getExtractWithExtendCost.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-getExtractWithExtendCost.ll
index a3664c6d2ab99..8d3d9f2979298 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/revec-getExtractWithExtendCost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-getExtractWithExtendCost.ll
@@ -2,6 +2,18 @@
 ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx10.2-512 -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s
 
 define void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = sub <8 x i64> zeroinitializer, splat (i64 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = sub <8 x i64> zeroinitializer, zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = or <8 x i64> [[TMP0]], zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc <8 x i64> [[TMP0]] to <8 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32>
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr null, i64 32
+; CHECK-NEXT:    store <8 x i32> [[TMP3]], ptr null, align 4
+; CHECK-NEXT:    store <8 x i32> [[TMP4]], ptr [[TMP5]], align 4
+; CHECK-NEXT:    ret void
+;
 entry:
   %0 = sub <8 x i64> zeroinitializer, splat (i64 1)
   %1 = sub <8 x i64> zeroinitializer, zeroinitializer

>From a2a0d6d4dd167ed654a9518840af0161d463fb2c Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Tue, 8 Apr 2025 06:29:37 -0700
Subject: [PATCH 3/3] apply comment

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 6a96d6e40674c..a916792a4b4fd 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5401,13 +5401,12 @@ static InstructionCost getVectorInstrCost(
 
 /// This is similar to TargetTransformInfo::getExtractWithExtendCost, but if Dst
 /// is a FixedVectorType, a vector will be extracted instead of a scalar.
-static InstructionCost getExtractWithExtendCost(const TargetTransformInfo &TTI,
-                                                unsigned Opcode, Type *Dst,
-                                                VectorType *VecTy,
-                                                unsigned Index) {
+static InstructionCost getExtractWithExtendCost(
+    const TargetTransformInfo &TTI, unsigned Opcode, Type *Dst,
+    VectorType *VecTy, unsigned Index,
+    TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) {
   if (auto *ScalarTy = dyn_cast<FixedVectorType>(Dst)) {
     assert(SLPReVec && "Only supported by REVEC.");
-    TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
     auto *SubTp =
         getWidenedType(VecTy->getElementType(), ScalarTy->getNumElements());
     return getShuffleCost(TTI, TTI::SK_ExtractSubvector, VecTy, {}, CostKind,



More information about the llvm-commits mailing list