[llvm] [SLP][REVEC] Add ExtractSubvector for ExternalUses. (PR #132761)

Han-Kuan Chen via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 24 08:43:17 PDT 2025


https://github.com/HanKuanChen created https://github.com/llvm/llvm-project/pull/132761

For llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll,
ScalarCost and ExtraCost are both 1, so the original scalar will be kept.

>From 28f32d604d20b927f6fad7237aa22cd93b30f845 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Mon, 24 Mar 2025 06:43:07 -0700
Subject: [PATCH 1/2] [SLP][REVEC] Pre-commit test.

---
 .../X86/revec-ExtractSubvector.ll             | 63 +++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/revec-ExtractSubvector.ll

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-ExtractSubvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-ExtractSubvector.ll
new file mode 100644
index 0000000000000..7548e7b2675ee
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-ExtractSubvector.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -slp-revec -pass-remarks-output=%t < %s | FileCheck %s
+; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
+
+; See https://reviews.llvm.org/D70068 and https://reviews.llvm.org/D70587 for context
+
+; YAML: --- !Passed
+; YAML: Pass:            slp-vectorizer
+; YAML: Name:            VectorizedList
+; YAML: Function:        StructOfVectors
+; YAML: Args:
+; YAML:   - String:          'SLP vectorized with cost '
+; YAML:   - Cost:            '-10'
+; YAML:   - String:          ' and with tree size '
+; YAML:   - TreeSize:        '3'
+
+; YAML: --- !Passed
+; YAML: Pass:            slp-vectorizer
+; YAML: Name:            VectorizedList
+; YAML: Function:        StructOfVectors
+; YAML: Args:
+; YAML:   - String:          'SLP vectorized with cost '
+; YAML:   - Cost:            '-1'
+; YAML:   - String:          ' and with tree size '
+; YAML:   - TreeSize:        '3'
+
+; Checks that vector insertvalues into the struct become SLP seeds.
+define { <2 x float>, <2 x float> } @StructOfVectors(ptr %Ptr) {
+; CHECK-LABEL: @StructOfVectors(
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <4 x float> [[TMP1]], <float 1.100000e+01, float 1.200000e+01, float 1.300000e+01, float 1.400000e+01>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> [[TMP2]], i64 0)
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP6:%.*]] = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> [[TMP5]], i64 0)
+; CHECK-NEXT:    [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP6]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> [[TMP5]], i64 2)
+; CHECK-NEXT:    [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[TMP7]], 1
+; CHECK-NEXT:    ret { <2 x float>, <2 x float> } [[RET1]]
+;
+  %L0 = load float, ptr %Ptr
+  %GEP1 = getelementptr inbounds float, ptr %Ptr, i64 1
+  %L1 = load float, ptr %GEP1
+  %GEP2 = getelementptr inbounds float, ptr %Ptr, i64 2
+  %L2 = load float, ptr %GEP2
+  %GEP3 = getelementptr inbounds float, ptr %Ptr, i64 3
+  %L3 = load float, ptr %GEP3
+
+  %Fadd0 = fadd fast float %L0, 1.1e+01
+  %Fadd1 = fadd fast float %L1, 1.2e+01
+  %Fadd2 = fadd fast float %L2, 1.3e+01
+  %Fadd3 = fadd fast float %L3, 1.4e+01
+
+  %VecIn0 = insertelement <2 x float> undef, float %Fadd0, i64 0
+  %VecIn1 = insertelement <2 x float> %VecIn0, float %Fadd1, i64 1
+
+  %VecIn2 = insertelement <2 x float> undef, float %Fadd2, i64 0
+  %VecIn3 = insertelement <2 x float> %VecIn2, float %Fadd3, i64 1
+
+  %Ret0 = insertvalue {<2 x float>, <2 x float>} undef, <2 x float> %VecIn1, 0
+  %Ret1 = insertvalue {<2 x float>, <2 x float>} %Ret0, <2 x float> %VecIn3, 1
+  ret {<2 x float>, <2 x float>} %Ret1
+}

>From 56dc4ed0a88554e4a48326124ae39b6e937a0b36 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Mon, 24 Mar 2025 07:20:01 -0700
Subject: [PATCH 2/2] [SLP][REVEC] Add ExtractSubvector for ExternalUses.

For llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll,
ScalarCost and ExtraCost are both 1, so the original scalar will be kept.
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 21 +++++++++++++------
 .../X86/revec-ExtractSubvector.ll             | 19 +++++++----------
 .../SLPVectorizer/revec-shufflevector.ll      |  9 ++++----
 3 files changed, 28 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0201955b8b559..7272ca6242fa8 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13623,12 +13623,13 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
         !ExtractCostCalculated.insert(EU.Scalar).second)
       continue;
 
-    // No extract cost for vector "scalar"
-    if (isa<FixedVectorType>(EU.Scalar->getType()))
+    // No extract cost for vector "scalar" if REVEC is disabled
+    if (!SLPReVec && isa<FixedVectorType>(EU.Scalar->getType()))
       continue;
 
     // If found user is an insertelement, do not calculate extract cost but try
     // to detect it as a final shuffled/identity match.
+    // TODO: what if a user is insertvalue when REVEC is enabled?
     if (auto *VU = dyn_cast_or_null<InsertElementInst>(EU.User);
         VU && VU->getOperand(1) == EU.Scalar) {
       if (auto *FTy = dyn_cast<FixedVectorType>(VU->getType())) {
@@ -13702,7 +13703,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
     // extend the extracted value back to the original type. Here, we account
     // for the extract and the added cost of the sign extend if needed.
     InstructionCost ExtraCost = TTI::TCC_Free;
-    auto *VecTy = getWidenedType(EU.Scalar->getType(), BundleWidth);
+    auto *ScalarTy = EU.Scalar->getType();
+    auto *VecTy = getWidenedType(ScalarTy, BundleWidth);
     const TreeEntry *Entry = &EU.E;
     auto It = MinBWs.find(Entry);
     if (It != MinBWs.end()) {
@@ -13714,9 +13716,16 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
       ExtraCost = TTI->getExtractWithExtendCost(Extend, EU.Scalar->getType(),
                                                 VecTy, EU.Lane);
     } else {
-      ExtraCost =
-          TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, CostKind,
-                                  EU.Lane, EU.Scalar, ScalarUserAndIdx);
+      if (auto *FixedVecTy = dyn_cast<FixedVectorType>(ScalarTy)) {
+        assert(SLPReVec && "Only supported by REVEC.");
+        ExtraCost =
+            getShuffleCost(*TTI, TTI::SK_ExtractSubvector, VecTy, {}, CostKind,
+                           EU.Lane * FixedVecTy->getNumElements(), FixedVecTy);
+      } else {
+        ExtraCost = TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy,
+                                            CostKind, EU.Lane, EU.Scalar,
+                                            ScalarUserAndIdx);
+      }
     }
     // Leave the scalar instructions as is if they are cheaper than extracts.
     if (Entry->Idx != 0 || Entry->getOpcode() == Instruction::GetElementPtr ||
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-ExtractSubvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-ExtractSubvector.ll
index 7548e7b2675ee..6c378ac2e583d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/revec-ExtractSubvector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-ExtractSubvector.ll
@@ -14,27 +14,24 @@
 ; YAML:   - String:          ' and with tree size '
 ; YAML:   - TreeSize:        '3'
 
-; YAML: --- !Passed
+; YAML: --- !Missed
 ; YAML: Pass:            slp-vectorizer
-; YAML: Name:            VectorizedList
+; YAML: Name:            NotBeneficial
 ; YAML: Function:        StructOfVectors
 ; YAML: Args:
-; YAML:   - String:          'SLP vectorized with cost '
-; YAML:   - Cost:            '-1'
-; YAML:   - String:          ' and with tree size '
-; YAML:   - TreeSize:        '3'
+; YAML:   - String:          'List vectorization was possible but not beneficial with cost '
+; YAML:   - Cost:            '0'
+; YAML:   - String:          ' >= '
+; YAML:   - Treshold:        '0'
 
 ; Checks that vector insertvalues into the struct become SLP seeds.
 define { <2 x float>, <2 x float> } @StructOfVectors(ptr %Ptr) {
 ; CHECK-LABEL: @StructOfVectors(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <4 x float> [[TMP1]], <float 1.100000e+01, float 1.200000e+01, float 1.300000e+01, float 1.400000e+01>
-; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> [[TMP2]], i64 0)
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP6:%.*]] = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> [[TMP5]], i64 0)
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP6]], 0
-; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> [[TMP5]], i64 2)
 ; CHECK-NEXT:    [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[TMP7]], 1
 ; CHECK-NEXT:    ret { <2 x float>, <2 x float> } [[RET1]]
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll b/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll
index d6c09bc224a7d..f11a0a9c024a2 100644
--- a/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll
@@ -135,15 +135,16 @@ define void @test6(ptr %in0, ptr %in1, ptr %in2) {
 ; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <16 x float> [[TMP14]], <16 x float> [[TMP15]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x float> [[TMP16]], <16 x float> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <32 x float> [[TMP3]], [[TMP2]]
-; CHECK-NEXT:    store <32 x float> [[TMP4]], ptr [[IN2:%.*]], align 16
 ; CHECK-NEXT:    [[GEP10:%.*]] = getelementptr inbounds nuw i8, ptr [[IN1]], i64 32
+; CHECK-NEXT:    [[GEP11:%.*]] = getelementptr inbounds nuw i8, ptr [[IN2:%.*]], i64 128
+; CHECK-NEXT:    [[TMP17:%.*]] = load <8 x float>, ptr [[IN0]], align 16
+; CHECK-NEXT:    store <32 x float> [[TMP4]], ptr [[IN2]], align 16
 ; CHECK-NEXT:    [[LOAD5:%.*]] = load <16 x i8>, ptr [[GEP10]], align 1
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[GEP11:%.*]] = getelementptr inbounds nuw i8, ptr [[IN2]], i64 128
 ; CHECK-NEXT:    [[TMP6:%.*]] = uitofp <16 x i8> [[LOAD5]] to <16 x float>
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x float> [[LOAD2]], <4 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <8 x float> [[TMP17]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> [[TMP8]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x float> [[TMP17]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <16 x float> [[TMP9]], <16 x float> [[TMP10]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>



More information about the llvm-commits mailing list