[llvm] [SLP][REVEC] Make Instruction::Select support vector instructions. (PR #100507)

Han-Kuan Chen via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 31 00:11:24 PDT 2024


https://github.com/HanKuanChen updated https://github.com/llvm/llvm-project/pull/100507

>From 04a648b9510fa74eb38c116617fda5614e0d209b Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Fri, 28 Jun 2024 00:48:14 -0700
Subject: [PATCH 1/2] [SLP][REVEC] Pre-commit test.

---
 llvm/test/Transforms/SLPVectorizer/revec.ll | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll
index c2dc6d0ab73b7..5b9813f474627 100644
--- a/llvm/test/Transforms/SLPVectorizer/revec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/revec.ll
@@ -58,3 +58,21 @@ entry:
   store <8 x i16> %4, ptr %5, align 2
   ret void
 }
+
+define void @test3(ptr %x, ptr %y, ptr %z) {
+entry:
+  %0 = getelementptr inbounds i32, ptr %x, i64 4
+  %1 = getelementptr inbounds i32, ptr %y, i64 4
+  %2 = load <4 x i32>, ptr %x, align 4
+  %3 = load <4 x i32>, ptr %0, align 4
+  %4 = load <4 x i32>, ptr %y, align 4
+  %5 = load <4 x i32>, ptr %1, align 4
+  %6 = icmp eq ptr %x, null
+  %7 = icmp eq ptr %y, null
+  %8 = select i1 %6, <4 x i32> %2, <4 x i32> %4
+  %9 = select i1 %7, <4 x i32> %3, <4 x i32> %5
+  %10 = getelementptr inbounds i32, ptr %z, i64 4
+  store <4 x i32> %8, ptr %z, align 4
+  store <4 x i32> %9, ptr %10, align 4
+  ret void
+}

>From b29ba153df7b2d8fdd75febfa99efb253d56555d Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Fri, 28 Jun 2024 02:17:28 -0700
Subject: [PATCH 2/2] [SLP][REVEC] Make Instruction::Select support vector
 instructions.

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 33 +++++++++++++++++++
 llvm/test/Transforms/SLPVectorizer/revec.ll   | 12 +++++++
 2 files changed, 45 insertions(+)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b74417f4606e7..87315c358b0e0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9713,6 +9713,23 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
         }
         VecCost = std::min(VecCost, IntrinsicCost);
       }
+      if (auto *SI = dyn_cast<SelectInst>(VL0)) {
+        auto *CondType =
+            getWidenedType(SI->getCondition()->getType(), VL.size());
+        unsigned CondNumElements = CondType->getNumElements();
+        unsigned VecTyNumElements = getNumElements(VecTy);
+        assert(VecTyNumElements >= CondNumElements &&
+               VecTyNumElements % CondNumElements == 0 &&
+               "Cannot vectorize Instruction::Select");
+        if (CondNumElements != VecTyNumElements) {
+          // When the return type is i1 but the source is fixed vector type, we
+          // need to duplicate the condition value.
+          VecCost += TTI->getShuffleCost(
+              TTI::SK_PermuteSingleSrc, CondType,
+              createReplicatedMask(VecTyNumElements / CondNumElements,
+                                   CondNumElements));
+        }
+      }
       return VecCost + CommonCost;
     };
     return GetCostDiff(GetScalarCost, GetVectorCost);
@@ -13196,6 +13213,22 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
           False = Builder.CreateIntCast(False, VecTy, GetOperandSignedness(2));
       }
 
+      unsigned CondNumElements = getNumElements(Cond->getType());
+      unsigned TrueNumElements = getNumElements(True->getType());
+      assert(TrueNumElements >= CondNumElements &&
+             TrueNumElements % CondNumElements == 0 &&
+             "Cannot vectorize Instruction::Select");
+      assert(TrueNumElements == getNumElements(False->getType()) &&
+             "Cannot vectorize Instruction::Select");
+      if (CondNumElements != TrueNumElements) {
+        // When the return type is i1 but the source is fixed vector type, we
+        // need to duplicate the condition value.
+        Cond = Builder.CreateShuffleVector(
+            Cond, createReplicatedMask(TrueNumElements / CondNumElements,
+                                       CondNumElements));
+      }
+      assert(getNumElements(Cond->getType()) == TrueNumElements &&
+             "Cannot vectorize Instruction::Select");
       Value *V = Builder.CreateSelect(Cond, True, False);
       V = FinalShuffle(V, E, VecTy);
 
diff --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll
index 5b9813f474627..a6e1061189980 100644
--- a/llvm/test/Transforms/SLPVectorizer/revec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/revec.ll
@@ -60,6 +60,18 @@ entry:
 }
 
 define void @test3(ptr %x, ptr %y, ptr %z) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x ptr> poison, ptr [[X:%.*]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> [[TMP0]], ptr [[Y:%.*]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x ptr> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[X]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[Y]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i1> [[TMP2]], <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[TMP3]], <8 x i32> [[TMP4]]
+; CHECK-NEXT:    store <8 x i32> [[TMP6]], ptr [[Z:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
 entry:
   %0 = getelementptr inbounds i32, ptr %x, i64 4
   %1 = getelementptr inbounds i32, ptr %y, i64 4



More information about the llvm-commits mailing list