[llvm] [SLP][REVEC] Make getAltInstrMask and getGatherCost vectorize vector instructions. (PR #99461)

Han-Kuan Chen via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 25 02:13:17 PDT 2024


https://github.com/HanKuanChen updated https://github.com/llvm/llvm-project/pull/99461

>From e3aa370e5951ff087180f7ad1e6f20e10b0ea034 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Wed, 24 Jul 2024 02:31:00 -0700
Subject: [PATCH 1/4] [SLP][REVEC] Pre-commit test.

---
 .../X86/revec-reduction-logical.ll              | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/revec-reduction-logical.ll

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-reduction-logical.ll
new file mode 100644
index 0000000000000..b1acea9af61bf
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-reduction-logical.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=slp-vectorizer -slp-revec -mtriple=x86_64 -S | FileCheck %s
+
+define i1 @logical_and_icmp_diff_preds(<4 x i32> %x) {
+  %x0 = extractelement <4 x i32> %x, i32 0
+  %x1 = extractelement <4 x i32> %x, i32 1
+  %x2 = extractelement <4 x i32> %x, i32 2
+  %x3 = extractelement <4 x i32> %x, i32 3
+  %c0 = icmp ult i32 %x0, 0
+  %c1 = icmp slt i32 %x1, 0
+  %c2 = icmp sgt i32 %x2, 0
+  %c3 = icmp slt i32 %x3, 0
+  %s1 = select i1 %c0, i1 %c1, i1 false
+  %s2 = select i1 %s1, i1 %c2, i1 false
+  %s3 = select i1 %s2, i1 %c3, i1 false
+  ret i1 %s3
+}

>From 54ea15c97902aee093305023a99f0bb32ca2de2a Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Fri, 28 Jun 2024 02:30:43 -0700
Subject: [PATCH 2/4] [SLP][REVEC] Make getAltInstrMask support vector
 instructions.

Fix "Mask and VecTy are incompatible".
---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp        |  7 +++++--
 .../SLPVectorizer/X86/revec-reduction-logical.ll       | 10 ++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b74417f4606e7..2a28a0b581769 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1032,10 +1032,13 @@ static void fixupOrderingIndices(MutableArrayRef<unsigned> Order) {
 /// Opcode1.
 SmallBitVector getAltInstrMask(ArrayRef<Value *> VL, unsigned Opcode0,
                                unsigned Opcode1) {
-  SmallBitVector OpcodeMask(VL.size(), false);
+  Type *ScalarTy = VL[0]->getType();
+  unsigned ScalarTyNumElements = getNumElements(ScalarTy);
+  SmallBitVector OpcodeMask(VL.size() * ScalarTyNumElements, false);
   for (unsigned Lane : seq<unsigned>(VL.size()))
     if (cast<Instruction>(VL[Lane])->getOpcode() == Opcode1)
-      OpcodeMask.set(Lane);
+      OpcodeMask.set(Lane * ScalarTyNumElements,
+                     Lane * ScalarTyNumElements + ScalarTyNumElements);
   return OpcodeMask;
 }
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-reduction-logical.ll
index b1acea9af61bf..8be831f53cf8c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/revec-reduction-logical.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-reduction-logical.ll
@@ -2,6 +2,16 @@
 ; RUN: opt < %s -passes=slp-vectorizer -slp-revec -mtriple=x86_64 -S | FileCheck %s
 
 define i1 @logical_and_icmp_diff_preds(<4 x i32> %x) {
+; CHECK-LABEL: @logical_and_icmp_diff_preds(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, <4 x i32> <i32 1, i32 3, i32 6, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT:    [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]])
+; CHECK-NEXT:    ret i1 [[TMP7]]
+;
   %x0 = extractelement <4 x i32> %x, i32 0
   %x1 = extractelement <4 x i32> %x, i32 1
   %x2 = extractelement <4 x i32> %x, i32 2

>From 2d8e0b2d898ffaef5de52a3dbe251a56a37cadbc Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Wed, 24 Jul 2024 02:47:58 -0700
Subject: [PATCH 3/4] [SLP][REVEC] Pre-commit test.

---
 .../X86/revec-reduction-logical.ll            | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-reduction-logical.ll
index 8be831f53cf8c..99882e3100005 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/revec-reduction-logical.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-reduction-logical.ll
@@ -25,3 +25,26 @@ define i1 @logical_and_icmp_diff_preds(<4 x i32> %x) {
   %s3 = select i1 %s2, i1 %c3, i1 false
   ret i1 %s3
 }
+
+define i1 @logical_and_icmp_clamp(<4 x i32> %x) {
+  %x0 = extractelement <4 x i32> %x, i32 0
+  %x1 = extractelement <4 x i32> %x, i32 1
+  %x2 = extractelement <4 x i32> %x, i32 2
+  %x3 = extractelement <4 x i32> %x, i32 3
+  %c0 = icmp slt i32 %x0, 42
+  %c1 = icmp slt i32 %x1, 42
+  %c2 = icmp slt i32 %x2, 42
+  %c3 = icmp slt i32 %x3, 42
+  %d0 = icmp sgt i32 %x0, 17
+  %d1 = icmp sgt i32 %x1, 17
+  %d2 = icmp sgt i32 %x2, 17
+  %d3 = icmp sgt i32 %x3, 17
+  %s1 = select i1 %c0, i1 %c1, i1 false
+  %s2 = select i1 %s1, i1 %c2, i1 false
+  %s3 = select i1 %s2, i1 %c3, i1 false
+  %s4 = select i1 %s3, i1 %d0, i1 false
+  %s5 = select i1 %s4, i1 %d1, i1 false
+  %s6 = select i1 %s5, i1 %d2, i1 false
+  %s7 = select i1 %s6, i1 %d3, i1 false
+  ret i1 %s7
+}

>From d04b0f8afe06c65b6b5166cc5f11e51b64d096d3 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Fri, 28 Jun 2024 02:41:19 -0700
Subject: [PATCH 4/4] [SLP][REVEC] Make getGatherCost support vector
 instructions.

Fix "Vector size mismatch".
---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp          | 9 ++++++---
 .../SLPVectorizer/X86/revec-reduction-logical.ll         | 9 +++++++++
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2a28a0b581769..25111ee6faba9 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -11361,7 +11361,8 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
   // Find the cost of inserting/extracting values from the vector.
   // Check if the same elements are inserted several times and count them as
   // shuffle candidates.
-  APInt ShuffledElements = APInt::getZero(VL.size());
+  unsigned ScalarTyNumElements = getNumElements(ScalarTy);
+  APInt ShuffledElements = APInt::getZero(VecTy->getNumElements());
   DenseMap<Value *, unsigned> UniqueElements;
   constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   InstructionCost Cost;
@@ -11381,7 +11382,8 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
     Value *V = VL[I];
     // No need to shuffle duplicates for constants.
     if ((ForPoisonSrc && isConstant(V)) || isa<UndefValue>(V)) {
-      ShuffledElements.setBit(I);
+      ShuffledElements.setBits(I * ScalarTyNumElements,
+                               I * ScalarTyNumElements + ScalarTyNumElements);
       ShuffleMask[I] = isa<PoisonValue>(V) ? PoisonMaskElem : I;
       continue;
     }
@@ -11394,7 +11396,8 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
     }
 
     DuplicateNonConst = true;
-    ShuffledElements.setBit(I);
+    ShuffledElements.setBits(I * ScalarTyNumElements,
+                             I * ScalarTyNumElements + ScalarTyNumElements);
     ShuffleMask[I] = Res.first->second;
   }
   if (ForPoisonSrc)
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-reduction-logical.ll
index 99882e3100005..dfe9799b2a7de 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/revec-reduction-logical.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-reduction-logical.ll
@@ -27,6 +27,15 @@ define i1 @logical_and_icmp_diff_preds(<4 x i32> %x) {
 }
 
 define i1 @logical_and_icmp_clamp(<4 x i32> %x) {
+; CHECK-LABEL: @logical_and_icmp_clamp(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]])
+; CHECK-NEXT:    ret i1 [[TMP6]]
+;
   %x0 = extractelement <4 x i32> %x, i32 0
   %x1 = extractelement <4 x i32> %x, i32 1
   %x2 = extractelement <4 x i32> %x, i32 2



More information about the llvm-commits mailing list