[llvm] [SLP][REVEC] Fix type comparison and mask transformation for REVEC. (PR #135310)

Han-Kuan Chen via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 10 20:33:31 PDT 2025


https://github.com/HanKuanChen created https://github.com/llvm/llvm-project/pull/135310

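When REVEC is enabled, ScalarTy may be a FixedVectorType rather than a
scalar type. Compare each operand's element type against the scalar
(element) type of ScalarTy to decide whether an integer cast is needed.
Also expand the shuffle mask to per-element indices and scale the insert
index by the number of elements in ScalarTy.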

>From 1d3cbe707e319eaa1b58ea595e3f87493dd95e29 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 10 Apr 2025 20:07:50 -0700
Subject: [PATCH 1/2] [SLP][REVEC] Pre-commit test.

---
 .../SLPVectorizer/X86/revec-SplitVectorize.ll | 42 +++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/revec-SplitVectorize.ll

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-SplitVectorize.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-SplitVectorize.ll
new file mode 100644
index 0000000000000..02b0becd31322
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-SplitVectorize.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx -passes=slp-vectorizer -S -slp-revec < %s | FileCheck %s
+
+define void @test() {
+entry:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %vec.phi30 = phi <4 x i32> [ zeroinitializer, %entry ], [ %predphi, %vector.body ]
+  %vec.phi31 = phi <4 x i32> [ zeroinitializer, %entry ], [ %predphi40, %vector.body ]
+  %vec.phi32 = phi <4 x i32> [ zeroinitializer, %entry ], [ %predphi41, %vector.body ]
+  %vec.phi33 = phi <4 x i32> [ zeroinitializer, %entry ], [ %predphi42, %vector.body ]
+  %vec.phi = phi <4 x i32> [ zeroinitializer, %entry ], [ %predphi43, %vector.body ]
+  %vec.phi27 = phi <4 x i32> [ zeroinitializer, %entry ], [ %predphi44, %vector.body ]
+  %vec.phi28 = phi <4 x i32> [ zeroinitializer, %entry ], [ %predphi45, %vector.body ]
+  %vec.phi29 = phi <4 x i32> [ zeroinitializer, %entry ], [ %predphi46, %vector.body ]
+  %narrow = select <4 x i1> zeroinitializer, <4 x i1> zeroinitializer, <4 x i1> zeroinitializer
+  %narrow66 = select <4 x i1> zeroinitializer, <4 x i1> zeroinitializer, <4 x i1> zeroinitializer
+  %narrow67 = select <4 x i1> zeroinitializer, <4 x i1> zeroinitializer, <4 x i1> zeroinitializer
+  %narrow68 = select <4 x i1> zeroinitializer, <4 x i1> zeroinitializer, <4 x i1> zeroinitializer
+  %not. = xor <4 x i1> zeroinitializer, zeroinitializer
+  %not.69 = xor <4 x i1> zeroinitializer, zeroinitializer
+  %not.70 = xor <4 x i1> zeroinitializer, zeroinitializer
+  %not.71 = xor <4 x i1> zeroinitializer, zeroinitializer
+  %0 = zext <4 x i1> %narrow to <4 x i32>
+  %1 = zext <4 x i1> %narrow66 to <4 x i32>
+  %2 = zext <4 x i1> %narrow67 to <4 x i32>
+  %3 = zext <4 x i1> %narrow68 to <4 x i32>
+  %4 = zext <4 x i1> %not. to <4 x i32>
+  %5 = zext <4 x i1> %not.69 to <4 x i32>
+  %6 = zext <4 x i1> %not.70 to <4 x i32>
+  %7 = zext <4 x i1> %not.71 to <4 x i32>
+  %predphi = or <4 x i32> %vec.phi30, %0
+  %predphi40 = or <4 x i32> %vec.phi31, %1
+  %predphi41 = or <4 x i32> %vec.phi32, %2
+  %predphi42 = or <4 x i32> %vec.phi33, %3
+  %predphi43 = or <4 x i32> %vec.phi, %4
+  %predphi44 = or <4 x i32> %vec.phi27, %5
+  %predphi45 = or <4 x i32> %vec.phi28, %6
+  %predphi46 = or <4 x i32> %vec.phi29, %7
+  br label %vector.body
+}

>From 3f53d36af08d4dc6b2225917d47c292690311fed Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 10 Apr 2025 20:08:15 -0700
Subject: [PATCH 2/2] [SLP][REVEC] Fix type comparison and mask transformation
 for REVEC.

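When REVEC is enabled, ScalarTy may be a FixedVectorType rather than a
scalar type. Compare each operand's element type against the scalar
(element) type of ScalarTy to decide whether an integer cast is needed.
Also expand the shuffle mask to per-element indices and scale the insert
index by the number of elements in ScalarTy.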
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 14 ++++++++---
 .../SLPVectorizer/X86/revec-SplitVectorize.ll | 25 +++++++++++++++++++
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2efe107f03142..7b6953899cb8c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -16998,7 +16998,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
         });
       return IsSigned;
     };
-    if (cast<VectorType>(Op1->getType())->getElementType() != ScalarTy) {
+    if (cast<VectorType>(Op1->getType())->getElementType() !=
+        ScalarTy->getScalarType()) {
       assert(ScalarTy->isIntegerTy() && "Expected item in MinBWs.");
       Op1 = Builder.CreateIntCast(
           Op1,
@@ -17007,7 +17008,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
               cast<FixedVectorType>(Op1->getType())->getNumElements()),
           GetOperandSignedness(&OpTE1));
     }
-    if (cast<VectorType>(Op2->getType())->getElementType() != ScalarTy) {
+    if (cast<VectorType>(Op2->getType())->getElementType() !=
+        ScalarTy->getScalarType()) {
       assert(ScalarTy->isIntegerTy() && "Expected item in MinBWs.");
       Op2 = Builder.CreateIntCast(
           Op2,
@@ -17022,9 +17024,15 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
           Mask.begin(),
           std::next(Mask.begin(), E->CombinedEntriesWithIndices.back().second),
           0);
+      unsigned ScalarTyNumElements = getNumElements(ScalarTy);
+      if (ScalarTyNumElements != 1) {
+        assert(SLPReVec && "Only supported by REVEC.");
+        transformScalarShuffleIndiciesToVector(ScalarTyNumElements, Mask);
+      }
       Value *Vec = Builder.CreateShuffleVector(Op1, Mask);
       Vec = createInsertVector(Builder, Vec, Op2,
-                               E->CombinedEntriesWithIndices.back().second);
+                               E->CombinedEntriesWithIndices.back().second *
+                                   ScalarTyNumElements);
       E->VectorizedValue = Vec;
       return Vec;
     }
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-SplitVectorize.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-SplitVectorize.ll
index 02b0becd31322..a1a6cfe30cd1b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/revec-SplitVectorize.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-SplitVectorize.ll
@@ -2,6 +2,31 @@
 ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx -passes=slp-vectorizer -S -slp-revec < %s | FileCheck %s
 
 define void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> poison, <4 x i32> zeroinitializer, i64 0)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP0]], <4 x i32> zeroinitializer, i64 4)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP1]], <4 x i32> zeroinitializer, i64 8)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP2]], <4 x i32> zeroinitializer, i64 12)
+; CHECK-NEXT:    [[TMP4:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP3]], <4 x i32> zeroinitializer, i64 16)
+; CHECK-NEXT:    [[TMP5:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP4]], <4 x i32> zeroinitializer, i64 20)
+; CHECK-NEXT:    [[TMP6:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP5]], <4 x i32> zeroinitializer, i64 24)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP6]], <4 x i32> zeroinitializer, i64 28)
+; CHECK-NEXT:    [[TMP8:%.*]] = trunc <32 x i32> [[TMP7]] to <32 x i1>
+; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x i1> @llvm.vector.insert.v16i1.v4i1(<16 x i1> poison, <4 x i1> zeroinitializer, i64 0)
+; CHECK-NEXT:    [[TMP10:%.*]] = call <16 x i1> @llvm.vector.insert.v16i1.v4i1(<16 x i1> [[TMP9]], <4 x i1> zeroinitializer, i64 4)
+; CHECK-NEXT:    [[TMP11:%.*]] = call <16 x i1> @llvm.vector.insert.v16i1.v4i1(<16 x i1> [[TMP10]], <4 x i1> zeroinitializer, i64 8)
+; CHECK-NEXT:    [[TMP12:%.*]] = call <16 x i1> @llvm.vector.insert.v16i1.v4i1(<16 x i1> [[TMP11]], <4 x i1> zeroinitializer, i64 12)
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[TMP13:%.*]] = phi <32 x i1> [ [[TMP8]], [[ENTRY:%.*]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = select <16 x i1> [[TMP12]], <16 x i1> [[TMP12]], <16 x i1> [[TMP12]]
+; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i1> [[TMP12]], [[TMP12]]
+; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <16 x i1> [[TMP14]], <16 x i1> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP17:%.*]] = call <32 x i1> @llvm.vector.insert.v32i1.v16i1(<32 x i1> [[TMP16]], <16 x i1> [[TMP15]], i64 16)
+; CHECK-NEXT:    [[TMP18]] = or <32 x i1> [[TMP13]], [[TMP17]]
+; CHECK-NEXT:    br label [[VECTOR_BODY]]
+;
 entry:
   br label %vector.body
 



More information about the llvm-commits mailing list