[llvm] 3e008cb - Scalarizer : Fix vector shuffle issue when can't aligned to customized minBits. (#163912)

via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 8 10:06:44 PST 2025


Author: Chow
Date: 2025-12-08T18:06:39Z
New Revision: 3e008cb333d1824a1f69b01d0fab9ee69ce3d9ed

URL: https://github.com/llvm/llvm-project/commit/3e008cb333d1824a1f69b01d0fab9ee69ce3d9ed
DIFF: https://github.com/llvm/llvm-project/commit/3e008cb333d1824a1f69b01d0fab9ee69ce3d9ed.diff

LOG: Scalarizer : Fix vector shuffle issue when can't aligned to customized minBits. (#163912)

When minBits is set to a custom value and the scalarizer pass runs, the
last remaining fragment of a boolean vector may not be aligned to min
bits. In that case the remaining bits should be processed one by one,
and a direct shuffle must not be used during packing.

Problem:
In the 'concatenate' step, when processing a boolean vector, if the last
remaining bits (fragment) can't be aligned to minBits but still have to
be packed, those bits should be processed one by one.

A direct call to vector shuffle assumes that the remaining boolean bits
can be packed to the target pack size. For example, when processing a
boolean vector with `size = 7` and `min bits = 4`, the first fragment
of `4` bits can be packed correctly, but the `3` bits that remain
can't be used in a vector shuffle call.

Solution:
If the remaining bits can't be aligned to the required target (min bits)
pack size, process them one by one.
(This will mostly affect only boolean vectors, as their bit width may
not be aligned to a power of 2.)

---------

Co-authored-by: Zhou, Shaochi(AMD) <shaozhou at amd.com>

Added: 
    llvm/test/Transforms/Scalarizer/vector-bool-not-aligned-min-bits.ll

Modified: 
    llvm/lib/Transforms/Scalar/Scalarizer.cpp
    llvm/test/Transforms/Scalarizer/min-bits.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 25a531ca33214..46f92c3de04da 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -252,7 +252,14 @@ static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
       Res = Builder.CreateInsertElement(Res, Fragment, I * VS.NumPacked,
                                         Name + ".upto" + Twine(I));
     } else {
-      Fragment = Builder.CreateShuffleVector(Fragment, Fragment, ExtendMask);
+      if (NumPacked < VS.NumPacked) {
+        // If last pack of remained bits not match current ExtendMask size.
+        ExtendMask.truncate(NumPacked);
+        ExtendMask.resize(NumElements, -1);
+      }
+
+      Fragment = Builder.CreateShuffleVector(
+          Fragment, PoisonValue::get(Fragment->getType()), ExtendMask);
       if (I == 0) {
         Res = Fragment;
       } else {

diff  --git a/llvm/test/Transforms/Scalarizer/min-bits.ll b/llvm/test/Transforms/Scalarizer/min-bits.ll
index f9e6774ffff64..ca174f1a61e7a 100644
--- a/llvm/test/Transforms/Scalarizer/min-bits.ll
+++ b/llvm/test/Transforms/Scalarizer/min-bits.ll
@@ -171,7 +171,7 @@ define void @load_add_store_v4i10(ptr %pa, ptr %pb) {
 ; MIN32-NEXT:    [[C_I0:%.*]] = add <3 x i10> [[A_I0]], [[B_I0]]
 ; MIN32-NEXT:    [[B_I1:%.*]] = extractelement <4 x i10> [[B]], i64 3
 ; MIN32-NEXT:    [[C_I1:%.*]] = add i10 [[A_I1]], [[B_I1]]
-; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i10> [[C_I0]], <3 x i10> [[C_I0]], <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i10> [[C_I0]], <3 x i10> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
 ; MIN32-NEXT:    [[C:%.*]] = insertelement <4 x i10> [[TMP1]], i10 [[C_I1]], i64 3
 ; MIN32-NEXT:    store <4 x i10> [[C]], ptr [[PA]], align 8
 ; MIN32-NEXT:    ret void
@@ -237,7 +237,7 @@ define <3 x half> @select_uniform_condition_v3f16(<3 x half> %a, <3 x half> %b,
 ; MIN32-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 2
 ; MIN32-NEXT:    [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 2
 ; MIN32-NEXT:    [[R_I1:%.*]] = select i1 [[CC]], half [[A_I1]], half [[B_I1]]
-; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
+; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> poison, <3 x i32> <i32 0, i32 1, i32 poison>
 ; MIN32-NEXT:    [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2
 ; MIN32-NEXT:    ret <3 x half> [[R]]
 ;
@@ -276,8 +276,8 @@ define <4 x half> @select_uniform_condition_v4f16(<4 x half> %a, <4 x half> %b,
 ; MIN32-NEXT:    [[A_I1:%.*]] = shufflevector <4 x half> [[A]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
 ; MIN32-NEXT:    [[B_I1:%.*]] = shufflevector <4 x half> [[B]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
 ; MIN32-NEXT:    [[R_I1:%.*]] = select i1 [[CC]], <2 x half> [[A_I1]], <2 x half> [[B_I1]]
-; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 ; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; MIN32-NEXT:    ret <4 x half> [[R]]
 ;
@@ -338,7 +338,7 @@ define <3 x half> @unary_v3f16(<3 x half> %a) {
 ; MIN32-NEXT:    [[R_I0:%.*]] = fneg <2 x half> [[A_I0]]
 ; MIN32-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 2
 ; MIN32-NEXT:    [[R_I1:%.*]] = fneg half [[A_I1]]
-; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
+; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> poison, <3 x i32> <i32 0, i32 1, i32 poison>
 ; MIN32-NEXT:    [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2
 ; MIN32-NEXT:    ret <3 x half> [[R]]
 ;
@@ -371,8 +371,8 @@ define <4 x half> @unary_v4f16(<4 x half> %a) {
 ; MIN32-NEXT:    [[R_I0:%.*]] = fneg <2 x half> [[A_I0]]
 ; MIN32-NEXT:    [[A_I1:%.*]] = shufflevector <4 x half> [[A]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
 ; MIN32-NEXT:    [[R_I1:%.*]] = fneg <2 x half> [[A_I1]]
-; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 ; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; MIN32-NEXT:    ret <4 x half> [[R]]
 ;
@@ -431,7 +431,7 @@ define <3 x half> @binary_v3f16(<3 x half> %a, <3 x half> %b) {
 ; MIN32-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 2
 ; MIN32-NEXT:    [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 2
 ; MIN32-NEXT:    [[R_I1:%.*]] = fadd half [[A_I1]], [[B_I1]]
-; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
+; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> poison, <3 x i32> <i32 0, i32 1, i32 poison>
 ; MIN32-NEXT:    [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2
 ; MIN32-NEXT:    ret <3 x half> [[R]]
 ;
@@ -470,8 +470,8 @@ define <4 x half> @binary_v4f16(<4 x half> %a, <4 x half> %b) {
 ; MIN32-NEXT:    [[A_I1:%.*]] = shufflevector <4 x half> [[A]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
 ; MIN32-NEXT:    [[B_I1:%.*]] = shufflevector <4 x half> [[B]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
 ; MIN32-NEXT:    [[R_I1:%.*]] = fadd <2 x half> [[A_I1]], [[B_I1]]
-; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 ; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; MIN32-NEXT:    ret <4 x half> [[R]]
 ;
@@ -523,7 +523,7 @@ define <3 x i16> @fptosi_v3f16(<3 x half> %a) {
 ; MIN32-NEXT:    [[R_I0:%.*]] = fptosi <2 x half> [[A_I0]] to <2 x i16>
 ; MIN32-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 2
 ; MIN32-NEXT:    [[R_I1:%.*]] = fptosi half [[A_I1]] to i16
-; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i16> [[R_I0]], <2 x i16> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
+; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i16> [[R_I0]], <2 x i16> poison, <3 x i32> <i32 0, i32 1, i32 poison>
 ; MIN32-NEXT:    [[R:%.*]] = insertelement <3 x i16> [[TMP1]], i16 [[R_I1]], i64 2
 ; MIN32-NEXT:    ret <3 x i16> [[R]]
 ;
@@ -556,8 +556,8 @@ define <4 x i16> @fptosi_v4f16(<4 x half> %a) {
 ; MIN32-NEXT:    [[R_I0:%.*]] = fptosi <2 x half> [[A_I0]] to <2 x i16>
 ; MIN32-NEXT:    [[A_I1:%.*]] = shufflevector <4 x half> [[A]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
 ; MIN32-NEXT:    [[R_I1:%.*]] = fptosi <2 x half> [[A_I1]] to <2 x i16>
-; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i16> [[R_I0]], <2 x i16> [[R_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i16> [[R_I1]], <2 x i16> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i16> [[R_I0]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i16> [[R_I1]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 ; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; MIN32-NEXT:    ret <4 x i16> [[R]]
 ;
@@ -804,7 +804,7 @@ define <3 x i16> @load_insertelement_v3i16(ptr %pa, i16 %b) {
 ;
 ; MIN32-LABEL: @load_insertelement_v3i16(
 ; MIN32-NEXT:    [[A_I0:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8
-; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i16> [[A_I0]], <2 x i16> [[A_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
+; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i16> [[A_I0]], <2 x i16> poison, <3 x i32> <i32 0, i32 1, i32 poison>
 ; MIN32-NEXT:    [[R:%.*]] = insertelement <3 x i16> [[TMP1]], i16 [[B:%.*]], i64 2
 ; MIN32-NEXT:    ret <3 x i16> [[R]]
 ;
@@ -836,8 +836,8 @@ define <4 x i16> @load_insertelement_v4i16(ptr %pa, i16 %b) {
 ; MIN32-NEXT:    [[PA_I1:%.*]] = getelementptr <2 x i16>, ptr [[PA]], i32 1
 ; MIN32-NEXT:    [[A_I1:%.*]] = load <2 x i16>, ptr [[PA_I1]], align 4
 ; MIN32-NEXT:    [[TMP1:%.*]] = insertelement <2 x i16> [[A_I1]], i16 [[B:%.*]], i64 1
-; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i16> [[A_I0]], <2 x i16> [[A_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; MIN32-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i16> [[A_I0]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; MIN32-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 ; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; MIN32-NEXT:    ret <4 x i16> [[R]]
 ;
@@ -906,8 +906,8 @@ define void @shufflevector_shrink(ptr %pa) {
 ; MIN32-NEXT:    [[A_I0:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8
 ; MIN32-NEXT:    [[PA_I1:%.*]] = getelementptr <2 x i16>, ptr [[PA]], i32 1
 ; MIN32-NEXT:    [[A_I1:%.*]] = load <2 x i16>, ptr [[PA_I1]], align 4
-; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i16> [[A_I0]], <2 x i16> [[A_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i16> [[A_I1]], <2 x i16> [[A_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i16> [[A_I0]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i16> [[A_I1]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 ; MIN32-NEXT:    [[A:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> poison, <2 x i32> <i32 1, i32 2>
 ; MIN32-NEXT:    store <2 x i16> [[R]], ptr [[PA]], align 4
@@ -1221,7 +1221,7 @@ define <3 x half> @call_v3f16(<3 x half> %a, <3 x half> %b) {
 ; MIN32-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 2
 ; MIN32-NEXT:    [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 2
 ; MIN32-NEXT:    [[R_I1:%.*]] = call half @llvm.minnum.f16(half [[A_I1]], half [[B_I1]])
-; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
+; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> poison, <3 x i32> <i32 0, i32 1, i32 poison>
 ; MIN32-NEXT:    [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2
 ; MIN32-NEXT:    ret <3 x half> [[R]]
 ;
@@ -1260,8 +1260,8 @@ define <4 x half> @call_v4f16(<4 x half> %a, <4 x half> %b) {
 ; MIN32-NEXT:    [[A_I1:%.*]] = shufflevector <4 x half> [[A]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
 ; MIN32-NEXT:    [[B_I1:%.*]] = shufflevector <4 x half> [[B]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
 ; MIN32-NEXT:    [[R_I1:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[A_I1]], <2 x half> [[B_I1]])
-; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 ; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; MIN32-NEXT:    ret <4 x half> [[R]]
 ;

diff  --git a/llvm/test/Transforms/Scalarizer/vector-bool-not-aligned-min-bits.ll b/llvm/test/Transforms/Scalarizer/vector-bool-not-aligned-min-bits.ll
new file mode 100644
index 0000000000000..0d757b37f98df
--- /dev/null
+++ b/llvm/test/Transforms/Scalarizer/vector-bool-not-aligned-min-bits.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -passes="scalarizer<min-bits=32>" -S | FileCheck %s
+
+define void @vector_scalar_not_aligned(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
+; CHECK-LABEL: @vector_scalar_not_aligned(
+; CHECK-NEXT:    [[VAL1:%.*]] = load <35 x i32>, ptr addrspace(1) [[ARG_0:%.*]], align 4
+; CHECK-NEXT:    [[VAL2:%.*]] = load <35 x i32>, ptr addrspace(1) [[ARG_1:%.*]], align 4
+; CHECK-NEXT:    [[BOOLVEC1:%.*]] = icmp ne <35 x i32> [[VAL1]], zeroinitializer
+; CHECK-NEXT:    [[BOOLVEC1_I0:%.*]] = shufflevector <35 x i1> [[BOOLVEC1]], <35 x i1> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    [[BOOLVEC1_I1:%.*]] = shufflevector <35 x i1> [[BOOLVEC1]], <35 x i1> poison, <3 x i32> <i32 32, i32 33, i32 34>
+; CHECK-NEXT:    [[BOOLVEC2:%.*]] = icmp ne <35 x i32> [[VAL2]], zeroinitializer
+; CHECK-NEXT:    [[BOOLVEC2_I0:%.*]] = shufflevector <35 x i1> [[BOOLVEC2]], <35 x i1> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    [[BOOLRESULT_I0:%.*]] = xor <32 x i1> [[BOOLVEC1_I0]], [[BOOLVEC2_I0]]
+; CHECK-NEXT:    [[BOOLVEC2_I1:%.*]] = shufflevector <35 x i1> [[BOOLVEC2]], <35 x i1> poison, <3 x i32> <i32 32, i32 33, i32 34>
+; CHECK-NEXT:    [[BOOLRESULT_I1:%.*]] = xor <3 x i1> [[BOOLVEC1_I1]], [[BOOLVEC2_I1]]
+; CHECK-NEXT:    [[INST1:%.*]] = shufflevector <32 x i1> [[BOOLRESULT_I0]], <32 x i1> poison, <35 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[INST2:%.*]] = shufflevector <3 x i1> [[BOOLRESULT_I1]], <3 x i1> poison, <35 x i32> <i32 0, i32 1, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[BOOLRESULT:%.*]] = shufflevector <35 x i1> [[INST1]], <35 x i1> [[INST2]], <35 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 35, i32 36, i32 37>
+; CHECK-NEXT:    [[EXT:%.*]] = zext <35 x i1> [[BOOLRESULT]] to <35 x i32>
+; CHECK-NEXT:    ret void
+
+  %val1 = load <35 x i32>, ptr addrspace(1) %arg0, align 4
+  %val2 = load <35 x i32>, ptr addrspace(1) %arg1, align 4
+  %boolVec1 = icmp ne <35 x i32> %val1, zeroinitializer
+  %boolVec2 = icmp ne <35 x i32> %val2, zeroinitializer
+  %boolResult = xor <35 x i1> %boolVec1, %boolVec2
+  %ext = zext <35 x i1> %boolResult to <35 x i32>
+  ret void
+}


        


More information about the llvm-commits mailing list