[llvm] Scalarizer : Fix vector shuffle issue when can't aligned to customized minBits. (PR #163912)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 4 18:49:48 PST 2025
https://github.com/ShchchowAMD updated https://github.com/llvm/llvm-project/pull/163912
>From cca6c72c4cfb43a13231c423948f9139dcb4c22f Mon Sep 17 00:00:00 2001
From: "Zhou, Shaochi(AMD)" <shaozhou at amd.com>
Date: Fri, 17 Oct 2025 14:09:47 +0800
Subject: [PATCH 1/3] Scalarizer : Fix vector shuffle issue when can't aligned
to customized minBits.
When minBits is set to a custom value and the scalarizer pass runs, if the size of the last
remaining boolean vector fragment can't be aligned to minBits, the remaining bits should be processed one by one; a direct shuffle is not allowed.
---
llvm/lib/Transforms/Scalar/Scalarizer.cpp | 20 +++++++++--
.../vector-bool-not-aligned-min-bits.ll | 34 +++++++++++++++++++
2 files changed, 52 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/Transforms/Scalarizer/vector-bool-not-aligned-min-bits.ll
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 25a531ca33214..57395d375d177 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -252,14 +252,30 @@ static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
Res = Builder.CreateInsertElement(Res, Fragment, I * VS.NumPacked,
Name + ".upto" + Twine(I));
} else {
+ if (NumPacked < VS.NumPacked) {
+ // If last pack of remained bits not aligned to target pack size.
+ ExtendMask.resize(NumPacked);
+ }
+
Fragment = Builder.CreateShuffleVector(Fragment, Fragment, ExtendMask);
if (I == 0) {
Res = Fragment;
} else {
for (unsigned J = 0; J < NumPacked; ++J)
InsertMask[I * VS.NumPacked + J] = NumElements + J;
- Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask,
- Name + ".upto" + Twine(I));
+
+ if (NumPacked < VS.NumPacked) {
+ for (unsigned J = 0; J < NumPacked; ++J) {
+ auto FragmentBit = Builder.CreateExtractElement(Fragment, J);
+ Res = Builder.CreateInsertElement(Res, FragmentBit,
+ I * VS.NumPacked + J);
+ }
+ Res->setName(Name + ".upto" + Twine(I));
+ } else {
+ Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask,
+ Name + ".upto" + Twine(I));
+ }
+
for (unsigned J = 0; J < NumPacked; ++J)
InsertMask[I * VS.NumPacked + J] = I * VS.NumPacked + J;
}
diff --git a/llvm/test/Transforms/Scalarizer/vector-bool-not-aligned-min-bits.ll b/llvm/test/Transforms/Scalarizer/vector-bool-not-aligned-min-bits.ll
new file mode 100644
index 0000000000000..30fe3f5614ace
--- /dev/null
+++ b/llvm/test/Transforms/Scalarizer/vector-bool-not-aligned-min-bits.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -passes="scalarizer<min-bits=8>" -S | FileCheck %s
+
+define void @vector_scalar_not_aligned(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
+; CHECK-LABEL: @vector_scalar_not_aligned(
+; CHECK-NEXT: [[VAL_I1:%.*]] = load <11 x i32>, ptr addrspace(1) [[ARG_0:%.*]], align 4
+; CHECK-NEXT: [[VAL_I2:%.*]] = load <11 x i32>, ptr addrspace(1) [[ARG_1:%.*]], align 4
+; CHECK-NEXT: [[BOOL_VEC_B1:%.*]] = icmp ne <11 x i32> [[VAL_I1]], zeroinitializer
+; CHECK-NEXT: [[BOOL_VEC_B1_I0:%.*]] = shufflevector <11 x i1> [[BOOL_VEC_B1]], <11 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[BOOL_VEC_B1_I1:%.*]] = shufflevector <11 x i1> [[BOOL_VEC_B1]], <11 x i1> poison, <3 x i32> <i32 8, i32 9, i32 10>
+; CHECK-NEXT: [[BOOL_VEC_B2:%.*]] = icmp ne <11 x i32> [[VAL_I2]], zeroinitializer
+; CHECK-NEXT: [[BOOL_VEC_B2_I0:%.*]] = shufflevector <11 x i1> [[BOOL_VEC_B2]], <11 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[BOOL_RESULT_I0:%.*]] = xor <8 x i1> [[BOOL_VEC_B1_I0]], [[BOOL_VEC_B2_I0]]
+; CHECK-NEXT: [[BOOL_VEC_B2_I1:%.*]] = shufflevector <11 x i1> [[BOOL_VEC_B2]], <11 x i1> poison, <3 x i32> <i32 8, i32 9, i32 10>
+; CHECK-NEXT: [[BOOL_RESULT_I1:%.*]] = xor <3 x i1> [[BOOL_VEC_B1_I1]], [[BOOL_VEC_B2_I1]]
+; CHECK-NEXT: [[ELEM_1:%.*]] = shufflevector <8 x i1> [[BOOL_RESULT_I0]], <8 x i1> [[BOOL_RESULT_I0]], <11 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[ELEM_2:%.*]] = shufflevector <3 x i1> [[BOOL_RESULT_I1]], <3 x i1> [[BOOL_RESULT_I1]], <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT: [[ELEM_3:%.*]] = extractelement <3 x i1> [[ELEM_2]], i64 0
+; CHECK-NEXT: [[ELEM_4:%.*]] = insertelement <11 x i1> [[ELEM_1]], i1 [[ELEM_3]], i64 8
+; CHECK-NEXT: [[ELEM_5:%.*]] = extractelement <3 x i1> [[ELEM_2]], i64 1
+; CHECK-NEXT: [[ELEM_6:%.*]] = insertelement <11 x i1> [[ELEM_4]], i1 [[ELEM_5]], i64 9
+; CHECK-NEXT: [[ELEM_7:%.*]] = extractelement <3 x i1> [[ELEM_2]], i64 2
+; CHECK-NEXT: [[BOOL_RESULT:%.*]] = insertelement <11 x i1> [[ELEM_6]], i1 [[ELEM_7]], i64 10
+; CHECK-NEXT: [[EXT:%.*]] = zext <11 x i1> [[BOOL_RESULT]] to <11 x i32>
+; CHECK-NEXT: ret void
+
+ %val1 = load <11 x i32>, ptr addrspace(1) %arg0, align 4
+ %val2 = load <11 x i32>, ptr addrspace(1) %arg1, align 4
+ %boolVec1 = icmp ne <11 x i32> %val1, zeroinitializer
+ %boolVec2 = icmp ne <11 x i32> %val2, zeroinitializer
+ %boolResult = xor <11 x i1> %boolVec1, %boolVec2
+ %ext = zext <11 x i1> %boolResult to <11 x i32>
+ ret void
+}
\ No newline at end of file
>From 01c4fd3c20194694776fef369f6aad2f91ef1cec Mon Sep 17 00:00:00 2001
From: "Zhou, Shaochi(AMD)" <shaozhou at amd.com>
Date: Thu, 4 Dec 2025 16:33:41 +0800
Subject: [PATCH 2/3] Refine code to use shuffleVector to let too small
fragments be able to be inserted into final scalarized results.
---
llvm/lib/Transforms/Scalar/Scalarizer.cpp | 26 +++++++---------
.../vector-bool-not-aligned-min-bits.ll | 30 ++++++++-----------
2 files changed, 24 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 57395d375d177..ad8fd9b781a55 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -252,9 +252,16 @@ static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
Res = Builder.CreateInsertElement(Res, Fragment, I * VS.NumPacked,
Name + ".upto" + Twine(I));
} else {
- if (NumPacked < VS.NumPacked) {
+ unsigned remainedSize = NumPacked;
+ while (remainedSize <= VS.NumPacked / 2) {
// If last pack of remained bits not aligned to target pack size.
- ExtendMask.resize(NumPacked);
+ remainedSize = remainedSize * 2;
+ SmallVector<int> SmallExtendMask;
+ SmallExtendMask.resize(remainedSize, -1);
+ for (unsigned I = 0; I < remainedSize; ++I)
+ SmallExtendMask[I] = I;
+ Fragment =
+ Builder.CreateShuffleVector(Fragment, Fragment, SmallExtendMask);
}
Fragment = Builder.CreateShuffleVector(Fragment, Fragment, ExtendMask);
@@ -263,19 +270,8 @@ static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
} else {
for (unsigned J = 0; J < NumPacked; ++J)
InsertMask[I * VS.NumPacked + J] = NumElements + J;
-
- if (NumPacked < VS.NumPacked) {
- for (unsigned J = 0; J < NumPacked; ++J) {
- auto FragmentBit = Builder.CreateExtractElement(Fragment, J);
- Res = Builder.CreateInsertElement(Res, FragmentBit,
- I * VS.NumPacked + J);
- }
- Res->setName(Name + ".upto" + Twine(I));
- } else {
- Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask,
- Name + ".upto" + Twine(I));
- }
-
+ Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask,
+ Name + ".upto" + Twine(I));
for (unsigned J = 0; J < NumPacked; ++J)
InsertMask[I * VS.NumPacked + J] = I * VS.NumPacked + J;
}
diff --git a/llvm/test/Transforms/Scalarizer/vector-bool-not-aligned-min-bits.ll b/llvm/test/Transforms/Scalarizer/vector-bool-not-aligned-min-bits.ll
index 30fe3f5614ace..3ac91342c7c59 100644
--- a/llvm/test/Transforms/Scalarizer/vector-bool-not-aligned-min-bits.ll
+++ b/llvm/test/Transforms/Scalarizer/vector-bool-not-aligned-min-bits.ll
@@ -5,23 +5,19 @@ define void @vector_scalar_not_aligned(ptr addrspace(1) %arg0, ptr addrspace(1)
; CHECK-LABEL: @vector_scalar_not_aligned(
; CHECK-NEXT: [[VAL_I1:%.*]] = load <11 x i32>, ptr addrspace(1) [[ARG_0:%.*]], align 4
; CHECK-NEXT: [[VAL_I2:%.*]] = load <11 x i32>, ptr addrspace(1) [[ARG_1:%.*]], align 4
-; CHECK-NEXT: [[BOOL_VEC_B1:%.*]] = icmp ne <11 x i32> [[VAL_I1]], zeroinitializer
-; CHECK-NEXT: [[BOOL_VEC_B1_I0:%.*]] = shufflevector <11 x i1> [[BOOL_VEC_B1]], <11 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[BOOL_VEC_B1_I1:%.*]] = shufflevector <11 x i1> [[BOOL_VEC_B1]], <11 x i1> poison, <3 x i32> <i32 8, i32 9, i32 10>
-; CHECK-NEXT: [[BOOL_VEC_B2:%.*]] = icmp ne <11 x i32> [[VAL_I2]], zeroinitializer
-; CHECK-NEXT: [[BOOL_VEC_B2_I0:%.*]] = shufflevector <11 x i1> [[BOOL_VEC_B2]], <11 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[BOOL_RESULT_I0:%.*]] = xor <8 x i1> [[BOOL_VEC_B1_I0]], [[BOOL_VEC_B2_I0]]
-; CHECK-NEXT: [[BOOL_VEC_B2_I1:%.*]] = shufflevector <11 x i1> [[BOOL_VEC_B2]], <11 x i1> poison, <3 x i32> <i32 8, i32 9, i32 10>
-; CHECK-NEXT: [[BOOL_RESULT_I1:%.*]] = xor <3 x i1> [[BOOL_VEC_B1_I1]], [[BOOL_VEC_B2_I1]]
-; CHECK-NEXT: [[ELEM_1:%.*]] = shufflevector <8 x i1> [[BOOL_RESULT_I0]], <8 x i1> [[BOOL_RESULT_I0]], <11 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[ELEM_2:%.*]] = shufflevector <3 x i1> [[BOOL_RESULT_I1]], <3 x i1> [[BOOL_RESULT_I1]], <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT: [[ELEM_3:%.*]] = extractelement <3 x i1> [[ELEM_2]], i64 0
-; CHECK-NEXT: [[ELEM_4:%.*]] = insertelement <11 x i1> [[ELEM_1]], i1 [[ELEM_3]], i64 8
-; CHECK-NEXT: [[ELEM_5:%.*]] = extractelement <3 x i1> [[ELEM_2]], i64 1
-; CHECK-NEXT: [[ELEM_6:%.*]] = insertelement <11 x i1> [[ELEM_4]], i1 [[ELEM_5]], i64 9
-; CHECK-NEXT: [[ELEM_7:%.*]] = extractelement <3 x i1> [[ELEM_2]], i64 2
-; CHECK-NEXT: [[BOOL_RESULT:%.*]] = insertelement <11 x i1> [[ELEM_6]], i1 [[ELEM_7]], i64 10
-; CHECK-NEXT: [[EXT:%.*]] = zext <11 x i1> [[BOOL_RESULT]] to <11 x i32>
+; CHECK-NEXT: [[BOOLVEC1:%.*]] = icmp ne <11 x i32> [[VAL_I1]], zeroinitializer
+; CHECK-NEXT: [[BOOLVEC1_I0:%.*]] = shufflevector <11 x i1> [[BOOLVEC1]] <11 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[BOOLVEC1_I1:%.*]] = shufflevector <11 x i1> [[BOOLVEC1]], <11 x i1> poison, <3 x i32> <i32 8, i32 9, i32 10>
+; CHECK-NEXT: [[BOOLVEC2:%.*]] = icmp ne <11 x i32> [[VAL_I2]], zeroinitializer
+; CHECK-NEXT: [[BOOLVEC2_I0:%.*]] = shufflevector <11 x i1> [[BOOLVEC2]], <11 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[BOOLRESULT_I0:%.*]] = xor <8 x i1> [[BOOLVEC1_I0]], [[BOOLVEC2_I0]]
+; CHECK-NEXT: [[BOOLVEC2_I1:%.*]] = shufflevector <11 x i1> [[BOOLVEC2]], <11 x i1> poison, <3 x i32> <i32 8, i32 9, i32 10>
+; CHECK-NEXT: [[BOOLRESULT_I1:%.*]] = xor <3 x i1> [[BOOLVEC1_I1]], [[BOOLVEC2_I1]]
+; CHECK-NEXT: [[SSA1:%.*]] = shufflevector <8 x i1> [[BOOLRESULT_I0]], <8 x i1> [[BOOLRESULT_I0]], <11 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[SSA2:%.*]] = shufflevector <3 x i1> [[BOOLRESULT_I1]], <3 x i1> [[BOOLRESULT_I1]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+; CHECK-NEXT: [[SSA3:%.*]] = shufflevector <6 x i1> [[SSA2]], <6 x i1> [[SSA2]], <11 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[BOOLRESULT:%.*]] = shufflevector <11 x i1> [[SSA1]], <11 x i1> [[SSA3]], <11 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 11, i32 12, i32 13>
+; CHECK-NEXT: [[EXT:%.*]] = zext <11 x i1> [[BOOLRESULT]] to <11 x i32>
; CHECK-NEXT: ret void
%val1 = load <11 x i32>, ptr addrspace(1) %arg0, align 4
>From bcdab5e30d72d56c2f0353b65ca13f19bc2a310a Mon Sep 17 00:00:00 2001
From: "Zhou, Shaochi(AMD)" <shaozhou at amd.com>
Date: Fri, 5 Dec 2025 10:44:24 +0800
Subject: [PATCH 3/3] Only truncate and resize extendMask to solve the issue,
refine test to make it more significant.
---
llvm/lib/Transforms/Scalar/Scalarizer.cpp | 17 +++-----
.../vector-bool-not-aligned-min-bits.ll | 41 +++++++++----------
2 files changed, 26 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index ad8fd9b781a55..67bb7aa9bc3e6 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -252,19 +252,14 @@ static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
Res = Builder.CreateInsertElement(Res, Fragment, I * VS.NumPacked,
Name + ".upto" + Twine(I));
} else {
- unsigned remainedSize = NumPacked;
- while (remainedSize <= VS.NumPacked / 2) {
- // If last pack of remained bits not aligned to target pack size.
- remainedSize = remainedSize * 2;
- SmallVector<int> SmallExtendMask;
- SmallExtendMask.resize(remainedSize, -1);
- for (unsigned I = 0; I < remainedSize; ++I)
- SmallExtendMask[I] = I;
- Fragment =
- Builder.CreateShuffleVector(Fragment, Fragment, SmallExtendMask);
+ if (NumPacked <= VS.NumPacked / 2) {
+ // If last pack of remained bits not match current ExtendMask size.
+ ExtendMask.truncate(NumPacked);
+ ExtendMask.resize(NumElements, -1);
}
- Fragment = Builder.CreateShuffleVector(Fragment, Fragment, ExtendMask);
+ Fragment = Builder.CreateShuffleVector(
+ Fragment, PoisonValue::get(Fragment->getType()), ExtendMask);
if (I == 0) {
Res = Fragment;
} else {
diff --git a/llvm/test/Transforms/Scalarizer/vector-bool-not-aligned-min-bits.ll b/llvm/test/Transforms/Scalarizer/vector-bool-not-aligned-min-bits.ll
index 3ac91342c7c59..1016b2567df11 100644
--- a/llvm/test/Transforms/Scalarizer/vector-bool-not-aligned-min-bits.ll
+++ b/llvm/test/Transforms/Scalarizer/vector-bool-not-aligned-min-bits.ll
@@ -1,30 +1,29 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt %s -passes="scalarizer<min-bits=8>" -S | FileCheck %s
+; RUN: opt %s -passes="scalarizer<min-bits=32>" -S | FileCheck %s
define void @vector_scalar_not_aligned(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
; CHECK-LABEL: @vector_scalar_not_aligned(
-; CHECK-NEXT: [[VAL_I1:%.*]] = load <11 x i32>, ptr addrspace(1) [[ARG_0:%.*]], align 4
-; CHECK-NEXT: [[VAL_I2:%.*]] = load <11 x i32>, ptr addrspace(1) [[ARG_1:%.*]], align 4
-; CHECK-NEXT: [[BOOLVEC1:%.*]] = icmp ne <11 x i32> [[VAL_I1]], zeroinitializer
-; CHECK-NEXT: [[BOOLVEC1_I0:%.*]] = shufflevector <11 x i1> [[BOOLVEC1]] <11 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[BOOLVEC1_I1:%.*]] = shufflevector <11 x i1> [[BOOLVEC1]], <11 x i1> poison, <3 x i32> <i32 8, i32 9, i32 10>
-; CHECK-NEXT: [[BOOLVEC2:%.*]] = icmp ne <11 x i32> [[VAL_I2]], zeroinitializer
-; CHECK-NEXT: [[BOOLVEC2_I0:%.*]] = shufflevector <11 x i1> [[BOOLVEC2]], <11 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[BOOLRESULT_I0:%.*]] = xor <8 x i1> [[BOOLVEC1_I0]], [[BOOLVEC2_I0]]
-; CHECK-NEXT: [[BOOLVEC2_I1:%.*]] = shufflevector <11 x i1> [[BOOLVEC2]], <11 x i1> poison, <3 x i32> <i32 8, i32 9, i32 10>
+; CHECK-NEXT: [[VAL1:%.*]] = load <35 x i32>, ptr addrspace(1) [[ARG_0:%.*]], align 4
+; CHECK-NEXT: [[VAL2:%.*]] = load <35 x i32>, ptr addrspace(1) [[ARG_1:%.*]], align 4
+; CHECK-NEXT: [[BOOLVEC1:%.*]] = icmp ne <35 x i32> [[VAL1]], zeroinitializer
+; CHECK-NEXT: [[BOOLVEC1_I0:%.*]] = shufflevector <35 x i1> [[BOOLVEC1]], <35 x i1> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT: [[BOOLVEC1_I1:%.*]] = shufflevector <35 x i1> [[BOOLVEC1]], <35 x i1> poison, <3 x i32> <i32 32, i32 33, i32 34>
+; CHECK-NEXT: [[BOOLVEC2:%.*]] = icmp ne <35 x i32> [[VAL2]], zeroinitializer
+; CHECK-NEXT: [[BOOLVEC2_I0:%.*]] = shufflevector <35 x i1> [[BOOLVEC2]], <35 x i1> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT: [[BOOLRESULT_I0:%.*]] = xor <32 x i1> [[BOOLVEC1_I0]], [[BOOLVEC2_I0]]
+; CHECK-NEXT: [[BOOLVEC2_I1:%.*]] = shufflevector <35 x i1> [[BOOLVEC2]], <35 x i1> poison, <3 x i32> <i32 32, i32 33, i32 34>
; CHECK-NEXT: [[BOOLRESULT_I1:%.*]] = xor <3 x i1> [[BOOLVEC1_I1]], [[BOOLVEC2_I1]]
-; CHECK-NEXT: [[SSA1:%.*]] = shufflevector <8 x i1> [[BOOLRESULT_I0]], <8 x i1> [[BOOLRESULT_I0]], <11 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[SSA2:%.*]] = shufflevector <3 x i1> [[BOOLRESULT_I1]], <3 x i1> [[BOOLRESULT_I1]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
-; CHECK-NEXT: [[SSA3:%.*]] = shufflevector <6 x i1> [[SSA2]], <6 x i1> [[SSA2]], <11 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[BOOLRESULT:%.*]] = shufflevector <11 x i1> [[SSA1]], <11 x i1> [[SSA3]], <11 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 11, i32 12, i32 13>
-; CHECK-NEXT: [[EXT:%.*]] = zext <11 x i1> [[BOOLRESULT]] to <11 x i32>
+; CHECK-NEXT: [[INST1:%.*]] = shufflevector <32 x i1> [[BOOLRESULT_I0]], <32 x i1> poison, <35 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[INST2:%.*]] = shufflevector <3 x i1> [[BOOLRESULT_I1]], <3 x i1> poison, <35 x i32> <i32 0, i32 1, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[BOOLRESULT:%.*]] = shufflevector <35 x i1> [[INST1]], <35 x i1> [[INST2]], <35 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 35, i32 36, i32 37>
+; CHECK-NEXT: [[EXT:%.*]] = zext <35 x i1> [[BOOLRESULT]] to <35 x i32>
; CHECK-NEXT: ret void
- %val1 = load <11 x i32>, ptr addrspace(1) %arg0, align 4
- %val2 = load <11 x i32>, ptr addrspace(1) %arg1, align 4
- %boolVec1 = icmp ne <11 x i32> %val1, zeroinitializer
- %boolVec2 = icmp ne <11 x i32> %val2, zeroinitializer
- %boolResult = xor <11 x i1> %boolVec1, %boolVec2
- %ext = zext <11 x i1> %boolResult to <11 x i32>
+ %val1 = load <35 x i32>, ptr addrspace(1) %arg0, align 4
+ %val2 = load <35 x i32>, ptr addrspace(1) %arg1, align 4
+ %boolVec1 = icmp ne <35 x i32> %val1, zeroinitializer
+ %boolVec2 = icmp ne <35 x i32> %val2, zeroinitializer
+ %boolResult = xor <35 x i1> %boolVec1, %boolVec2
+ %ext = zext <35 x i1> %boolResult to <35 x i32>
ret void
}
\ No newline at end of file
More information about the llvm-commits
mailing list