[llvm] Scalarizer: Fix vector shuffle issue when the vector can't be aligned to customized minBits. (PR #163912)

via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 3 21:25:06 PST 2025


https://github.com/ShchchowAMD updated https://github.com/llvm/llvm-project/pull/163912

>From cca6c72c4cfb43a13231c423948f9139dcb4c22f Mon Sep 17 00:00:00 2001
From: "Zhou, Shaochi(AMD)" <shaozhou at amd.com>
Date: Fri, 17 Oct 2025 14:09:47 +0800
Subject: [PATCH] Scalarizer: Fix vector shuffle issue when the vector can't be
 aligned to customized minBits.

When a value is set for minBits and the scalarizer pass runs, the last
remaining part of a boolean vector may have a size that cannot be aligned to
minBits. In that case the remaining bits must be processed one by one; a
direct shuffle is not allowed.
---
 llvm/lib/Transforms/Scalar/Scalarizer.cpp     | 20 +++++++++--
 .../vector-bool-not-aligned-min-bits.ll       | 34 +++++++++++++++++++
 2 files changed, 52 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Transforms/Scalarizer/vector-bool-not-aligned-min-bits.ll

diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 25a531ca33214..57395d375d177 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -252,14 +252,30 @@ static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
       Res = Builder.CreateInsertElement(Res, Fragment, I * VS.NumPacked,
                                         Name + ".upto" + Twine(I));
     } else {
+      if (NumPacked < VS.NumPacked) {
+        // If last pack of remained bits not aligned to target pack size.
+        ExtendMask.resize(NumPacked);
+      }
+
       Fragment = Builder.CreateShuffleVector(Fragment, Fragment, ExtendMask);
       if (I == 0) {
         Res = Fragment;
       } else {
         for (unsigned J = 0; J < NumPacked; ++J)
           InsertMask[I * VS.NumPacked + J] = NumElements + J;
-        Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask,
-                                          Name + ".upto" + Twine(I));
+
+        if (NumPacked < VS.NumPacked) {
+          for (unsigned J = 0; J < NumPacked; ++J) {
+            auto FragmentBit = Builder.CreateExtractElement(Fragment, J);
+            Res = Builder.CreateInsertElement(Res, FragmentBit,
+                                              I * VS.NumPacked + J);
+          }
+          Res->setName(Name + ".upto" + Twine(I));
+        } else {
+          Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask,
+                                            Name + ".upto" + Twine(I));
+        }
+
         for (unsigned J = 0; J < NumPacked; ++J)
           InsertMask[I * VS.NumPacked + J] = I * VS.NumPacked + J;
       }
diff --git a/llvm/test/Transforms/Scalarizer/vector-bool-not-aligned-min-bits.ll b/llvm/test/Transforms/Scalarizer/vector-bool-not-aligned-min-bits.ll
new file mode 100644
index 0000000000000..30fe3f5614ace
--- /dev/null
+++ b/llvm/test/Transforms/Scalarizer/vector-bool-not-aligned-min-bits.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -passes="scalarizer<min-bits=8>" -S | FileCheck %s
+
+define void @vector_scalar_not_aligned(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
+; CHECK-LABEL: @vector_scalar_not_aligned(
+; CHECK-NEXT:    [[VAL_I1:%.*]] = load <11 x i32>, ptr addrspace(1) [[ARG_0:%.*]], align 4
+; CHECK-NEXT:    [[VAL_I2:%.*]] = load <11 x i32>, ptr addrspace(1) [[ARG_1:%.*]], align 4
+; CHECK-NEXT:    [[BOOL_VEC_B1:%.*]] = icmp ne <11 x i32> [[VAL_I1]], zeroinitializer
+; CHECK-NEXT:    [[BOOL_VEC_B1_I0:%.*]] = shufflevector <11 x i1> [[BOOL_VEC_B1]], <11 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[BOOL_VEC_B1_I1:%.*]] = shufflevector <11 x i1> [[BOOL_VEC_B1]], <11 x i1> poison, <3 x i32> <i32 8, i32 9, i32 10>
+; CHECK-NEXT:    [[BOOL_VEC_B2:%.*]] = icmp ne <11 x i32> [[VAL_I2]], zeroinitializer
+; CHECK-NEXT:    [[BOOL_VEC_B2_I0:%.*]] = shufflevector <11 x i1> [[BOOL_VEC_B2]], <11 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[BOOL_RESULT_I0:%.*]] = xor <8 x i1> [[BOOL_VEC_B1_I0]], [[BOOL_VEC_B2_I0]]
+; CHECK-NEXT:    [[BOOL_VEC_B2_I1:%.*]] = shufflevector <11 x i1> [[BOOL_VEC_B2]], <11 x i1> poison, <3 x i32> <i32 8, i32 9, i32 10>
+; CHECK-NEXT:    [[BOOL_RESULT_I1:%.*]] = xor <3 x i1> [[BOOL_VEC_B1_I1]], [[BOOL_VEC_B2_I1]]
+; CHECK-NEXT:    [[ELEM_1:%.*]] = shufflevector <8 x i1> [[BOOL_RESULT_I0]], <8 x i1> [[BOOL_RESULT_I0]], <11 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[ELEM_2:%.*]] = shufflevector <3 x i1> [[BOOL_RESULT_I1]], <3 x i1> [[BOOL_RESULT_I1]], <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[ELEM_3:%.*]] = extractelement <3 x i1> [[ELEM_2]], i64 0
+; CHECK-NEXT:    [[ELEM_4:%.*]] = insertelement <11 x i1> [[ELEM_1]], i1 [[ELEM_3]], i64 8
+; CHECK-NEXT:    [[ELEM_5:%.*]] = extractelement <3 x i1> [[ELEM_2]], i64 1
+; CHECK-NEXT:    [[ELEM_6:%.*]] = insertelement <11 x i1> [[ELEM_4]], i1 [[ELEM_5]], i64 9
+; CHECK-NEXT:    [[ELEM_7:%.*]] = extractelement <3 x i1> [[ELEM_2]], i64 2
+; CHECK-NEXT:    [[BOOL_RESULT:%.*]] = insertelement <11 x i1> [[ELEM_6]], i1 [[ELEM_7]], i64 10
+; CHECK-NEXT:    [[EXT:%.*]] = zext <11 x i1> [[BOOL_RESULT]] to <11 x i32>
+; CHECK-NEXT:    ret void
+
+  %val1 = load <11 x i32>, ptr addrspace(1) %arg0, align 4
+  %val2 = load <11 x i32>, ptr addrspace(1) %arg1, align 4
+  %boolVec1 = icmp ne <11 x i32> %val1, zeroinitializer
+  %boolVec2 = icmp ne <11 x i32> %val2, zeroinitializer
+  %boolResult = xor <11 x i1> %boolVec1, %boolVec2
+  %ext = zext <11 x i1> %boolResult to <11 x i32>
+  ret void
+}
\ No newline at end of file



More information about the llvm-commits mailing list