[llvm] Scalarizer : Fix vector shuffle issue when can't aligned to customized minBits. (PR #163912)

via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 16 23:29:39 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Chow (ShchchowAMD)

<details>
<summary>Changes</summary>

When a value is set for minBits and the scalarizer pass runs, if the last remaining part of a boolean vector cannot be aligned to min bits, the remaining bits should be processed one by one, and a direct shuffle must not be used during packing.

Problem:
In the 'concatenate' step, when processing a boolean vector, if the last remaining bits (the final fragment) cannot be aligned to minBits but still need to be packed, those bits should be processed one by one.

A direct call to vector shuffle assumes that the remaining boolean bits can be packed to the target pack size. For example, when processing a boolean vector with `size = 7` and `min bits = 4`, the first fragment of `4` bits can be packed correctly, but `3` bits still remain, and these cannot be used in a vector shuffle call.

Solution:
If the remaining bits cannot be aligned to the required target (min bits) pack size, process them one by one.
(This will mostly affect only boolean vectors, as their bit width may not be aligned to a power of 2.)

---
Full diff: https://github.com/llvm/llvm-project/pull/163912.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Scalar/Scalarizer.cpp (+18-2) 
- (added) llvm/test/Transforms/Scalarizer/vector-bool-not-aligned-min-bits.ll (+34) 


``````````diff
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 25a531ca33214..57395d375d177 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -252,14 +252,30 @@ static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
       Res = Builder.CreateInsertElement(Res, Fragment, I * VS.NumPacked,
                                         Name + ".upto" + Twine(I));
     } else {
+      if (NumPacked < VS.NumPacked) {
+        // If last pack of remained bits not aligned to target pack size.
+        ExtendMask.resize(NumPacked);
+      }
+
       Fragment = Builder.CreateShuffleVector(Fragment, Fragment, ExtendMask);
       if (I == 0) {
         Res = Fragment;
       } else {
         for (unsigned J = 0; J < NumPacked; ++J)
           InsertMask[I * VS.NumPacked + J] = NumElements + J;
-        Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask,
-                                          Name + ".upto" + Twine(I));
+
+        if (NumPacked < VS.NumPacked) {
+          for (unsigned J = 0; J < NumPacked; ++J) {
+            auto FragmentBit = Builder.CreateExtractElement(Fragment, J);
+            Res = Builder.CreateInsertElement(Res, FragmentBit,
+                                              I * VS.NumPacked + J);
+          }
+          Res->setName(Name + ".upto" + Twine(I));
+        } else {
+          Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask,
+                                            Name + ".upto" + Twine(I));
+        }
+
         for (unsigned J = 0; J < NumPacked; ++J)
           InsertMask[I * VS.NumPacked + J] = I * VS.NumPacked + J;
       }
diff --git a/llvm/test/Transforms/Scalarizer/vector-bool-not-aligned-min-bits.ll b/llvm/test/Transforms/Scalarizer/vector-bool-not-aligned-min-bits.ll
new file mode 100644
index 0000000000000..0a9e8365674c6
--- /dev/null
+++ b/llvm/test/Transforms/Scalarizer/vector-bool-not-aligned-min-bits.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -passes="scalarizer<min-bits=4>" -S | FileCheck %s
+
+define void @vector_scalar_not_aligned(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
+; CHECK-LABEL: @vector_scalar_not_aligned(
+; CHECK-NEXT:    [[VAL_I1:%.*]] = load <7 x i32>, ptr addrspace(1) [[ARG_0:%.*]], align 4
+; CHECK-NEXT:    [[VAL_I2:%.*]] = load <7 x i32>, ptr addrspace(1) [[ARG_1:%.*]], align 4
+; CHECK-NEXT:    [[BOOL_VEC_B1:%.*]] = icmp ne <7 x i32> [[VAL_I1]], zeroinitializer
+; CHECK-NEXT:    [[BOOL_VEC_B1_I0:%.*]] = shufflevector <7 x i1> [[BOOL_VEC_B1]], <7 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[BOOL_VEC_B1_I1:%.*]] = shufflevector <7 x i1> [[BOOL_VEC_B1]], <7 x i1> poison, <3 x i32> <i32 4, i32 5, i32 6>
+; CHECK-NEXT:    [[BOOL_VEC_B2:%.*]] = icmp ne <7 x i32> [[VAL_I2]], zeroinitializer
+; CHECK-NEXT:    [[BOOL_VEC_B2_I0:%.*]] = shufflevector <7 x i1> [[BOOL_VEC_B2]], <7 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[BOOL_RESULT_I0:%.*]] = xor <4 x i1> [[BOOL_VEC_B1_I0]], [[BOOL_VEC_B2_I0]]
+; CHECK-NEXT:    [[BOOL_VEC_B2_I1:%.*]] = shufflevector <7 x i1> [[BOOL_VEC_B2]], <7 x i1> poison, <3 x i32> <i32 4, i32 5, i32 6>
+; CHECK-NEXT:    [[BOOL_RESULT_I1:%.*]] = xor <3 x i1> [[BOOL_VEC_B1_I1]], [[BOOL_VEC_B2_I1]]
+; CHECK-NEXT:    [[ELEM_1:%.*]] = shufflevector <4 x i1> [[BOOL_RESULT_I0]], <4 x i1> [[BOOL_RESULT_I0]], <7 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[ELEM_2:%.*]] = shufflevector <3 x i1> [[BOOL_RESULT_I1]], <3 x i1> [[BOOL_RESULT_I1]], <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[ELEM_3:%.*]] = extractelement <3 x i1> [[ELEM_2]], i64 0
+; CHECK-NEXT:    [[ELEM_4:%.*]] = insertelement <7 x i1> [[ELEM_1]], i1 [[ELEM_3]], i64 4
+; CHECK-NEXT:    [[ELEM_5:%.*]] = extractelement <3 x i1> [[ELEM_2]], i64 1
+; CHECK-NEXT:    [[ELEM_6:%.*]] = insertelement <7 x i1> [[ELEM_4]], i1 [[ELEM_5]], i64 5
+; CHECK-NEXT:    [[ELEM_7:%.*]] = extractelement <3 x i1> [[ELEM_2]], i64 2
+; CHECK-NEXT:    [[BOOL_RESULT:%.*]] = insertelement <7 x i1> [[ELEM_6]], i1 [[ELEM_7]], i64 6
+; CHECK-NEXT:    [[EXT:%.*]] = zext <7 x i1> [[BOOL_RESULT]] to <7 x i32>
+; CHECK-NEXT:    ret void
+
+  %val1 = load <7 x i32>, ptr addrspace(1) %arg0, align 4
+  %val2 = load <7 x i32>, ptr addrspace(1) %arg1, align 4
+  %boolVec1 = icmp ne <7 x i32> %val1, zeroinitializer
+  %boolVec2 = icmp ne <7 x i32> %val2, zeroinitializer
+  %boolResult = xor <7 x i1> %boolVec1, %boolVec2
+  %ext = zext <7 x i1> %boolResult to <7 x i32>
+  ret void
+}
\ No newline at end of file

``````````

</details>


https://github.com/llvm/llvm-project/pull/163912


More information about the llvm-commits mailing list