[llvm] Scalarizer : Fix vector shuffle issue when can't aligned to customized minBits. (PR #163912)

via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 3 22:19:46 PST 2025


ShchchowAMD wrote:

I made a temporary change that uses only shufflevector to fix this issue, but I am not sure whether it is the better approach.
Since the root cause of this issue is ExtendMask, maybe we could add several smaller extend steps for the last small fragment, which cannot meet the validation requirement?

The only change is to add the following code before the fragment extend, which avoids those insert and extract calls:

```
      unsigned remainedSize = NumPacked;
      while (remainedSize <= VS.NumPacked / 2) {
        // If last pack of remained bits not aligned to target pack size.
        remainedSize = remainedSize * 2;
        SmallVector<int> SmallExtendMask;
        SmallExtendMask.resize(remainedSize, -1);
        for (unsigned I = 0; I < remainedSize; ++I)
          SmallExtendMask[I] = I;
        Fragment =
            Builder.CreateShuffleVector(Fragment, Fragment, SmallExtendMask);
      }

      Fragment = Builder.CreateShuffleVector(Fragment, Fragment, ExtendMask);

```

The final generated IR for the example test should then look like this:
```

; ModuleID = 'test.txt'
source_filename = "test.txt"

define void @vector_scalar_not_aligned(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
  %val1 = load <11 x i32>, ptr addrspace(1) %arg0, align 4
  %val2 = load <11 x i32>, ptr addrspace(1) %arg1, align 4
  %boolVec1 = icmp ne <11 x i32> %val1, zeroinitializer
  %boolVec1.i0 = shufflevector <11 x i1> %boolVec1, <11 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %boolVec1.i1 = shufflevector <11 x i1> %boolVec1, <11 x i1> poison, <3 x i32> <i32 8, i32 9, i32 10>
  %boolVec2 = icmp ne <11 x i32> %val2, zeroinitializer
  %boolVec2.i0 = shufflevector <11 x i1> %boolVec2, <11 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %boolResult.i0 = xor <8 x i1> %boolVec1.i0, %boolVec2.i0
  %boolVec2.i1 = shufflevector <11 x i1> %boolVec2, <11 x i1> poison, <3 x i32> <i32 8, i32 9, i32 10>
  %boolResult.i1 = xor <3 x i1> %boolVec1.i1, %boolVec2.i1
  %1 = shufflevector <8 x i1> %boolResult.i0, <8 x i1> %boolResult.i0, <11 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison>
  %2 = shufflevector <3 x i1> %boolResult.i1, <3 x i1> %boolResult.i1, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
  %3 = shufflevector <6 x i1> %2, <6 x i1> %2, <11 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison>
  %boolResult = shufflevector <11 x i1> %1, <11 x i1> %3, <11 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 11, i32 12, i32 13>
  %ext = zext <11 x i1> %boolResult to <11 x i32>
  ret void
}
```

https://github.com/llvm/llvm-project/pull/163912


More information about the llvm-commits mailing list