[llvm] 7da91fa - [CodeGen] Fix failing assert in interleaved access pass (#156457)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 3 02:55:43 PDT 2025
Author: David Sherwood
Date: 2025-09-03T10:55:39+01:00
New Revision: 7da91fa801d8bd490c8dcd9a29faba209feb2954
URL: https://github.com/llvm/llvm-project/commit/7da91fa801d8bd490c8dcd9a29faba209feb2954
DIFF: https://github.com/llvm/llvm-project/commit/7da91fa801d8bd490c8dcd9a29faba209feb2954.diff
LOG: [CodeGen] Fix failing assert in interleaved access pass (#156457)
In the InterleavedAccessPass, the function getMask assumes that
shufflevector operations are always fixed-width, which isn't true
because we also use them for splats of scalable vectors. This patch fixes
the code by bailing out for scalable vectors.
Added:
Modified:
llvm/lib/CodeGen/InterleavedAccessPass.cpp
llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index c5e97037be336..e3ded12a1847b 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -660,6 +660,10 @@ static std::pair<Value *, APInt> getMask(Value *WideMask, unsigned Factor,
}
if (auto *SVI = dyn_cast<ShuffleVectorInst>(WideMask)) {
+ Type *Op1Ty = SVI->getOperand(1)->getType();
+ if (!isa<FixedVectorType>(Op1Ty))
+ return {nullptr, GapMask};
+
// Check that the shuffle mask is: a) an interleave, b) all of the same
// set of the elements, and c) contained by the first source. (c) could
// be relaxed if desired.
diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll
index d7649801ea2fc..ed9fba3a01965 100644
--- a/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll
+++ b/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll
@@ -1,5 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
-; RUN: opt < %s -interleaved-access -S | FileCheck %s
; RUN: opt < %s -passes=interleaved-access -S | FileCheck %s
target triple = "aarch64-linux-gnu"
@@ -186,6 +185,22 @@ define void @interleave_nxptr_factor2(ptr %ptr, <vscale x 2 x ptr> %l, <vscale x
ret void
}
+define void @interleave_nxi8_factor2_masked_store_splatmask(ptr %ptr, <vscale x 16 x i8> %l, <vscale x 16 x i8> %r, i1 %mask) #0 {
+; CHECK-LABEL: define void @interleave_nxi8_factor2_masked_store_splatmask
+; CHECK-SAME: (ptr [[PTR:%.*]], <vscale x 16 x i8> [[L:%.*]], <vscale x 16 x i8> [[R:%.*]], i1 [[MASK:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[INTERLEAVE:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> [[L]], <vscale x 16 x i8> [[R]])
+; CHECK-NEXT: [[MASK_INS:%.*]] = insertelement <vscale x 32 x i1> poison, i1 [[MASK]], i64 0
+; CHECK-NEXT: [[MASK_SPLAT:%.*]] = shufflevector <vscale x 32 x i1> [[MASK_INS]], <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+; CHECK-NEXT: tail call void @llvm.masked.store.nxv32i8.p0(<vscale x 32 x i8> [[INTERLEAVE]], ptr [[PTR]], i32 1, <vscale x 32 x i1> [[MASK_SPLAT]])
+; CHECK-NEXT: ret void
+;
+ %interleave = tail call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> %l, <vscale x 16 x i8> %r)
+ %mask.ins = insertelement <vscale x 32 x i1> poison, i1 %mask, i64 0
+ %mask.splat = shufflevector <vscale x 32 x i1> %mask.ins, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ tail call void @llvm.masked.store.nxv32i8.p0(<vscale x 32 x i8> %interleave, ptr %ptr, i32 1, <vscale x 32 x i1> %mask.splat)
+ ret void
+}
+
;;; Check that we 'legalize' operations that are wider than the target supports.
define void @deinterleave_wide_nxi32_factor2(ptr %ptr) #0 {
More information about the llvm-commits
mailing list