[llvm] [AArch64][GlobalISel] Don't crash when legalising vector G_SHL (PR #168848)

Thu Nov 20 02:45:03 PST 2025

https://github.com/cofibrant created https://github.com/llvm/llvm-project/pull/168848

Fixes a crash occurring in the AArch64 GlobalISel legaliser pass when legalising a `G_SHL` of vectors. The crash occurred because the legalisation rule modifying the scalar type was being applied after the rule modifying the number of elements of the vectors, while the action padding out vector inputs to `G_SHL` (and other shifts) assumes the scalar types already agree.

Fixes #168224


>From 73debc0ee3cf37a64e3bf9b4984c8cfc9a474557 Mon Sep 17 00:00:00 2001
From: Nathan Corbyn <n_corbyn at apple.com>
Date: Thu, 20 Nov 2025 10:28:55 +0000
Subject: [PATCH] [AArch64][GlobalISel] Don't crash when legalising vector
 G_SHL

---
 .../AArch64/GISel/AArch64LegalizerInfo.cpp    | 67 ++++++++++---------
 .../AArch64/aarch64-vector-shl-crash.ll       | 13 ++++
 2 files changed, 47 insertions(+), 33 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/aarch64-vector-shl-crash.ll

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index a88817c9d2d19..efd525bbbdabd 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -216,15 +216,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .widenScalarToNextPow2(0)
       .clampScalar(1, s32, s64)
       .clampScalar(0, s32, s64)
+      .minScalarSameAs(1, 0)
+      .minScalarEltSameAsIf(isVector(0), 1, 0)
+      .maxScalarEltSameAsIf(isVector(0), 1, 0)
       .clampNumElements(0, v8s8, v16s8)
       .clampNumElements(0, v4s16, v8s16)
       .clampNumElements(0, v2s32, v4s32)
       .clampNumElements(0, v2s64, v2s64)
       .moreElementsToNextPow2(0)
-      .minScalarSameAs(1, 0)
-      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
-      .minScalarEltSameAsIf(isVector(0), 1, 0)
-      .maxScalarEltSameAsIf(isVector(0), 1, 0);
+      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
 
   getActionDefinitionsBuilder(G_PTR_ADD)
       .legalFor({{p0, s64}, {v2p0, v2s64}})
@@ -467,29 +467,31 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
   getActionDefinitionsBuilder(G_FMAD).lower();
 
   for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
-    auto &Actions =  getActionDefinitionsBuilder(Op);
+    auto &Actions = getActionDefinitionsBuilder(Op);
 
     if (Op == G_SEXTLOAD)
-      Actions.lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered));
+      Actions.lowerIf(
+          atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered));
 
     // Atomics have zero extending behavior.
     Actions
-      .legalForTypesWithMemDesc({{s32, p0, s8, 8},
-                                 {s32, p0, s16, 8},
-                                 {s32, p0, s32, 8},
-                                 {s64, p0, s8, 2},
-                                 {s64, p0, s16, 2},
-                                 {s64, p0, s32, 4},
-                                 {s64, p0, s64, 8},
-                                 {p0, p0, s64, 8},
-                                 {v2s32, p0, s64, 8}})
-      .widenScalarToNextPow2(0)
-      .clampScalar(0, s32, s64)
-      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
-      //       how to do that yet.
-      .unsupportedIfMemSizeNotPow2()
-      // Lower anything left over into G_*EXT and G_LOAD
-      .lower();
+        .legalForTypesWithMemDesc({{s32, p0, s8, 8},
+                                   {s32, p0, s16, 8},
+                                   {s32, p0, s32, 8},
+                                   {s64, p0, s8, 2},
+                                   {s64, p0, s16, 2},
+                                   {s64, p0, s32, 4},
+                                   {s64, p0, s64, 8},
+                                   {p0, p0, s64, 8},
+                                   {v2s32, p0, s64, 8}})
+        .widenScalarToNextPow2(0)
+        .clampScalar(0, s32, s64)
+        // TODO: We could support sum-of-pow2's but the lowering code doesn't
+        // know
+        //       how to do that yet.
+        .unsupportedIfMemSizeNotPow2()
+        // Lower anything left over into G_*EXT and G_LOAD
+        .lower();
   }
 
   auto IsPtrVecPred = [=](const LegalityQuery &Query) {
@@ -982,9 +984,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
 
   // Control-flow
   getActionDefinitionsBuilder(G_BR).alwaysLegal();
-  getActionDefinitionsBuilder(G_BRCOND)
-    .legalFor({s32})
-    .clampScalar(0, s32, s32);
+  getActionDefinitionsBuilder(G_BRCOND).legalFor({s32}).clampScalar(0, s32,
+                                                                    s32);
   getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
 
   getActionDefinitionsBuilder(G_SELECT)
@@ -1053,8 +1054,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .widenScalarToNextPow2(0, /*Min*/ 8);
 
   getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
-      .lowerIf(
-          all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
+      .lowerIf(all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
 
   bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();
 
@@ -1606,7 +1606,7 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
   // Don't modify an intrinsic call.
   if (GlobalOp.isSymbol())
     return true;
-  const auto* GV = GlobalOp.getGlobal();
+  const auto *GV = GlobalOp.getGlobal();
   if (GV->isThreadLocal())
     return true; // Don't want to modify TLS vars.
 
@@ -1680,10 +1680,10 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
   switch (IntrinsicID) {
   case Intrinsic::vacopy: {
     unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
-    unsigned VaListSize =
-      (ST->isTargetDarwin() || ST->isTargetWindows())
-          ? PtrSize
-          : ST->isTargetILP32() ? 20 : 32;
+    unsigned VaListSize = (ST->isTargetDarwin() || ST->isTargetWindows())
+                              ? PtrSize
+                          : ST->isTargetILP32() ? 20
+                                                : 32;
 
     MachineFunction &MF = *MI.getMF();
     auto Val = MF.getRegInfo().createGenericVirtualRegister(
@@ -2122,7 +2122,8 @@ bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
   // v8s16,v4s32,v2s64 -> v16i8
   LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
   if (Ty.isScalar()) {
-    assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
+    assert((Size == 32 || Size == 64 || Size == 128) &&
+           "Expected only 32, 64, or 128 bit scalars!");
     if (Size == 32) {
       Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
     }
diff --git a/llvm/test/CodeGen/AArch64/aarch64-vector-shl-crash.ll b/llvm/test/CodeGen/AArch64/aarch64-vector-shl-crash.ll
new file mode 100644
index 0000000000000..8848fb215c55d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-vector-shl-crash.ll
@@ -0,0 +1,13 @@
+; RUN: llc -global-isel -o /dev/null %s
+
+target triple = "aarch64-unknown-unknown"
+
+; Crash-only test: legalising this vector shl must not crash llc (#168224).
+
+define <2 x i8> @test() {
+entry:
+  %zeroes = zext <2 x i1> zeroinitializer to <2 x i32>
+  %ones = shl <2 x i32> splat (i32 1), %zeroes
+  %ones.trunc = trunc <2 x i32> %ones to <2 x i8>
+  ret <2 x i8> %ones.trunc
+}