[llvm] [AArch64][GlobalISel] Don't crash when legalising vector G_SHL (PR #168848)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 20 02:45:32 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Nathan Corbyn (cofibrant)
Fixes a crash occurring in the AArch64 GlobalISel legaliser pass when legalising a `G_SHL` of vectors. The crash occurred because the legalisation rule modifying the scalar type was applied after the rule modifying the number of vector elements, while the action that pads out the vector inputs to `G_SHL` (and other shifts) assumes the scalar types already agree.
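As an illustrative sketch (the intermediate types here are reconstructed from the test below, not taken from the patch), mid-legalisation the `G_SHL` can end up with operands whose element types disagree:

```
%res:_(<2 x s8>) = G_SHL %val(<2 x s8>), %amt(<2 x s32>)
```

With the old rule order, `clampNumElements`/`moreElementsToNextPow2` would try to pad out operand 0 before `maxScalarEltSameAsIf`/`minScalarEltSameAsIf` had brought the shift amount's element type into line, and the padding action asserts that the element types already agree. Moving the scalar-matching rules ahead of the element-count rules restores that invariant.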
Fixes #168224
---
Full diff: https://github.com/llvm/llvm-project/pull/168848.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+34-33)
- (added) llvm/test/CodeGen/AArch64/aarch64-vector-shl-crash.ll (+13)
``````````diff
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index a88817c9d2d19..efd525bbbdabd 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -216,15 +216,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.widenScalarToNextPow2(0)
.clampScalar(1, s32, s64)
.clampScalar(0, s32, s64)
+ .minScalarSameAs(1, 0)
+ .minScalarEltSameAsIf(isVector(0), 1, 0)
+ .maxScalarEltSameAsIf(isVector(0), 1, 0)
.clampNumElements(0, v8s8, v16s8)
.clampNumElements(0, v4s16, v8s16)
.clampNumElements(0, v2s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
.moreElementsToNextPow2(0)
- .minScalarSameAs(1, 0)
- .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
- .minScalarEltSameAsIf(isVector(0), 1, 0)
- .maxScalarEltSameAsIf(isVector(0), 1, 0);
+ .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
getActionDefinitionsBuilder(G_PTR_ADD)
.legalFor({{p0, s64}, {v2p0, v2s64}})
@@ -467,29 +467,31 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_FMAD).lower();
for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
- auto &Actions = getActionDefinitionsBuilder(Op);
+ auto &Actions = getActionDefinitionsBuilder(Op);
if (Op == G_SEXTLOAD)
- Actions.lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered));
+ Actions.lowerIf(
+ atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered));
// Atomics have zero extending behavior.
Actions
- .legalForTypesWithMemDesc({{s32, p0, s8, 8},
- {s32, p0, s16, 8},
- {s32, p0, s32, 8},
- {s64, p0, s8, 2},
- {s64, p0, s16, 2},
- {s64, p0, s32, 4},
- {s64, p0, s64, 8},
- {p0, p0, s64, 8},
- {v2s32, p0, s64, 8}})
- .widenScalarToNextPow2(0)
- .clampScalar(0, s32, s64)
- // TODO: We could support sum-of-pow2's but the lowering code doesn't know
- // how to do that yet.
- .unsupportedIfMemSizeNotPow2()
- // Lower anything left over into G_*EXT and G_LOAD
- .lower();
+ .legalForTypesWithMemDesc({{s32, p0, s8, 8},
+ {s32, p0, s16, 8},
+ {s32, p0, s32, 8},
+ {s64, p0, s8, 2},
+ {s64, p0, s16, 2},
+ {s64, p0, s32, 4},
+ {s64, p0, s64, 8},
+ {p0, p0, s64, 8},
+ {v2s32, p0, s64, 8}})
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, s32, s64)
+ // TODO: We could support sum-of-pow2's but the lowering code doesn't
+ // know
+ // how to do that yet.
+ .unsupportedIfMemSizeNotPow2()
+ // Lower anything left over into G_*EXT and G_LOAD
+ .lower();
}
auto IsPtrVecPred = [=](const LegalityQuery &Query) {
@@ -982,9 +984,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
// Control-flow
getActionDefinitionsBuilder(G_BR).alwaysLegal();
- getActionDefinitionsBuilder(G_BRCOND)
- .legalFor({s32})
- .clampScalar(0, s32, s32);
+ getActionDefinitionsBuilder(G_BRCOND).legalFor({s32}).clampScalar(0, s32,
+ s32);
getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
getActionDefinitionsBuilder(G_SELECT)
@@ -1053,8 +1054,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.widenScalarToNextPow2(0, /*Min*/ 8);
getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
- .lowerIf(
- all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
+ .lowerIf(all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();
@@ -1606,7 +1606,7 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
// Don't modify an intrinsic call.
if (GlobalOp.isSymbol())
return true;
- const auto* GV = GlobalOp.getGlobal();
+ const auto *GV = GlobalOp.getGlobal();
if (GV->isThreadLocal())
return true; // Don't want to modify TLS vars.
@@ -1680,10 +1680,10 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
switch (IntrinsicID) {
case Intrinsic::vacopy: {
unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
- unsigned VaListSize =
- (ST->isTargetDarwin() || ST->isTargetWindows())
- ? PtrSize
- : ST->isTargetILP32() ? 20 : 32;
+ unsigned VaListSize = (ST->isTargetDarwin() || ST->isTargetWindows())
+ ? PtrSize
+ : ST->isTargetILP32() ? 20
+ : 32;
MachineFunction &MF = *MI.getMF();
auto Val = MF.getRegInfo().createGenericVirtualRegister(
@@ -2122,7 +2122,8 @@ bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
// v8s16,v4s32,v2s64 -> v16i8
LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
if (Ty.isScalar()) {
- assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
+ assert((Size == 32 || Size == 64 || Size == 128) &&
+ "Expected only 32, 64, or 128 bit scalars!");
if (Size == 32) {
Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
}
diff --git a/llvm/test/CodeGen/AArch64/aarch64-vector-shl-crash.ll b/llvm/test/CodeGen/AArch64/aarch64-vector-shl-crash.ll
new file mode 100644
index 0000000000000..8848fb215c55d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-vector-shl-crash.ll
@@ -0,0 +1,13 @@
+; RUN: llc -global-isel -o - %s | FileCheck %s
+
+target triple = "aarch64-unknown-unknown"
+
+; Check we don't crash here.
+
+define <2 x i8> @test() {
+entry:
+ %zeroes = zext <2 x i1> zeroinitializer to <2 x i32>
+ %ones = shl <2 x i32> splat (i32 1), %zeroes
+ %ones.trunc = trunc <2 x i32> %ones to <2 x i8>
+ ret <2 x i8> %ones.trunc
+}
``````````
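For anyone reproducing locally, running the new test through `llc` without the legaliser change should hit the assertion; the invocation below just expands the test's `RUN` line (minus the `FileCheck` pipe; path relative to the LLVM source root):

```
llc -global-isel -o - llvm/test/CodeGen/AArch64/aarch64-vector-shl-crash.ll
```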
https://github.com/llvm/llvm-project/pull/168848