[llvm] [AArch64][GISel] Don't crash in known-bits when copying from vectors to non-vectors (PR #168081)
Nathan Corbyn via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 17 13:00:25 PST 2025
https://github.com/cofibrant updated https://github.com/llvm/llvm-project/pull/168081
>From e0861f0747c999a162876f02889f8f4f67e21cd1 Mon Sep 17 00:00:00 2001
From: Nathan Corbyn <n_corbyn at apple.com>
Date: Fri, 14 Nov 2025 16:15:00 +0000
Subject: [PATCH 1/2] [AArch64][GISel] Don't crash in known-bits when copying
from vectors to non-vectors
---
.../CodeGen/GlobalISel/GISelValueTracking.cpp | 11 +++-
.../GlobalISel/knownbits-copy-vector-crash.ll | 50 +++++++++++++++++++
2 files changed, 59 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index c1fb8b6d78ff8..ecba323f8d6bf 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -247,6 +247,7 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
const MachineOperand &Src = MI.getOperand(Idx);
Register SrcReg = Src.getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
// Look through trivial copies and phis but don't look through trivial
// copies or phis of the form `%1:(s32) = OP %0:gpr32`, known-bits
// analysis is currently unable to determine the bit width of a
@@ -255,9 +256,15 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
// We can't use NoSubRegister by name as it's defined by each target but
// it's always defined to be 0 by tablegen.
if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
- MRI.getType(SrcReg).isValid()) {
+ SrcTy.isValid()) {
+ // When forwarding from a fixed vector register to a non-vector
+ // register, DemandedElts no longer matches the source, so demand every
+ // source element before recursing.
+ APInt NowDemandedElts = SrcTy.isFixedVector() && !DstTy.isFixedVector()
+ ? APInt::getAllOnes(SrcTy.getNumElements())
+ : DemandedElts; // Known to be APInt(1, 1)
// For COPYs we don't do anything, don't increase the depth.
- computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
+ computeKnownBitsImpl(SrcReg, Known2, NowDemandedElts,
Depth + (Opcode != TargetOpcode::COPY));
Known2 = Known2.anyextOrTrunc(BitWidth);
Known = Known.intersectWith(Known2);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll
new file mode 100644
index 0000000000000..76975e334e00b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -O3 -o - %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-unknown"
+
+; Check we don't crash here when computing known bits.
+
+define <4 x i32> @test(<8 x i16> %in, i1 %continue) {
+; CHECK-LABEL: test:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: mov w9, wzr
+; CHECK-NEXT: .LBB0_1: // %loop
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: mov w8, w9
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: str q0, [sp]
+; CHECK-NEXT: bfi x9, x8, #1, #3
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: ldrh w9, [x9]
+; CHECK-NEXT: tst w9, #0xff
+; CHECK-NEXT: cset w9, eq
+; CHECK-NEXT: mov v1.h[0], w9
+; CHECK-NEXT: xtn v1.8b, v1.8h
+; CHECK-NEXT: fmov w9, s1
+; CHECK-NEXT: tbz w0, #0, .LBB0_1
+; CHECK-NEXT: // %bb.2: // %exit
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: mov v0.s[0], w8
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ret
+entry:
+ br label %loop
+
+exit:
+ %result = insertelement <4 x i32> zeroinitializer, i32 %index, i64 0
+ ret <4 x i32> %result
+
+loop:
+ %index = phi i32 [ 0, %entry ], [ %insert.bitcast, %loop ]
+ %extracted = extractelement <8 x i16> %in, i32 %index
+ %masked = and i16 %extracted, 255
+ %maskedIsZero = icmp eq i16 %masked, 0
+ %maskedIsZero.zext = zext i1 %maskedIsZero to i8
+ %insert = insertelement <4 x i8> zeroinitializer, i8 %maskedIsZero.zext, i64 0
+ %insert.bitcast = bitcast <4 x i8> %insert to i32
+ br i1 %continue, label %exit, label %loop
+}
>From 85d0b1414ea4fa39abb330e71d5c2a2cd63454d0 Mon Sep 17 00:00:00 2001
From: Nathan Corbyn <n_corbyn at apple.com>
Date: Mon, 17 Nov 2025 21:00:06 +0000
Subject: [PATCH 2/2] Repair test
---
.../GlobalISel/knownbits-copy-vector-crash.ll | 42 +++++++++++--------
1 file changed, 24 insertions(+), 18 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll
index 76975e334e00b..f15253682c336 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll
@@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc -O3 -o - %s | FileCheck %s
+; RUN: llc -global-isel -o - %s | FileCheck %s
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64-unknown-unknown"
; Check we don't crash here when computing known bits.
@@ -9,27 +8,34 @@ target triple = "aarch64-unknown-unknown"
define <4 x i32> @test(<8 x i16> %in, i1 %continue) {
; CHECK-LABEL: test:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sub sp, sp, #32
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: mov w9, wzr
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: mov w12, wzr
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: mov w9, #2 // =0x2
+; CHECK-NEXT: mov w10, #0 // =0x0
; CHECK-NEXT: .LBB0_1: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: mov w8, w9
-; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: mov w11, w12
+; CHECK-NEXT: mov w12, w12
; CHECK-NEXT: str q0, [sp]
-; CHECK-NEXT: bfi x9, x8, #1, #3
-; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: ldrh w9, [x9]
-; CHECK-NEXT: tst w9, #0xff
-; CHECK-NEXT: cset w9, eq
-; CHECK-NEXT: mov v1.h[0], w9
-; CHECK-NEXT: xtn v1.8b, v1.8h
-; CHECK-NEXT: fmov w9, s1
+; CHECK-NEXT: and x12, x12, #0x7
+; CHECK-NEXT: umull x12, w12, w9
+; CHECK-NEXT: ldrb w12, [x8, x12]
+; CHECK-NEXT: cmp w12, #0
+; CHECK-NEXT: cset w12, eq
+; CHECK-NEXT: fmov s1, w12
+; CHECK-NEXT: mov v1.b[1], w10
+; CHECK-NEXT: mov v1.b[2], w10
+; CHECK-NEXT: mov v1.b[3], w10
+; CHECK-NEXT: fmov w12, s1
; CHECK-NEXT: tbz w0, #0, .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit
-; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: mov v0.s[0], w8
-; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: fmov s0, w11
+; CHECK-NEXT: mov v0.s[1], wzr
+; CHECK-NEXT: mov v0.s[2], wzr
+; CHECK-NEXT: mov v0.s[3], wzr
+; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
entry:
br label %loop
More information about the llvm-commits
mailing list