[llvm] [AArch64][SVE] Fix -msve-vector-bits=256 fixed width vector crash (PR #171776)
Matthew Devereau via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 12 06:19:36 PST 2025
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/171776
>From 1f83e9ec0e65dde4cf79d505b6cd0935ab99d9cf Mon Sep 17 00:00:00 2001
From: Matthew Devereau <matthew.devereau at arm.com>
Date: Thu, 11 Dec 2025 07:48:41 +0000
Subject: [PATCH 1/2] [AArch64][SVE] Fix -msve-vector-bits=256 fixed width
vector crashes
This adds tests for, and fixes, a crash where an ISD::FP_ROUND from v8f32 to
v8bf16 cannot be lowered when -msve-vector-bits=256.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 2 +-
llvm/test/CodeGen/AArch64/fptrunc_256.ll | 70 +++++++++++++++++++
2 files changed, 71 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AArch64/fptrunc_256.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3012343386c07..69d7b7f241c44 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1878,7 +1878,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// 128bit results imply a bigger than NEON input.
for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
setOperationAction(ISD::TRUNCATE, VT, Custom);
- for (auto VT : {MVT::v8f16, MVT::v4f32})
+ for (auto VT : {MVT::v8f16, MVT::v4f32, MVT::v8bf16})
setOperationAction(ISD::FP_ROUND, VT, Custom);
// These operations are not supported on NEON but SVE can do them.
diff --git a/llvm/test/CodeGen/AArch64/fptrunc_256.ll b/llvm/test/CodeGen/AArch64/fptrunc_256.ll
new file mode 100644
index 0000000000000..fe472f2235a4d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fptrunc_256.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s
+
+define <8 x bfloat> @fptrunc_poison_shuffle_v8bf16(<4 x float> %a) #0 {
+; CHECK-LABEL: fptrunc_poison_shuffle_v8bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: splice z0.d, p0, z0.d, z0.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: bfcvt z0.h, p0/m, z0.s
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: ret
+ %shuffle = shufflevector <4 x float> %a, <4 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3>
+ %fpt = fptrunc <8 x float> %shuffle to <8 x bfloat>
+ ret <8 x bfloat> %fpt
+}
+
+define <8 x bfloat> @fptrunc_shuffle_v8bf16(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: fptrunc_shuffle_v8bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: bfcvt z0.h, p0/m, z0.s
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: ret
+ %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %fpt = fptrunc <8 x float> %shuffle to <8 x bfloat>
+ ret <8 x bfloat> %fpt
+}
+
+define <8 x half> @fptrunc_poison_shuffle_v8f16(<4 x float> %a) #0 {
+; CHECK-LABEL: fptrunc_poison_shuffle_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: splice z0.d, p0, z0.d, z0.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: ret
+ %shuffle = shufflevector <4 x float> %a, <4 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3>
+ %fpt = fptrunc <8 x float> %shuffle to <8 x half>
+ ret <8 x half> %fpt
+}
+
+define <8 x half> @fptrunc_shuffle_v8f16(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: fptrunc_shuffle_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: ret
+ %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %fpt = fptrunc <8 x float> %shuffle to <8 x half>
+ ret <8 x half> %fpt
+}
+
+attributes #0 = { vscale_range(2,2) "target-features"="+bf16,+sve" }
>From 5d45aa9e87813cd5e18f24e26774e9961cfcd1e9 Mon Sep 17 00:00:00 2001
From: Matthew Devereau <matthew.devereau at arm.com>
Date: Fri, 12 Dec 2025 14:18:23 +0000
Subject: [PATCH 2/2] Rename and simplify tests
---
llvm/test/CodeGen/AArch64/fptrunc_256.ll | 70 -------------------
.../AArch64/sve-fixed-length-fptrunc.ll | 36 ++++++++++
2 files changed, 36 insertions(+), 70 deletions(-)
delete mode 100644 llvm/test/CodeGen/AArch64/fptrunc_256.ll
create mode 100644 llvm/test/CodeGen/AArch64/sve-fixed-length-fptrunc.ll
diff --git a/llvm/test/CodeGen/AArch64/fptrunc_256.ll b/llvm/test/CodeGen/AArch64/fptrunc_256.ll
deleted file mode 100644
index fe472f2235a4d..0000000000000
--- a/llvm/test/CodeGen/AArch64/fptrunc_256.ll
+++ /dev/null
@@ -1,70 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s
-
-define <8 x bfloat> @fptrunc_poison_shuffle_v8bf16(<4 x float> %a) #0 {
-; CHECK-LABEL: fptrunc_poison_shuffle_v8bf16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: splice z0.d, p0, z0.d, z0.d
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: bfcvt z0.h, p0/m, z0.s
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT: ret
- %shuffle = shufflevector <4 x float> %a, <4 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3>
- %fpt = fptrunc <8 x float> %shuffle to <8 x bfloat>
- ret <8 x bfloat> %fpt
-}
-
-define <8 x bfloat> @fptrunc_shuffle_v8bf16(<4 x float> %a, <4 x float> %b) #0 {
-; CHECK-LABEL: fptrunc_shuffle_v8bf16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: bfcvt z0.h, p0/m, z0.s
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT: ret
- %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %fpt = fptrunc <8 x float> %shuffle to <8 x bfloat>
- ret <8 x bfloat> %fpt
-}
-
-define <8 x half> @fptrunc_poison_shuffle_v8f16(<4 x float> %a) #0 {
-; CHECK-LABEL: fptrunc_poison_shuffle_v8f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: splice z0.d, p0, z0.d, z0.d
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT: ret
- %shuffle = shufflevector <4 x float> %a, <4 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3>
- %fpt = fptrunc <8 x float> %shuffle to <8 x half>
- ret <8 x half> %fpt
-}
-
-define <8 x half> @fptrunc_shuffle_v8f16(<4 x float> %a, <4 x float> %b) #0 {
-; CHECK-LABEL: fptrunc_shuffle_v8f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT: ret
- %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %fpt = fptrunc <8 x float> %shuffle to <8 x half>
- ret <8 x half> %fpt
-}
-
-attributes #0 = { vscale_range(2,2) "target-features"="+bf16,+sve" }
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fptrunc.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fptrunc.ll
new file mode 100644
index 0000000000000..65c8dc0ad59dc
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fptrunc.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s
+
+define <8 x bfloat> @fptrunc_v8bf16(<8 x float> %a) #0 {
+; CHECK-LABEL: fptrunc_v8bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: bfcvt z0.h, p0/m, z0.s
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: ret
+ %fpt = fptrunc <8 x float> %a to <8 x bfloat>
+ ret <8 x bfloat> %fpt
+}
+
+define <8 x half> @fptrunc_v8f16(<8 x float> %a) #0 {
+; CHECK-LABEL: fptrunc_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: ret
+ %fpt = fptrunc <8 x float> %a to <8 x half>
+ ret <8 x half> %fpt
+}
+
+attributes #0 = { vscale_range(2,2) "target-features"="+bf16,+sve" }
More information about the llvm-commits
mailing list