[llvm] [Thumb2] mve-shuffle.ll - add missing check prefix coverage for some fullfp16 cases (PR #180567)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 10 05:10:12 PST 2026
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/180567
From 6db10e41fab87ba62856f8aa3cadde10fbb49859 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 9 Feb 2026 17:27:54 +0000
Subject: [PATCH] [Thumb2] mve-shuffle.ll - add missing check prefix coverage
for some -mattr=+mve,+fullfp16 cases
Noticed while working on some upcoming generic shuffle handling.
---
llvm/test/CodeGen/Thumb2/mve-shuffle.ll | 228 +++++++++++++++++++++++-
1 file changed, 226 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
index 6f2a0b2debc47..7f624cffa014c 100644
--- a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LV
+; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LV,CHECKFP16,CHECKFP16-LV
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LV,CHECKFP
-; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LIS
+; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LIS,CHECKFP16,CHECKFP16-LIS
; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LIS,CHECKFP
define arm_aapcs_vfpcc <4 x i32> @shuffle1_i32(<4 x i32> %src) {
@@ -1051,6 +1051,15 @@ entry:
}
define arm_aapcs_vfpcc <4 x float> @shuffle2step_f32(<8 x float> %src) {
+; CHECKFP16-LV-LABEL: shuffle2step_f32:
+; CHECKFP16-LV: @ %bb.0: @ %entry
+; CHECKFP16-LV-NEXT: vadd.f32 s7, s6, s7
+; CHECKFP16-LV-NEXT: vadd.f32 s6, s4, s5
+; CHECKFP16-LV-NEXT: vadd.f32 s5, s2, s3
+; CHECKFP16-LV-NEXT: vadd.f32 s4, s0, s1
+; CHECKFP16-LV-NEXT: vmov q0, q1
+; CHECKFP16-LV-NEXT: bx lr
+;
; CHECKFP-LABEL: shuffle2step_f32:
; CHECKFP: @ %bb.0: @ %entry
; CHECKFP-NEXT: vmov.f32 s8, s1
@@ -1062,6 +1071,15 @@ define arm_aapcs_vfpcc <4 x float> @shuffle2step_f32(<8 x float> %src) {
; CHECKFP-NEXT: vmov.f32 s3, s6
; CHECKFP-NEXT: vadd.f32 q0, q0, q2
; CHECKFP-NEXT: bx lr
+;
+; CHECKFP16-LIS-LABEL: shuffle2step_f32:
+; CHECKFP16-LIS: @ %bb.0: @ %entry
+; CHECKFP16-LIS-NEXT: vmov q2, q0
+; CHECKFP16-LIS-NEXT: vadd.f32 s3, s6, s7
+; CHECKFP16-LIS-NEXT: vadd.f32 s2, s4, s5
+; CHECKFP16-LIS-NEXT: vadd.f32 s1, s10, s11
+; CHECKFP16-LIS-NEXT: vadd.f32 s0, s8, s9
+; CHECKFP16-LIS-NEXT: bx lr
entry:
%s1 = shufflevector <8 x float> %src, <8 x float> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%s2 = shufflevector <8 x float> %src, <8 x float> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -1070,6 +1088,19 @@ entry:
}
define arm_aapcs_vfpcc <4 x float> @shuffle3step_f32(<16 x float> %src) {
+; CHECKFP16-LABEL: shuffle3step_f32:
+; CHECKFP16: @ %bb.0: @ %entry
+; CHECKFP16-NEXT: vadd.f32 s6, s6, s7
+; CHECKFP16-NEXT: vadd.f32 s10, s9, s10
+; CHECKFP16-NEXT: vadd.f32 s4, s3, s4
+; CHECKFP16-NEXT: vadd.f32 s0, s0, s1
+; CHECKFP16-NEXT: vadd.f32 s6, s6, s8
+; CHECKFP16-NEXT: vadd.f32 s7, s10, s11
+; CHECKFP16-NEXT: vadd.f32 s5, s4, s5
+; CHECKFP16-NEXT: vadd.f32 s4, s0, s2
+; CHECKFP16-NEXT: vmov q0, q1
+; CHECKFP16-NEXT: bx lr
+;
; CHECKFP-LABEL: shuffle3step_f32:
; CHECKFP: @ %bb.0: @ %entry
; CHECKFP-NEXT: .vsave {d8, d9}
@@ -1099,6 +1130,22 @@ entry:
}
define arm_aapcs_vfpcc <4 x float> @shuffle4step_f32(<16 x float> %src) {
+; CHECKFP16-LABEL: shuffle4step_f32:
+; CHECKFP16: @ %bb.0: @ %entry
+; CHECKFP16-NEXT: vadd.f32 s4, s4, s5
+; CHECKFP16-NEXT: vadd.f32 s14, s14, s15
+; CHECKFP16-NEXT: vadd.f32 s12, s12, s13
+; CHECKFP16-NEXT: vadd.f32 s5, s2, s3
+; CHECKFP16-NEXT: vadd.f32 s0, s0, s1
+; CHECKFP16-NEXT: vadd.f32 s6, s6, s7
+; CHECKFP16-NEXT: vadd.f32 s10, s10, s11
+; CHECKFP16-NEXT: vadd.f32 s8, s8, s9
+; CHECKFP16-NEXT: vadd.f32 s3, s12, s14
+; CHECKFP16-NEXT: vadd.f32 s0, s0, s5
+; CHECKFP16-NEXT: vadd.f32 s1, s4, s6
+; CHECKFP16-NEXT: vadd.f32 s2, s8, s10
+; CHECKFP16-NEXT: bx lr
+;
; CHECKFP-LABEL: shuffle4step_f32:
; CHECKFP: @ %bb.0: @ %entry
; CHECKFP-NEXT: .vsave {d8, d9, d10, d11}
@@ -1259,6 +1306,30 @@ entry:
}
define arm_aapcs_vfpcc <8 x half> @shuffle2step_f16(<16 x half> %src) {
+; CHECKFP16-LABEL: shuffle2step_f16:
+; CHECKFP16: @ %bb.0: @ %entry
+; CHECKFP16-NEXT: vmovx.f16 s8, s1
+; CHECKFP16-NEXT: vmovx.f16 s10, s0
+; CHECKFP16-NEXT: vadd.f16 s0, s0, s10
+; CHECKFP16-NEXT: vadd.f16 s8, s1, s8
+; CHECKFP16-NEXT: vins.f16 s0, s8
+; CHECKFP16-NEXT: vmovx.f16 s8, s3
+; CHECKFP16-NEXT: vmovx.f16 s10, s2
+; CHECKFP16-NEXT: vadd.f16 s8, s3, s8
+; CHECKFP16-NEXT: vadd.f16 s1, s2, s10
+; CHECKFP16-NEXT: vmovx.f16 s2, s5
+; CHECKFP16-NEXT: vins.f16 s1, s8
+; CHECKFP16-NEXT: vadd.f16 s8, s5, s2
+; CHECKFP16-NEXT: vmovx.f16 s2, s4
+; CHECKFP16-NEXT: vadd.f16 s2, s4, s2
+; CHECKFP16-NEXT: vmovx.f16 s4, s7
+; CHECKFP16-NEXT: vins.f16 s2, s8
+; CHECKFP16-NEXT: vmovx.f16 s8, s6
+; CHECKFP16-NEXT: vadd.f16 s4, s7, s4
+; CHECKFP16-NEXT: vadd.f16 s3, s6, s8
+; CHECKFP16-NEXT: vins.f16 s3, s4
+; CHECKFP16-NEXT: bx lr
+;
; CHECKFP-LABEL: shuffle2step_f16:
; CHECKFP: @ %bb.0: @ %entry
; CHECKFP-NEXT: vmovx.f16 s8, s0
@@ -1290,6 +1361,70 @@ entry:
}
define arm_aapcs_vfpcc <8 x half> @shuffle3step_f16(<32 x half> %src) {
+; CHECKFP16-LABEL: shuffle3step_f16:
+; CHECKFP16: @ %bb.0: @ %entry
+; CHECKFP16-NEXT: .vsave {d8, d9, d10, d11}
+; CHECKFP16-NEXT: vpush {d8, d9, d10, d11}
+; CHECKFP16-NEXT: vmovx.f16 s13, s1
+; CHECKFP16-NEXT: vmovx.f16 s16, s0
+; CHECKFP16-NEXT: vins.f16 s0, s13
+; CHECKFP16-NEXT: vmovx.f16 s13, s4
+; CHECKFP16-NEXT: vmovx.f16 s15, s3
+; CHECKFP16-NEXT: vins.f16 s3, s13
+; CHECKFP16-NEXT: vmov q5, q0
+; CHECKFP16-NEXT: vins.f16 s16, s2
+; CHECKFP16-NEXT: vmovx.f16 s2, s2
+; CHECKFP16-NEXT: vmovx.f16 s14, s6
+; CHECKFP16-NEXT: vins.f16 s1, s2
+; CHECKFP16-NEXT: vmovx.f16 s2, s5
+; CHECKFP16-NEXT: vmovx.f16 s18, s16
+; CHECKFP16-NEXT: vmovx.f16 s0, s20
+; CHECKFP16-NEXT: vins.f16 s4, s2
+; CHECKFP16-NEXT: vins.f16 s14, s8
+; CHECKFP16-NEXT: vmovx.f16 s2, s8
+; CHECKFP16-NEXT: vadd.f16 s0, s0, s18
+; CHECKFP16-NEXT: vmovx.f16 s8, s1
+; CHECKFP16-NEXT: vins.f16 s15, s5
+; CHECKFP16-NEXT: vadd.f16 s8, s0, s8
+; CHECKFP16-NEXT: vadd.f16 s0, s20, s16
+; CHECKFP16-NEXT: vadd.f16 s0, s0, s1
+; CHECKFP16-NEXT: vmovx.f16 s1, s3
+; CHECKFP16-NEXT: vins.f16 s0, s8
+; CHECKFP16-NEXT: vmovx.f16 s8, s15
+; CHECKFP16-NEXT: vadd.f16 s8, s1, s8
+; CHECKFP16-NEXT: vmovx.f16 s1, s4
+; CHECKFP16-NEXT: vmovx.f16 s13, s7
+; CHECKFP16-NEXT: vadd.f16 s8, s8, s1
+; CHECKFP16-NEXT: vadd.f16 s1, s3, s15
+; CHECKFP16-NEXT: vins.f16 s6, s13
+; CHECKFP16-NEXT: vadd.f16 s1, s1, s4
+; CHECKFP16-NEXT: vins.f16 s7, s2
+; CHECKFP16-NEXT: vins.f16 s1, s8
+; CHECKFP16-NEXT: vmovx.f16 s8, s6
+; CHECKFP16-NEXT: vmovx.f16 s4, s14
+; CHECKFP16-NEXT: vmovx.f16 s12, s9
+; CHECKFP16-NEXT: vmovx.f16 s13, s10
+; CHECKFP16-NEXT: vmovx.f16 s2, s11
+; CHECKFP16-NEXT: vadd.f16 s4, s8, s4
+; CHECKFP16-NEXT: vmovx.f16 s8, s7
+; CHECKFP16-NEXT: vadd.f16 s6, s6, s14
+; CHECKFP16-NEXT: vins.f16 s10, s2
+; CHECKFP16-NEXT: vins.f16 s12, s11
+; CHECKFP16-NEXT: vins.f16 s9, s13
+; CHECKFP16-NEXT: vadd.f16 s2, s6, s7
+; CHECKFP16-NEXT: vadd.f16 s4, s4, s8
+; CHECKFP16-NEXT: vins.f16 s2, s4
+; CHECKFP16-NEXT: vmovx.f16 s4, s12
+; CHECKFP16-NEXT: vmovx.f16 s6, s9
+; CHECKFP16-NEXT: vadd.f16 s4, s6, s4
+; CHECKFP16-NEXT: vmovx.f16 s6, s10
+; CHECKFP16-NEXT: vadd.f16 s4, s4, s6
+; CHECKFP16-NEXT: vadd.f16 s6, s9, s12
+; CHECKFP16-NEXT: vadd.f16 s3, s6, s10
+; CHECKFP16-NEXT: vins.f16 s3, s4
+; CHECKFP16-NEXT: vpop {d8, d9, d10, d11}
+; CHECKFP16-NEXT: bx lr
+;
; CHECKFP-LABEL: shuffle3step_f16:
; CHECKFP: @ %bb.0: @ %entry
; CHECKFP-NEXT: .vsave {d8, d9}
@@ -1339,6 +1474,95 @@ entry:
}
define arm_aapcs_vfpcc <8 x half> @shuffle4step_f16(<32 x half> %src) {
+; CHECKFP16-LABEL: shuffle4step_f16:
+; CHECKFP16: @ %bb.0: @ %entry
+; CHECKFP16-NEXT: .vsave {d12, d13, d14, d15}
+; CHECKFP16-NEXT: vpush {d12, d13, d14, d15}
+; CHECKFP16-NEXT: .vsave {d8, d9, d10}
+; CHECKFP16-NEXT: vpush {d8, d9, d10}
+; CHECKFP16-NEXT: vmov q4, q3
+; CHECKFP16-NEXT: vmovx.f16 s30, s9
+; CHECKFP16-NEXT: vmovx.f16 s12, s11
+; CHECKFP16-NEXT: vmovx.f16 s14, s17
+; CHECKFP16-NEXT: vins.f16 s30, s12
+; CHECKFP16-NEXT: vmovx.f16 s12, s19
+; CHECKFP16-NEXT: vmovx.f16 s24, s1
+; CHECKFP16-NEXT: vins.f16 s14, s12
+; CHECKFP16-NEXT: vmovx.f16 s12, s3
+; CHECKFP16-NEXT: vins.f16 s1, s3
+; CHECKFP16-NEXT: vmovx.f16 s20, s5
+; CHECKFP16-NEXT: vins.f16 s24, s12
+; CHECKFP16-NEXT: vmovx.f16 s12, s7
+; CHECKFP16-NEXT: vmov.f32 s28, s1
+; CHECKFP16-NEXT: vins.f16 s20, s12
+; CHECKFP16-NEXT: vmovx.f16 s12, s24
+; CHECKFP16-NEXT: vmovx.f16 s1, s1
+; CHECKFP16-NEXT: vins.f16 s5, s7
+; CHECKFP16-NEXT: vadd.f16 s1, s1, s12
+; CHECKFP16-NEXT: vmovx.f16 s12, s0
+; CHECKFP16-NEXT: vins.f16 s0, s2
+; CHECKFP16-NEXT: vmovx.f16 s2, s2
+; CHECKFP16-NEXT: vmovx.f16 s26, s8
+; CHECKFP16-NEXT: vins.f16 s8, s10
+; CHECKFP16-NEXT: vmovx.f16 s7, s4
+; CHECKFP16-NEXT: vins.f16 s12, s2
+; CHECKFP16-NEXT: vmovx.f16 s2, s6
+; CHECKFP16-NEXT: vins.f16 s4, s6
+; CHECKFP16-NEXT: vins.f16 s7, s2
+; CHECKFP16-NEXT: vmov.f32 s2, s8
+; CHECKFP16-NEXT: vmovx.f16 s8, s0
+; CHECKFP16-NEXT: vmovx.f16 s6, s12
+; CHECKFP16-NEXT: vadd.f16 s6, s8, s6
+; CHECKFP16-NEXT: vadd.f16 s8, s28, s24
+; CHECKFP16-NEXT: vadd.f16 s0, s0, s12
+; CHECKFP16-NEXT: vmovx.f16 s10, s10
+; CHECKFP16-NEXT: vadd.f16 s0, s0, s8
+; CHECKFP16-NEXT: vadd.f16 s6, s6, s1
+; CHECKFP16-NEXT: vmovx.f16 s3, s16
+; CHECKFP16-NEXT: vins.f16 s26, s10
+; CHECKFP16-NEXT: vmovx.f16 s10, s18
+; CHECKFP16-NEXT: vins.f16 s0, s6
+; CHECKFP16-NEXT: vmovx.f16 s6, s20
+; CHECKFP16-NEXT: vmovx.f16 s8, s5
+; CHECKFP16-NEXT: vins.f16 s3, s10
+; CHECKFP16-NEXT: vadd.f16 s6, s8, s6
+; CHECKFP16-NEXT: vmovx.f16 s10, s4
+; CHECKFP16-NEXT: vmovx.f16 s8, s7
+; CHECKFP16-NEXT: vadd.f16 s8, s10, s8
+; CHECKFP16-NEXT: vadd.f16 s4, s4, s7
+; CHECKFP16-NEXT: vadd.f16 s6, s8, s6
+; CHECKFP16-NEXT: vadd.f16 s8, s5, s20
+; CHECKFP16-NEXT: vadd.f16 s1, s4, s8
+; CHECKFP16-NEXT: vins.f16 s9, s11
+; CHECKFP16-NEXT: vins.f16 s1, s6
+; CHECKFP16-NEXT: vmovx.f16 s4, s30
+; CHECKFP16-NEXT: vmovx.f16 s6, s9
+; CHECKFP16-NEXT: vmovx.f16 s8, s2
+; CHECKFP16-NEXT: vadd.f16 s4, s6, s4
+; CHECKFP16-NEXT: vmovx.f16 s6, s26
+; CHECKFP16-NEXT: vadd.f16 s6, s8, s6
+; CHECKFP16-NEXT: vadd.f16 s2, s2, s26
+; CHECKFP16-NEXT: vadd.f16 s4, s6, s4
+; CHECKFP16-NEXT: vadd.f16 s6, s9, s30
+; CHECKFP16-NEXT: vadd.f16 s2, s2, s6
+; CHECKFP16-NEXT: vins.f16 s17, s19
+; CHECKFP16-NEXT: vins.f16 s16, s18
+; CHECKFP16-NEXT: vins.f16 s2, s4
+; CHECKFP16-NEXT: vmovx.f16 s4, s14
+; CHECKFP16-NEXT: vmovx.f16 s6, s17
+; CHECKFP16-NEXT: vadd.f16 s4, s6, s4
+; CHECKFP16-NEXT: vmovx.f16 s6, s3
+; CHECKFP16-NEXT: vmovx.f16 s8, s16
+; CHECKFP16-NEXT: vadd.f16 s6, s8, s6
+; CHECKFP16-NEXT: vadd.f16 s8, s16, s3
+; CHECKFP16-NEXT: vadd.f16 s4, s6, s4
+; CHECKFP16-NEXT: vadd.f16 s6, s17, s14
+; CHECKFP16-NEXT: vadd.f16 s3, s8, s6
+; CHECKFP16-NEXT: vins.f16 s3, s4
+; CHECKFP16-NEXT: vpop {d8, d9, d10}
+; CHECKFP16-NEXT: vpop {d12, d13, d14, d15}
+; CHECKFP16-NEXT: bx lr
+;
; CHECKFP-LABEL: shuffle4step_f16:
; CHECKFP: @ %bb.0: @ %entry
; CHECKFP-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
More information about the llvm-commits mailing list