[llvm] [Arm64EC] Add support for `half` (PR #152843)
Trevor Gross via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 9 00:54:49 PDT 2025
https://github.com/tgross35 updated https://github.com/llvm/llvm-project/pull/152843
>From 9dbed2c03d05d1872e8dd73712704f198652de61 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Sat, 9 Aug 2025 01:21:51 -0500
Subject: [PATCH] [Arm64EC] Add support for `half`
`f16` is passed and returned in vector registers on both x86 and AArch64,
the same calling convention as `f32`, so it is a straightforward type to
support. The calling convention support already exists, added as part of
a6065f0fa55a ("Arm64EC entry/exit thunks, consolidated. (#79067)").
Thus, add mangling and remove the error in order to make `half` work.
MSVC does not yet support `_Float16`, so for now this will remain an
LLVM-only extension.
---
.../AArch64/AArch64Arm64ECCallLowering.cpp | 25 +++++--
.../CodeGen/AArch64/arm64ec-entry-thunks.ll | 10 +--
.../CodeGen/AArch64/arm64ec-exit-thunks.ll | 75 ++++++++++---------
llvm/test/CodeGen/AArch64/frexp-arm64ec.ll | 9 +++
llvm/test/CodeGen/AArch64/ldexp-arm64ec.ll | 9 +++
llvm/test/CodeGen/AArch64/powi-arm64ec.ll | 12 ++-
6 files changed, 92 insertions(+), 48 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
index ad8368e1692be..04ec3e56dcde2 100644
--- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
@@ -316,6 +316,11 @@ ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType(
ThunkArgTranslation::PointerIndirection};
};
+ if (T->isHalfTy()) {
+ Out << "h";
+ return direct(T);
+ }
+
if (T->isFloatTy()) {
Out << "f";
return direct(T);
@@ -327,8 +332,8 @@ ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType(
}
if (T->isFloatingPointTy()) {
- report_fatal_error(
- "Only 32 and 64 bit floating points are supported for ARM64EC thunks");
+ report_fatal_error("Only 16, 32, and 64 bit floating points are supported "
+ "for ARM64EC thunks");
}
auto &DL = M->getDataLayout();
@@ -342,8 +347,15 @@ ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType(
uint64_t ElementCnt = T->getArrayNumElements();
uint64_t ElementSizePerBytes = DL.getTypeSizeInBits(ElementTy) / 8;
uint64_t TotalSizeBytes = ElementCnt * ElementSizePerBytes;
- if (ElementTy->isFloatTy() || ElementTy->isDoubleTy()) {
- Out << (ElementTy->isFloatTy() ? "F" : "D") << TotalSizeBytes;
+ if (ElementTy->isHalfTy() || ElementTy->isFloatTy() ||
+ ElementTy->isDoubleTy()) {
+ if (ElementTy->isHalfTy())
+ Out << "H";
+ else if (ElementTy->isFloatTy())
+ Out << "F";
+ else if (ElementTy->isDoubleTy())
+ Out << "D";
+ Out << TotalSizeBytes;
if (Alignment.value() >= 16 && !Ret)
Out << "a" << Alignment.value();
if (TotalSizeBytes <= 8) {
@@ -355,8 +367,9 @@ ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType(
return pointerIndirection(T);
}
} else if (T->isFloatingPointTy()) {
- report_fatal_error("Only 32 and 64 bit floating points are supported for "
- "ARM64EC thunks");
+ report_fatal_error(
+ "Only 16, 32, and 64 bit floating points are supported "
+ "for ARM64EC thunks");
}
}
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
index 6aeeeed94543d..8b70c2738e3f4 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
@@ -85,10 +85,10 @@ define i64 @simple_integers(i8, i16, i32, i64) nounwind {
ret i64 0
}
-; NOTE: Only float and double are supported.
-define double @simple_floats(float, double) nounwind {
-; CHECK-LABEL: .def $ientry_thunk$cdecl$d$fd;
-; CHECK: .section .wowthk$aa,"xr",discard,$ientry_thunk$cdecl$d$fd
+; NOTE: Only half, float, and double are supported.
+define double @simple_floats(half, float, double) nounwind {
+; CHECK-LABEL: .def $ientry_thunk$cdecl$d$hfd;
+; CHECK: .section .wowthk$aa,"xr",discard,$ientry_thunk$cdecl$d$hfd
; CHECK: // %bb.0:
; CHECK-NEXT: stp q6, q7, [sp, #-176]! // 32-byte Folded Spill
; CHECK-NEXT: .seh_save_any_reg_px q6, 176
@@ -600,7 +600,7 @@ start:
; CHECK-NEXT: .symidx $ientry_thunk$cdecl$i8$i8i8i8i8
; CHECK-NEXT: .word 1
; CHECK-NEXT: .symidx "#simple_floats"
-; CHECK-NEXT: .symidx $ientry_thunk$cdecl$d$fd
+; CHECK-NEXT: .symidx $ientry_thunk$cdecl$d$hfd
; CHECK-NEXT: .word 1
; CHECK-NEXT: .symidx "#has_varargs"
; CHECK-NEXT: .symidx $ientry_thunk$cdecl$v$varargs
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
index cba7a8100930f..22bffc7a0a33d 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
@@ -93,10 +93,10 @@ declare i64 @simple_integers(i8, i16, i32, i64) nounwind;
; CHECK-NEXT: .seh_endfunclet
; CHECK-NEXT: .seh_endproc
-; NOTE: Only float and double are supported.
-declare double @simple_floats(float, double) nounwind;
-; CHECK-LABEL: .def $iexit_thunk$cdecl$d$fd;
-; CHECK: .section .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$d$fd
+; NOTE: Only half, float, and double are supported.
+declare double @simple_floats(half, float, double) nounwind;
+; CHECK-LABEL: .def $iexit_thunk$cdecl$d$hfd;
+; CHECK: .section .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$d$hfd
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #48
; CHECK-NEXT: .seh_stackalloc 48
@@ -129,8 +129,8 @@ declare double @simple_floats(float, double) nounwind;
; CHECK-NEXT: adrp x11, simple_floats
; CHECK-NEXT: add x11, x11, :lo12:simple_floats
; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall]
-; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$d$fd
-; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$d$fd
+; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$d$hfd
+; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$d$hfd
; CHECK-NEXT: blr x8
; CHECK-NEXT: .seh_startepilogue
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
@@ -282,33 +282,36 @@ declare void @has_aligned_sret(ptr align 32 sret(%TSRet)) nounwind;
; CHECK: .seh_endfunclet
; CHECK: .seh_endproc
-declare [2 x i8] @small_array([2 x i8], [2 x float]) nounwind;
-; CHECK-LABEL: .def $iexit_thunk$cdecl$m2$m2F8;
-; CHECK: .section .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$m2$m2F8
+declare [2 x i8] @small_array([2 x i8], [2 x half], [2 x float]) nounwind;
+; CHECK-LABEL: .def $iexit_thunk$cdecl$m2$m2mF8;
+; CHECK: .section .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$m2$m2mF8
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #64
-; CHECK-NEXT: .seh_stackalloc 64
-; CHECK-NEXT: stp x29, x30, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: .seh_save_fplr 48
-; CHECK-NEXT: add x29, sp, #48
-; CHECK-NEXT: .seh_add_fp 48
+; CHECK-NEXT: sub sp, sp, #80
+; CHECK-NEXT: .seh_stackalloc 80
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: .seh_save_fplr 64
+; CHECK-NEXT: add x29, sp, #64
+; CHECK-NEXT: .seh_add_fp 64
; CHECK-NEXT: .seh_endprologue
-; CHECK-NEXT: sturb w1, [x29, #-1]
-; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_call_no_redirect
-; CHECK-NEXT: sturb w0, [x29, #-2]
-; CHECK-NEXT: ldr x16, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect]
-; CHECK-NEXT: stp s0, s1, [x29, #-12]
-; CHECK-NEXT: ldurh w0, [x29, #-2]
-; CHECK-NEXT: ldur x1, [x29, #-12]
-; CHECK-NEXT: blr x16
-; CHECK-NEXT: mov w0, w8
-; CHECK-NEXT: sturh w8, [x29, #-14]
-; CHECK-NEXT: ubfx w1, w8, #8, #8
+; CHECK-NEXT: sturb w0, [x29, #-2]
+; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_call_no_redirect
+; CHECK-NEXT: sturb w1, [x29, #-1]
+; CHECK-NEXT: ldr x16, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect]
+; CHECK-NEXT: stur h0, [x29, #-6]
+; CHECK-NEXT: ldurh w0, [x29, #-2]
+; CHECK-NEXT: stur h1, [x29, #-4]
+; CHECK-NEXT: stp s2, s3, [x29, #-16]
+; CHECK-NEXT: ldur w1, [x29, #-6]
+; CHECK-NEXT: ldur x2, [x29, #-16]
+; CHECK-NEXT: blr x16
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: sturh w8, [x29, #-18]
+; CHECK-NEXT: ubfx w1, w8, #8, #8
; CHECK-NEXT: .seh_startepilogue
-; CHECK-NEXT: ldp x29, x30, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: .seh_save_fplr 48
-; CHECK-NEXT: add sp, sp, #64
-; CHECK-NEXT: .seh_stackalloc 64
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: .seh_save_fplr 64
+; CHECK-NEXT: add sp, sp, #80
+; CHECK-NEXT: .seh_stackalloc 80
; CHECK-NEXT: .seh_endepilogue
; CHECK-NEXT: ret
; CHECK-NEXT: .seh_endfunclet
@@ -325,8 +328,8 @@ declare [2 x i8] @small_array([2 x i8], [2 x float]) nounwind;
; CHECK-NEXT: adrp x11, small_array
; CHECK-NEXT: add x11, x11, :lo12:small_array
; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall]
-; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$m2$m2F8
-; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$m2$m2F8
+; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$m2$m2mF8
+; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$m2$m2mF8
; CHECK-NEXT: blr x8
; CHECK-NEXT: .seh_startepilogue
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
@@ -577,7 +580,7 @@ declare <8 x i16> @large_vector(<8 x i16> %0) nounwind;
; CHECK-NEXT: .symidx simple_integers
; CHECK-NEXT: .word 0
; CHECK-NEXT: .symidx simple_floats
-; CHECK-NEXT: .symidx $iexit_thunk$cdecl$d$fd
+; CHECK-NEXT: .symidx $iexit_thunk$cdecl$d$hfd
; CHECK-NEXT: .word 4
; CHECK-NEXT: .symidx "#simple_floats$exit_thunk"
; CHECK-NEXT: .symidx simple_floats
@@ -601,7 +604,7 @@ declare <8 x i16> @large_vector(<8 x i16> %0) nounwind;
; CHECK-NEXT: .symidx has_aligned_sret
; CHECK-NEXT: .word 0
; CHECK-NEXT: .symidx small_array
-; CHECK-NEXT: .symidx $iexit_thunk$cdecl$m2$m2F8
+; CHECK-NEXT: .symidx $iexit_thunk$cdecl$m2$m2mF8
; CHECK-NEXT: .word 4
; CHECK-NEXT: .symidx "#small_array$exit_thunk"
; CHECK-NEXT: .symidx small_array
@@ -634,14 +637,14 @@ declare <8 x i16> @large_vector(<8 x i16> %0) nounwind;
define void @func_caller() nounwind {
call void @no_op()
call i64 @simple_integers(i8 0, i16 0, i32 0, i64 0)
- call double @simple_floats(float 0.0, double 0.0)
+ call double @simple_floats(half 0.0, float 0.0, double 0.0)
call void (...) @has_varargs()
%c = alloca i8
call void @has_sret(ptr sret([100 x i8]) %c)
%aligned = alloca %TSRet, align 32
store %TSRet { i64 0, i64 0 }, ptr %aligned, align 32
call void @has_aligned_sret(ptr align 32 sret(%TSRet) %aligned)
- call [2 x i8] @small_array([2 x i8] [i8 0, i8 0], [2 x float] [float 0.0, float 0.0])
+ call [2 x i8] @small_array([2 x i8] [i8 0, i8 0], [2 x half] [half 0.0, half 0.0], [2 x float] [float 0.0, float 0.0])
call [3 x i64] @large_array([3 x i64] [i64 0, i64 0, i64 0], [2 x double] [double 0.0, double 0.0], [2 x [2 x i64]] [[2 x i64] [i64 0, i64 0], [2 x i64] [i64 0, i64 0]])
call %T2 @simple_struct(%T1 { i16 0 }, %T2 { i32 0, float 0.0 }, %T3 { i64 0, double 0.0 }, %T4 { i64 0, double 0.0, i8 0 })
call <4 x i8> @small_vector(<4 x i8> <i8 0, i8 0, i8 0, i8 0>)
diff --git a/llvm/test/CodeGen/AArch64/frexp-arm64ec.ll b/llvm/test/CodeGen/AArch64/frexp-arm64ec.ll
index ee326caa77c0a..c27d3c9588b9d 100644
--- a/llvm/test/CodeGen/AArch64/frexp-arm64ec.ll
+++ b/llvm/test/CodeGen/AArch64/frexp-arm64ec.ll
@@ -2,6 +2,15 @@
; Separate from llvm-frexp.ll test because this errors on half cases
+; ARM64EC-LABEL: test_frexp_f16_i32
+; ARM64EC: fcvt d0, h0
+; ARM64EC: bl "#frexp"
+; ARM64EC: fcvt h0, d0
+define { half, i32 } @test_frexp_f16_i32(half %a) {
+ %result = call { half, i32 } @llvm.frexp.f16.i32(half %a)
+ ret { half, i32 } %result
+}
+
; ARM64EC-LABEL: test_frexp_f32_i32
; ARM64EC: fcvt d0, s0
; ARM64EC: bl "#frexp"
diff --git a/llvm/test/CodeGen/AArch64/ldexp-arm64ec.ll b/llvm/test/CodeGen/AArch64/ldexp-arm64ec.ll
index 1f8eeccf9c338..0fde7b95f5462 100644
--- a/llvm/test/CodeGen/AArch64/ldexp-arm64ec.ll
+++ b/llvm/test/CodeGen/AArch64/ldexp-arm64ec.ll
@@ -3,6 +3,15 @@
; Separate from ldexp.ll test because this errors on half cases
+; ARM64EC-LABEL: ldexp_f16 =
+; ARM64EC: fcvt d0, h0
+; ARM64EC: bl "#ldexp"
+; ARM64EC: fcvt h0, d0
+define half @ldexp_f16(half %val, i32 %a) {
+ %call = call half @llvm.ldexp.f16(half %val, i32 %a)
+ ret half %call
+}
+
; ARM64EC-LABEL: ldexp_f32 =
; ARM64EC: fcvt d0, s0
; ARM64EC: bl "#ldexp"
diff --git a/llvm/test/CodeGen/AArch64/powi-arm64ec.ll b/llvm/test/CodeGen/AArch64/powi-arm64ec.ll
index 707159eb432ec..2e38f3c5e9a54 100644
--- a/llvm/test/CodeGen/AArch64/powi-arm64ec.ll
+++ b/llvm/test/CodeGen/AArch64/powi-arm64ec.ll
@@ -1,8 +1,18 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=arm64ec-windows-msvc < %s | FileCheck -check-prefix=ARM64EC %s
-declare double @llvm.powi.f64.i32(double, i32)
+declare half @llvm.powi.f16.i32(half, i32)
declare float @llvm.powi.f32.i32(float, i32)
+declare double @llvm.powi.f64.i32(double, i32)
+
+; ARM64EC-LABEL: powi_f16
+; ARM64EC: fcvt s0, h0
+; ARM64EC: scvtf s1, w0
+; ARM64EC: bl "#powf"
+define half @powi_f16(half %x, i32 %n) nounwind {
+ %ret = tail call half @llvm.powi.f16.i32(half %x, i32 %n)
+ ret half %ret
+}
; ARM64EC-LABEL: powi_f32
; ARM64EC: scvtf s1, w0
More information about the llvm-commits
mailing list