[llvm] 919021b - [Arm64EC] Add support for `half` (#152843)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 12 14:15:55 PDT 2025
Author: Trevor Gross
Date: 2025-08-12T14:15:52-07:00
New Revision: 919021b0df8c91417784bfd84a6ad4869a0d2206
URL: https://github.com/llvm/llvm-project/commit/919021b0df8c91417784bfd84a6ad4869a0d2206
DIFF: https://github.com/llvm/llvm-project/commit/919021b0df8c91417784bfd84a6ad4869a0d2206.diff
LOG: [Arm64EC] Add support for `half` (#152843)
`f16` is passed and returned in vector registers on both x86 and AArch64,
the same calling convention as `f32`, so it is a straightforward type to
support. The calling convention support already exists, added as part of
a6065f0fa55a ("Arm64EC entry/exit thunks, consolidated. (#79067)").
Thus, add mangling and remove the error in order to make `half` work.
MSVC does not yet support `_Float16`, so for now this will remain an
LLVM-only extension.
Fixes the `f16` portion of
https://github.com/llvm/llvm-project/issues/94434
Added:
Modified:
llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
llvm/test/CodeGen/AArch64/frexp-arm64ec.ll
llvm/test/CodeGen/AArch64/ldexp-arm64ec.ll
llvm/test/CodeGen/AArch64/powi-arm64ec.ll
llvm/test/CodeGen/Generic/half.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
index ad8368e1692be..1169f26a2ae37 100644
--- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
@@ -316,6 +316,12 @@ ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType(
ThunkArgTranslation::PointerIndirection};
};
+ if (T->isHalfTy()) {
+ // Prefix with `llvm` since MSVC doesn't specify `_Float16`
+ Out << "__llvm_h__";
+ return direct(T);
+ }
+
if (T->isFloatTy()) {
Out << "f";
return direct(T);
@@ -327,8 +333,8 @@ ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType(
}
if (T->isFloatingPointTy()) {
- report_fatal_error(
- "Only 32 and 64 bit floating points are supported for ARM64EC thunks");
+ report_fatal_error("Only 16, 32, and 64 bit floating points are supported "
+ "for ARM64EC thunks");
}
auto &DL = M->getDataLayout();
@@ -342,8 +348,16 @@ ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType(
uint64_t ElementCnt = T->getArrayNumElements();
uint64_t ElementSizePerBytes = DL.getTypeSizeInBits(ElementTy) / 8;
uint64_t TotalSizeBytes = ElementCnt * ElementSizePerBytes;
- if (ElementTy->isFloatTy() || ElementTy->isDoubleTy()) {
- Out << (ElementTy->isFloatTy() ? "F" : "D") << TotalSizeBytes;
+ if (ElementTy->isHalfTy() || ElementTy->isFloatTy() ||
+ ElementTy->isDoubleTy()) {
+ if (ElementTy->isHalfTy())
+ // Prefix with `llvm` since MSVC doesn't specify `_Float16`
+ Out << "__llvm_H__";
+ else if (ElementTy->isFloatTy())
+ Out << "F";
+ else if (ElementTy->isDoubleTy())
+ Out << "D";
+ Out << TotalSizeBytes;
if (Alignment.value() >= 16 && !Ret)
Out << "a" << Alignment.value();
if (TotalSizeBytes <= 8) {
@@ -355,8 +369,9 @@ ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType(
return pointerIndirection(T);
}
} else if (T->isFloatingPointTy()) {
- report_fatal_error("Only 32 and 64 bit floating points are supported for "
- "ARM64EC thunks");
+ report_fatal_error(
+ "Only 16, 32, and 64 bit floating points are supported "
+ "for ARM64EC thunks");
}
}
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
index 6aeeeed94543d..709a17e32f58e 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
@@ -85,10 +85,10 @@ define i64 @simple_integers(i8, i16, i32, i64) nounwind {
ret i64 0
}
-; NOTE: Only float and double are supported.
-define double @simple_floats(float, double) nounwind {
-; CHECK-LABEL: .def $ientry_thunk$cdecl$d$fd;
-; CHECK: .section .wowthk$aa,"xr",discard,$ientry_thunk$cdecl$d$fd
+; NOTE: Only half, float, and double are supported.
+define double @simple_floats(half, float, double) nounwind {
+; CHECK-LABEL: .def $ientry_thunk$cdecl$d$__llvm_h__fd;
+; CHECK: .section .wowthk$aa,"xr",discard,$ientry_thunk$cdecl$d$__llvm_h__fd
; CHECK: // %bb.0:
; CHECK-NEXT: stp q6, q7, [sp, #-176]! // 32-byte Folded Spill
; CHECK-NEXT: .seh_save_any_reg_px q6, 176
@@ -600,7 +600,7 @@ start:
; CHECK-NEXT: .symidx $ientry_thunk$cdecl$i8$i8i8i8i8
; CHECK-NEXT: .word 1
; CHECK-NEXT: .symidx "#simple_floats"
-; CHECK-NEXT: .symidx $ientry_thunk$cdecl$d$fd
+; CHECK-NEXT: .symidx $ientry_thunk$cdecl$d$__llvm_h__fd
; CHECK-NEXT: .word 1
; CHECK-NEXT: .symidx "#has_varargs"
; CHECK-NEXT: .symidx $ientry_thunk$cdecl$v$varargs
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
index cba7a8100930f..f829227a47cd7 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
@@ -93,10 +93,10 @@ declare i64 @simple_integers(i8, i16, i32, i64) nounwind;
; CHECK-NEXT: .seh_endfunclet
; CHECK-NEXT: .seh_endproc
-; NOTE: Only float and double are supported.
-declare double @simple_floats(float, double) nounwind;
-; CHECK-LABEL: .def $iexit_thunk$cdecl$d$fd;
-; CHECK: .section .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$d$fd
+; NOTE: Only half, float, and double are supported.
+declare double @simple_floats(half, float, double) nounwind;
+; CHECK-LABEL: .def $iexit_thunk$cdecl$d$__llvm_h__fd;
+; CHECK: .section .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$d$__llvm_h__fd
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #48
; CHECK-NEXT: .seh_stackalloc 48
@@ -129,8 +129,8 @@ declare double @simple_floats(float, double) nounwind;
; CHECK-NEXT: adrp x11, simple_floats
; CHECK-NEXT: add x11, x11, :lo12:simple_floats
; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall]
-; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$d$fd
-; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$d$fd
+; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$d$__llvm_h__fd
+; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$d$__llvm_h__fd
; CHECK-NEXT: blr x8
; CHECK-NEXT: .seh_startepilogue
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
@@ -282,33 +282,36 @@ declare void @has_aligned_sret(ptr align 32 sret(%TSRet)) nounwind;
; CHECK: .seh_endfunclet
; CHECK: .seh_endproc
-declare [2 x i8] @small_array([2 x i8], [2 x float]) nounwind;
-; CHECK-LABEL: .def $iexit_thunk$cdecl$m2$m2F8;
-; CHECK: .section .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$m2$m2F8
+declare [2 x i8] @small_array([2 x i8], [2 x half], [2 x float]) nounwind;
+; CHECK-LABEL: .def $iexit_thunk$cdecl$m2$m2__llvm_H__4F8;
+; CHECK: .section .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$m2$m2__llvm_H__4F8
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #64
-; CHECK-NEXT: .seh_stackalloc 64
-; CHECK-NEXT: stp x29, x30, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: .seh_save_fplr 48
-; CHECK-NEXT: add x29, sp, #48
-; CHECK-NEXT: .seh_add_fp 48
+; CHECK-NEXT: sub sp, sp, #80
+; CHECK-NEXT: .seh_stackalloc 80
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: .seh_save_fplr 64
+; CHECK-NEXT: add x29, sp, #64
+; CHECK-NEXT: .seh_add_fp 64
; CHECK-NEXT: .seh_endprologue
-; CHECK-NEXT: sturb w1, [x29, #-1]
-; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_call_no_redirect
-; CHECK-NEXT: sturb w0, [x29, #-2]
-; CHECK-NEXT: ldr x16, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect]
-; CHECK-NEXT: stp s0, s1, [x29, #-12]
-; CHECK-NEXT: ldurh w0, [x29, #-2]
-; CHECK-NEXT: ldur x1, [x29, #-12]
-; CHECK-NEXT: blr x16
-; CHECK-NEXT: mov w0, w8
-; CHECK-NEXT: sturh w8, [x29, #-14]
-; CHECK-NEXT: ubfx w1, w8, #8, #8
+; CHECK-NEXT: sturb w0, [x29, #-2]
+; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_call_no_redirect
+; CHECK-NEXT: sturb w1, [x29, #-1]
+; CHECK-NEXT: ldr x16, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect]
+; CHECK-NEXT: stur h0, [x29, #-6]
+; CHECK-NEXT: ldurh w0, [x29, #-2]
+; CHECK-NEXT: stur h1, [x29, #-4]
+; CHECK-NEXT: stp s2, s3, [x29, #-16]
+; CHECK-NEXT: ldur w1, [x29, #-6]
+; CHECK-NEXT: ldur x2, [x29, #-16]
+; CHECK-NEXT: blr x16
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: sturh w8, [x29, #-18]
+; CHECK-NEXT: ubfx w1, w8, #8, #8
; CHECK-NEXT: .seh_startepilogue
-; CHECK-NEXT: ldp x29, x30, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: .seh_save_fplr 48
-; CHECK-NEXT: add sp, sp, #64
-; CHECK-NEXT: .seh_stackalloc 64
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: .seh_save_fplr 64
+; CHECK-NEXT: add sp, sp, #80
+; CHECK-NEXT: .seh_stackalloc 80
; CHECK-NEXT: .seh_endepilogue
; CHECK-NEXT: ret
; CHECK-NEXT: .seh_endfunclet
@@ -325,8 +328,8 @@ declare [2 x i8] @small_array([2 x i8], [2 x float]) nounwind;
; CHECK-NEXT: adrp x11, small_array
; CHECK-NEXT: add x11, x11, :lo12:small_array
; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall]
-; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$m2$m2F8
-; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$m2$m2F8
+; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$m2$m2__llvm_H__4F8
+; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$m2$m2__llvm_H__4F8
; CHECK-NEXT: blr x8
; CHECK-NEXT: .seh_startepilogue
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
@@ -577,7 +580,7 @@ declare <8 x i16> @large_vector(<8 x i16> %0) nounwind;
; CHECK-NEXT: .symidx simple_integers
; CHECK-NEXT: .word 0
; CHECK-NEXT: .symidx simple_floats
-; CHECK-NEXT: .symidx $iexit_thunk$cdecl$d$fd
+; CHECK-NEXT: .symidx $iexit_thunk$cdecl$d$__llvm_h__fd
; CHECK-NEXT: .word 4
; CHECK-NEXT: .symidx "#simple_floats$exit_thunk"
; CHECK-NEXT: .symidx simple_floats
@@ -601,7 +604,7 @@ declare <8 x i16> @large_vector(<8 x i16> %0) nounwind;
; CHECK-NEXT: .symidx has_aligned_sret
; CHECK-NEXT: .word 0
; CHECK-NEXT: .symidx small_array
-; CHECK-NEXT: .symidx $iexit_thunk$cdecl$m2$m2F8
+; CHECK-NEXT: .symidx $iexit_thunk$cdecl$m2$m2__llvm_H__4F8
; CHECK-NEXT: .word 4
; CHECK-NEXT: .symidx "#small_array$exit_thunk"
; CHECK-NEXT: .symidx small_array
@@ -634,14 +637,14 @@ declare <8 x i16> @large_vector(<8 x i16> %0) nounwind;
define void @func_caller() nounwind {
call void @no_op()
call i64 @simple_integers(i8 0, i16 0, i32 0, i64 0)
- call double @simple_floats(float 0.0, double 0.0)
+ call double @simple_floats(half 0.0, float 0.0, double 0.0)
call void (...) @has_varargs()
%c = alloca i8
call void @has_sret(ptr sret([100 x i8]) %c)
%aligned = alloca %TSRet, align 32
store %TSRet { i64 0, i64 0 }, ptr %aligned, align 32
call void @has_aligned_sret(ptr align 32 sret(%TSRet) %aligned)
- call [2 x i8] @small_array([2 x i8] [i8 0, i8 0], [2 x float] [float 0.0, float 0.0])
+ call [2 x i8] @small_array([2 x i8] [i8 0, i8 0], [2 x half] [half 0.0, half 0.0], [2 x float] [float 0.0, float 0.0])
call [3 x i64] @large_array([3 x i64] [i64 0, i64 0, i64 0], [2 x double] [double 0.0, double 0.0], [2 x [2 x i64]] [[2 x i64] [i64 0, i64 0], [2 x i64] [i64 0, i64 0]])
call %T2 @simple_struct(%T1 { i16 0 }, %T2 { i32 0, float 0.0 }, %T3 { i64 0, double 0.0 }, %T4 { i64 0, double 0.0, i8 0 })
call <4 x i8> @small_vector(<4 x i8> <i8 0, i8 0, i8 0, i8 0>)
diff --git a/llvm/test/CodeGen/AArch64/frexp-arm64ec.ll b/llvm/test/CodeGen/AArch64/frexp-arm64ec.ll
index ee326caa77c0a..c27d3c9588b9d 100644
--- a/llvm/test/CodeGen/AArch64/frexp-arm64ec.ll
+++ b/llvm/test/CodeGen/AArch64/frexp-arm64ec.ll
@@ -2,6 +2,15 @@
; Separate from llvm-frexp.ll test because this errors on half cases
+; ARM64EC-LABEL: test_frexp_f16_i32
+; ARM64EC: fcvt d0, h0
+; ARM64EC: bl "#frexp"
+; ARM64EC: fcvt h0, d0
+define { half, i32 } @test_frexp_f16_i32(half %a) {
+ %result = call { half, i32 } @llvm.frexp.f16.i32(half %a)
+ ret { half, i32 } %result
+}
+
; ARM64EC-LABEL: test_frexp_f32_i32
; ARM64EC: fcvt d0, s0
; ARM64EC: bl "#frexp"
diff --git a/llvm/test/CodeGen/AArch64/ldexp-arm64ec.ll b/llvm/test/CodeGen/AArch64/ldexp-arm64ec.ll
index 1f8eeccf9c338..0fde7b95f5462 100644
--- a/llvm/test/CodeGen/AArch64/ldexp-arm64ec.ll
+++ b/llvm/test/CodeGen/AArch64/ldexp-arm64ec.ll
@@ -3,6 +3,15 @@
; Separate from ldexp.ll test because this errors on half cases
+; ARM64EC-LABEL: ldexp_f16 =
+; ARM64EC: fcvt d0, h0
+; ARM64EC: bl "#ldexp"
+; ARM64EC: fcvt h0, d0
+define half @ldexp_f16(half %val, i32 %a) {
+ %call = call half @llvm.ldexp.f16(half %val, i32 %a)
+ ret half %call
+}
+
; ARM64EC-LABEL: ldexp_f32 =
; ARM64EC: fcvt d0, s0
; ARM64EC: bl "#ldexp"
diff --git a/llvm/test/CodeGen/AArch64/powi-arm64ec.ll b/llvm/test/CodeGen/AArch64/powi-arm64ec.ll
index 707159eb432ec..2e38f3c5e9a54 100644
--- a/llvm/test/CodeGen/AArch64/powi-arm64ec.ll
+++ b/llvm/test/CodeGen/AArch64/powi-arm64ec.ll
@@ -1,8 +1,18 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=arm64ec-windows-msvc < %s | FileCheck -check-prefix=ARM64EC %s
-declare double @llvm.powi.f64.i32(double, i32)
+declare half @llvm.powi.f16.i32(half, i32)
declare float @llvm.powi.f32.i32(float, i32)
+declare double @llvm.powi.f64.i32(double, i32)
+
+; ARM64EC-LABEL: powi_f16
+; ARM64EC: fcvt s0, h0
+; ARM64EC: scvtf s1, w0
+; ARM64EC: bl "#powf"
+define half @powi_f16(half %x, i32 %n) nounwind {
+ %ret = tail call half @llvm.powi.f16.i32(half %x, i32 %n)
+ ret half %ret
+}
; ARM64EC-LABEL: powi_f32
; ARM64EC: scvtf s1, w0
diff --git a/llvm/test/CodeGen/Generic/half.ll b/llvm/test/CodeGen/Generic/half.ll
index f4ea5b5b30621..9d6c8eb2730d2 100644
--- a/llvm/test/CodeGen/Generic/half.ll
+++ b/llvm/test/CodeGen/Generic/half.ll
@@ -7,8 +7,7 @@
; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=aarch64-apple-darwin | FileCheck %s --check-prefixes=ALL,CHECK %}
; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=aarch64-pc-windows-msvc | FileCheck %s --check-prefixes=ALL,CHECK %}
; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=aarch64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %}
-; FIXME(#94434) unsupported on arm64ec
-; RUN: %if aarch64-registered-target %{ ! llc %s -o - -mtriple=arm64ec-pc-windows-msvc -filetype=null %}
+; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=arm64ec-pc-windows-msvc | FileCheck %s --check-prefixes=ALL,CHECK %}
; RUN: %if amdgpu-registered-target %{ llc %s -o - -mtriple=amdgcn-amd-amdhsa | FileCheck %s --check-prefixes=ALL,CHECK %}
; RUN: %if arc-registered-target %{ llc %s -o - -mtriple=arc-elf | FileCheck %s --check-prefixes=ALL,CHECK %}
; RUN: %if arm-registered-target %{ llc %s -o - -mtriple=arm-unknown-linux-gnueabi | FileCheck %s --check-prefixes=ALL,CHECK %}
@@ -47,6 +46,8 @@
; RUN: %if xcore-registered-target %{ llc %s -o - -mtriple=xcore-unknown-unknown | FileCheck %s --check-prefixes=ALL,CHECK %}
; RUN: %if xtensa-registered-target %{ llc %s -o - -mtriple=xtensa-none-elf | FileCheck %s --check-prefixes=ALL,CHECK %}
+; Note that arm64ec labels are quoted, hence the `{{"?}}:`.
+
; Codegen tests don't work the same for graphics targets. Add a dummy directive
; for filecheck, just make sure we don't crash.
; NOCRASH: {{.*}}
@@ -58,7 +59,7 @@
; Regression test for https://github.com/llvm/llvm-project/issues/97981.
define half @from_bits(i16 %bits) nounwind {
-; ALL-LABEL: from_bits:
+; ALL-LABEL: from_bits{{"?}}:
; CHECK-NOT: __extend
; CHECK-NOT: __trunc
; CHECK-NOT: __gnu
@@ -68,7 +69,7 @@ define half @from_bits(i16 %bits) nounwind {
}
define i16 @to_bits(half %f) nounwind {
-; ALL-LABEL: to_bits:
+; ALL-LABEL: to_bits{{"?}}:
; CHECK-NOT: __extend
; CHECK-NOT: __trunc
; CHECK-NOT: __gnu
@@ -81,7 +82,7 @@ define i16 @to_bits(half %f) nounwind {
; https://github.com/llvm/llvm-project/issues/117337 and similar issues.
define half @check_freeze(half %f) nounwind {
-; ALL-LABEL: check_freeze:
+; ALL-LABEL: check_freeze{{"?}}:
%t0 = freeze half %f
ret half %t0
}
More information about the llvm-commits
mailing list