[clang] abeeae5 - [X86] Support `_Float16` on SSE2 and up

Phoebe Wang via cfe-commits cfe-commits at lists.llvm.org
Thu Jun 30 02:21:45 PDT 2022


Author: Phoebe Wang
Date: 2022-06-30T17:21:37+08:00
New Revision: abeeae570efff38dceccf68f5352809c58ffdda2

URL: https://github.com/llvm/llvm-project/commit/abeeae570efff38dceccf68f5352809c58ffdda2
DIFF: https://github.com/llvm/llvm-project/commit/abeeae570efff38dceccf68f5352809c58ffdda2.diff

LOG: [X86] Support `_Float16` on SSE2 and up

This is split from D113107 to address #56204 and https://discourse.llvm.org/t/how-to-build-compiler-rt-for-new-x86-half-float-abi/63366

Reviewed By: zahiraam, rjmccall, bkramer, MaskRay

Differential Revision: https://reviews.llvm.org/D128571

Added: 
    clang/test/CodeGen/X86/Float16-arithmetic.c
    clang/test/CodeGen/X86/Float16-complex.c

Modified: 
    clang/docs/LanguageExtensions.rst
    clang/docs/ReleaseNotes.rst
    clang/lib/Basic/Targets/X86.cpp
    clang/test/Sema/Float16.c
    clang/test/Sema/conversion-target-dep.c
    clang/test/SemaCXX/Float16.cpp
    compiler-rt/test/builtins/CMakeLists.txt

Removed: 
    clang/test/CodeGen/X86/avx512fp16-complex.c


################################################################################
diff  --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index af697fafd8c41..1bac2aee84bd9 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -743,7 +743,13 @@ targets pending ABI standardization:
 * 64-bit ARM (AArch64)
 * AMDGPU
 * SPIR
-* X86 (Only available under feature AVX512-FP16)
+* X86 (see below)
+
+On X86 targets, ``_Float16`` is supported as long as SSE2 is available, which
+includes all 64-bit and all recent 32-bit processors. When the target supports
+AVX512-FP16, ``_Float16`` arithmetic is performed using that native support.
+Otherwise, ``_Float16`` arithmetic is performed by promoting to ``float``,
+performing the operation, and then truncating to ``_Float16``.
 
 ``_Float16`` will be supported on more targets as they define ABIs for it.
 

diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 99288d0ffac4e..b80e401e02f6c 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -516,6 +516,9 @@ X86 Support in Clang
 
 - Support ``-mharden-sls=[none|all|return|indirect-jmp]`` for straight-line
   speculation hardening.
+- Support for the ``_Float16`` type has been added for all targets with SSE2.
+  When AVX512-FP16 is not available, arithmetic on ``_Float16`` is emulated
+  using ``float``.
 
 DWARF Support in Clang
 ----------------------

diff  --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index b83b3517ddf90..06988830eaed6 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -239,7 +239,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasAVX512ER = true;
     } else if (Feature == "+avx512fp16") {
       HasAVX512FP16 = true;
-      HasFloat16 = true;
     } else if (Feature == "+avx512pf") {
       HasAVX512PF = true;
     } else if (Feature == "+avx512dq") {
@@ -355,6 +354,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
                            .Default(NoSSE);
     SSELevel = std::max(SSELevel, Level);
 
+    HasFloat16 = SSELevel >= SSE2;
+
     MMX3DNowEnum ThreeDNowLevel = llvm::StringSwitch<MMX3DNowEnum>(Feature)
                                       .Case("+3dnowa", AMD3DNowAthlon)
                                       .Case("+3dnow", AMD3DNow)

diff  --git a/clang/test/CodeGen/X86/Float16-arithmetic.c b/clang/test/CodeGen/X86/Float16-arithmetic.c
new file mode 100644
index 0000000000000..aa61f7cb3c65f
--- /dev/null
+++ b/clang/test/CodeGen/X86/Float16-arithmetic.c
@@ -0,0 +1,112 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s
+
+
+// CHECK-LABEL: @add1(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// CHECK-NEXT:    [[ADD:%.*]] = fadd half [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    ret half [[ADD]]
+//
+_Float16 add1(_Float16 a, _Float16 b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// CHECK-NEXT:    store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// CHECK-NEXT:    [[ADD:%.*]] = fadd half [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2
+// CHECK-NEXT:    [[ADD1:%.*]] = fadd half [[ADD]], [[TMP2]]
+// CHECK-NEXT:    ret half [[ADD1]]
+//
+_Float16 add2(_Float16 a, _Float16 b, _Float16 c) {
+  return a + b + c;
+}
+
+// CHECK-LABEL: @div(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// CHECK-NEXT:    [[DIV:%.*]] = fdiv half [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    ret half [[DIV]]
+//
+_Float16 div(_Float16 a, _Float16 b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @mul(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// CHECK-NEXT:    [[MUL:%.*]] = fmul half [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    ret half [[MUL]]
+//
+_Float16 mul(_Float16 a, _Float16 b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @add_and_mul1(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT:    [[D_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// CHECK-NEXT:    store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// CHECK-NEXT:    store half [[D:%.*]], ptr [[D_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// CHECK-NEXT:    [[MUL:%.*]] = fmul half [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load half, ptr [[D_ADDR]], align 2
+// CHECK-NEXT:    [[MUL1:%.*]] = fmul half [[TMP2]], [[TMP3]]
+// CHECK-NEXT:    [[ADD:%.*]] = fadd half [[MUL]], [[MUL1]]
+// CHECK-NEXT:    ret half [[ADD]]
+//
+_Float16 add_and_mul1(_Float16 a, _Float16 b, _Float16 c, _Float16 d) {
+  return a * b + c * d;
+}
+
+// CHECK-LABEL: @add_and_mul2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT:    [[D_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// CHECK-NEXT:    store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// CHECK-NEXT:    store half [[D:%.*]], ptr [[D_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// CHECK-NEXT:    [[MUL:%.*]] = fmul half 0xH4600, [[TMP1]]
+// CHECK-NEXT:    [[SUB:%.*]] = fsub half [[TMP0]], [[MUL]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2
+// CHECK-NEXT:    [[ADD:%.*]] = fadd half [[SUB]], [[TMP2]]
+// CHECK-NEXT:    ret half [[ADD]]
+//
+_Float16 add_and_mul2(_Float16 a, _Float16 b, _Float16 c, _Float16 d) {
+  return (a - 6 * b) + c;
+}

diff  --git a/clang/test/CodeGen/X86/avx512fp16-complex.c b/clang/test/CodeGen/X86/Float16-complex.c
similarity index 96%
rename from clang/test/CodeGen/X86/avx512fp16-complex.c
rename to clang/test/CodeGen/X86/Float16-complex.c
index 8a6b50eb0056b..ebb290c976e7d 100644
--- a/clang/test/CodeGen/X86/avx512fp16-complex.c
+++ b/clang/test/CodeGen/X86/Float16-complex.c
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -target-feature +avx512fp16 -o - | FileCheck %s --check-prefix=X86
+// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s --check-prefix=X86
 
 _Float16 _Complex add_half_rr(_Float16 a, _Float16 b) {
   // X86-LABEL: @add_half_rr(

diff  --git a/clang/test/Sema/Float16.c b/clang/test/Sema/Float16.c
index f0b94666f74d7..26c604fed27aa 100644
--- a/clang/test/Sema/Float16.c
+++ b/clang/test/Sema/Float16.c
@@ -1,5 +1,6 @@
-// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s
-// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc -target-feature +avx512fp16 %s -DHAVE
+// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc -target-feature +sse2 %s -DHAVE
+// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s -DHAVE
 // RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s -DHAVE
 // RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s -DHAVE
 // RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -DHAVE

diff  --git a/clang/test/Sema/conversion-target-dep.c b/clang/test/Sema/conversion-target-dep.c
index 958a4d8b1f076..1b22cb8209343 100644
--- a/clang/test/Sema/conversion-target-dep.c
+++ b/clang/test/Sema/conversion-target-dep.c
@@ -6,7 +6,7 @@
 
 long double ld;
 double d;
-_Float16 f16; // x86-error {{_Float16 is not supported on this target}}
+_Float16 f16;
 
 int main(void) {
   ld = d; // x86-warning {{implicit conversion increases floating-point precision: 'double' to 'long double'}}

diff  --git a/clang/test/SemaCXX/Float16.cpp b/clang/test/SemaCXX/Float16.cpp
index f27c3839854e1..61b02a50687b9 100644
--- a/clang/test/SemaCXX/Float16.cpp
+++ b/clang/test/SemaCXX/Float16.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc -target-feature +sse2 %s -DHAVE
+// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s -DHAVE
 // RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s -DHAVE
 // RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s -DHAVE
 // RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -DHAVE

diff  --git a/compiler-rt/test/builtins/CMakeLists.txt b/compiler-rt/test/builtins/CMakeLists.txt
index d56ffc69763b6..1579e223e875d 100644
--- a/compiler-rt/test/builtins/CMakeLists.txt
+++ b/compiler-rt/test/builtins/CMakeLists.txt
@@ -44,9 +44,17 @@ foreach(arch ${BUILTIN_TEST_ARCH})
     string(REPLACE ";" " " BUILTINS_TEST_TARGET_CFLAGS "${BUILTINS_TEST_TARGET_CFLAGS}")
   endif()
 
-  if (${arch} MATCHES "arm|aarch64|arm64" AND COMPILER_RT_HAS_FLOAT16)
-    list(APPEND BUILTINS_TEST_TARGET_CFLAGS -DCOMPILER_RT_HAS_FLOAT16)
-    string(REPLACE ";" " " BUILTINS_TEST_TARGET_CFLAGS "${BUILTINS_TEST_TARGET_CFLAGS}")
+  if(APPLE)
+    # TODO: Support the new ABI on Apple platforms.
+    if (${arch} MATCHES "arm|aarch64|arm64" AND COMPILER_RT_HAS_FLOAT16)
+      list(APPEND BUILTINS_TEST_TARGET_CFLAGS -DCOMPILER_RT_HAS_FLOAT16)
+      string(REPLACE ";" " " BUILTINS_TEST_TARGET_CFLAGS "${BUILTINS_TEST_TARGET_CFLAGS}")
+    endif()
+  else()
+    if (${arch} MATCHES "arm|aarch64|arm64|i?86|x86_64|AMD64" AND COMPILER_RT_HAS_FLOAT16)
+      list(APPEND BUILTINS_TEST_TARGET_CFLAGS -DCOMPILER_RT_HAS_FLOAT16)
+      string(REPLACE ";" " " BUILTINS_TEST_TARGET_CFLAGS "${BUILTINS_TEST_TARGET_CFLAGS}")
+    endif()
   endif()
 
   if(COMPILER_RT_ENABLE_CET)


        


More information about the cfe-commits mailing list