[clang] [llvm] Enable HasFastHalfType to prevent FP16 promotion and perform FABS/FNEG lowering (PR #189395)

Mon Mar 30 07:28:50 PDT 2026

https://github.com/tharunvk created https://github.com/llvm/llvm-project/pull/189395

This patch prevents the unwanted promotion of _Float16 FNEG and FABS 
operations to float at -O0, ensuring bitwise correctness and 
improving performance.

Previously, at -O0, Clang would emit fpext/fptrunc sequences for
these operations because HasFastHalfType was false. This could
alter NaN payloads and introduced unnecessary instruction overhead.
Changes:
  Clang: Set HasFastHalfType to true for X86 targets with SSE2+ (it was
   already set for avx512fp16). This signals to the frontend that the
   backend can handle half-precision bitwise operations natively in IR.

Fixes: https://github.com/llvm/llvm-project/issues/188201

>From 302777cea89b2cfdf6771a08646cff8bd7fb078e Mon Sep 17 00:00:00 2001
From: Tharun V K <tharunms98 at gmail.com>
Date: Mon, 30 Mar 2026 19:54:27 +0530
Subject: [PATCH] Enable HasFastHalfType to prevent FP16 promotion and perform
 FABS/FNEG lowering

---
 clang/lib/Basic/Targets/X86.cpp            |  2 +
 clang/test/CodeGen/X86/Float16-fneg-fabs.c | 18 +++++++++
 llvm/test/CodeGen/X86/half-fneg-fabs.ll    | 46 ++++++++++++++++++++++
 3 files changed, 66 insertions(+)
 create mode 100644 clang/test/CodeGen/X86/Float16-fneg-fabs.c
 create mode 100644 llvm/test/CodeGen/X86/half-fneg-fabs.ll

diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index cb941c94c84a7..b6138fee89b4c 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -471,6 +471,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
     SSELevel = std::max(SSELevel, Level);
 
     HasFloat16 = SSELevel >= SSE2;
+    if (HasFloat16)
+      HasFastHalfType = true;
 
     // X86 target has bfloat16 emulation support in the backend, where
     // bfloat16 is treated as a 32-bit float, arithmetic operations are
diff --git a/clang/test/CodeGen/X86/Float16-fneg-fabs.c b/clang/test/CodeGen/X86/Float16-fneg-fabs.c
new file mode 100644
index 0000000000000..92760bd377926
--- /dev/null
+++ b/clang/test/CodeGen/X86/Float16-fneg-fabs.c
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +sse2 -O0 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +f16c -O0 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK
+
+_Float16 test_fneg(_Float16 x) {
+  // CHECK-LABEL: define {{.*}} @test_fneg
+  // CHECK-NOT: fpext
+  // CHECK: fneg half
+  // CHECK-NOT: fptrunc
+  return -x;
+}
+
+_Float16 test_fabs(_Float16 x) {
+  // CHECK-LABEL: define {{.*}} @test_fabs
+  // CHECK-NOT: fpext
+  // CHECK: call half @llvm.fabs.f16(half
+  // CHECK-NOT: fptrunc
+  return __builtin_fabsf16(x);
+}
diff --git a/llvm/test/CodeGen/X86/half-fneg-fabs.ll b/llvm/test/CodeGen/X86/half-fneg-fabs.ll
new file mode 100644
index 0000000000000..d24a5fbe1dfe7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/half-fneg-fabs.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+f16c | FileCheck %s --check-prefixes=F16C
+
+; Test Floating Point Negation (fneg)
+define half @test_fneg(half %a) nounwind {
+; SSE2-LABEL: test_fneg:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: test_fneg:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; F16C-LABEL: test_fneg:
+; F16C:       # %bb.0:
+; F16C-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; F16C-NEXT:    retq
+  %res = fneg half %a
+  ret half %res
+}
+
+; Test Floating Point Absolute Value (fabs)
+define half @test_fabs(half %a) nounwind {
+; SSE2-LABEL: test_fabs:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: test_fabs:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; F16C-LABEL: test_fabs:
+; F16C:       # %bb.0:
+; F16C-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; F16C-NEXT:    retq
+  %res = call half @llvm.fabs.f16(half %a)
+  ret half %res
+}
+
+declare half @llvm.fabs.f16(half)