[clang] [OpenCL] Set half-precision Div and Sqrt accuracy (PR #179621)

Tue Feb 3 23:36:00 PST 2026

llvmbot wrote:




@llvm/pr-subscribers-clang-codegen

Author: Wenju He (wenju-he)

<details>
<summary>Changes</summary>

OpenCL spec relaxed half-precision divide to 1 ULP and sqrt to 1.5 ULP in https://github.com/KhronosGroup/OpenCL-Docs/pull/1293 https://github.com/KhronosGroup/OpenCL-Docs/pull/1386

---
Full diff: https://github.com/llvm/llvm-project/pull/179621.diff


3 Files Affected:

- (modified) clang/lib/CodeGen/CGExpr.cpp (+8-6) 
- (modified) clang/test/CodeGenOpenCL/fpmath.cl (+41-3) 
- (modified) clang/test/CodeGenOpenCL/sqrt-fpmath.cl (+13-7) 


``````````diff

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 339314ecff9cd..71a14d65c1bfe 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -6979,14 +6979,15 @@ void CodeGenFunction::SetFPAccuracy(llvm::Value *Val, float Accuracy) {
 
 void CodeGenFunction::SetSqrtFPAccuracy(llvm::Value *Val) {
   llvm::Type *EltTy = Val->getType()->getScalarType();
-  if (!EltTy->isFloatTy())
+  if (!EltTy->isFloatTy() && !EltTy->isHalfTy())
     return;
 
   if ((getLangOpts().OpenCL &&
        !CGM.getCodeGenOpts().OpenCLCorrectlyRoundedDivSqrt) ||
       (getLangOpts().HIP && getLangOpts().CUDAIsDevice &&
        !CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt)) {
-    // OpenCL v1.1 s7.4: minimum accuracy of single precision / is 3ulp
+    // OpenCL v1.1 s7.4: minimum accuracy of single precision sqrt is 3 ulp.
+    // OpenCL v3.0 s7.4: minimum accuracy of half precision sqrt is 1.5 ulp.
     //
     // OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt
     // build option allows an application to specify that single precision
@@ -6994,20 +6995,21 @@ void CodeGenFunction::SetSqrtFPAccuracy(llvm::Value *Val) {
     // source are correctly rounded.
     //
     // TODO: CUDA has a prec-sqrt flag
-    SetFPAccuracy(Val, 3.0f);
+    SetFPAccuracy(Val, EltTy->isFloatTy() ? 3.0f : 1.5f);
   }
 }
 
 void CodeGenFunction::SetDivFPAccuracy(llvm::Value *Val) {
   llvm::Type *EltTy = Val->getType()->getScalarType();
-  if (!EltTy->isFloatTy())
+  if (!EltTy->isFloatTy() && !EltTy->isHalfTy())
     return;
 
   if ((getLangOpts().OpenCL &&
        !CGM.getCodeGenOpts().OpenCLCorrectlyRoundedDivSqrt) ||
       (getLangOpts().HIP && getLangOpts().CUDAIsDevice &&
        !CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt)) {
-    // OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5ulp
+    // OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5 ulp.
+    // OpenCL v3.0 s7.4: minimum accuracy of half precision / is 1 ulp.
     //
     // OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt
     // build option allows an application to specify that single precision
@@ -7015,7 +7017,7 @@ void CodeGenFunction::SetDivFPAccuracy(llvm::Value *Val) {
     // source are correctly rounded.
     //
     // TODO: CUDA has a prec-div flag
-    SetFPAccuracy(Val, 2.5f);
+    SetFPAccuracy(Val, EltTy->isFloatTy() ? 2.5f : 1.f);
   }
 }
 
diff --git a/clang/test/CodeGenOpenCL/fpmath.cl b/clang/test/CodeGenOpenCL/fpmath.cl
index f3649d52e0091..5915496b3963d 100644
--- a/clang/test/CodeGenOpenCL/fpmath.cl
+++ b/clang/test/CodeGenOpenCL/fpmath.cl
@@ -1,8 +1,44 @@
 // RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown | FileCheck --check-prefix=CHECK --check-prefix=NODIVOPT %s
 // RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown -cl-fp32-correctly-rounded-divide-sqrt | FileCheck --check-prefix=CHECK --check-prefix=DIVOPT %s
-// RUN: %clang_cc1 %s -emit-llvm -o - -DNOFP64 -cl-std=CL1.2 -triple r600-unknown-unknown -target-cpu r600 -pedantic | FileCheck --check-prefix=CHECK-FLT %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -DNOFP16 -DNOFP64 -cl-std=CL1.2 -triple r600-unknown-unknown -target-cpu r600 -pedantic | FileCheck --check-prefix=CHECK-FLT %s
 // RUN: %clang_cc1 %s -emit-llvm -o - -DFP64 -cl-std=CL1.2 -triple spir-unknown-unknown -pedantic | FileCheck --check-prefix=CHECK-DBL %s
 
+#ifndef NOFP16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+typedef __attribute__(( ext_vector_type(4) )) half half4;
+
+half hpscalardiv(half a, half b) {
+  // CHECK: @hpscalardiv
+  // CHECK: fdiv{{.*}},
+  // NODIVOPT: !fpmath ![[MD_HFDIV:[0-9]+]]
+  // DIVOPT-NOT: !fpmath !{{[0-9]+}}
+  return a / b;
+}
+
+half4 hpvectordiv(half4 a, half4 b) {
+  // CHECK: @hpvectordiv
+  // CHECK: fdiv{{.*}},
+  // NODIVOPT: !fpmath ![[MD_HFDIV]]
+  // DIVOPT-NOT: !fpmath !{{[0-9]+}}
+  return a / b;
+}
+
+half elementwise_sqrt_f16(half a) {
+  // CHECK-LABEL: @elementwise_sqrt_f16
+  // NODIVOPT: call half @llvm.sqrt.f16(half %{{.+}}), !fpmath ![[MD_HSQRT:[0-9]+]]
+  // DIVOPT: call half @llvm.sqrt.f16(half %{{.+}}){{$}}
+  return __builtin_elementwise_sqrt(a);
+}
+
+half4 elementwise_sqrt_v4f16(half4 a) {
+  // CHECK-LABEL: @elementwise_sqrt_v4f16
+  // NODIVOPT: call <4 x half> @llvm.sqrt.v4f16(<4 x half> %{{.+}}), !fpmath ![[MD_HSQRT]]
+  // DIVOPT: call <4 x half> @llvm.sqrt.v4f16(<4 x half> %{{.+}}){{$}}
+  return __builtin_elementwise_sqrt(a);
+}
+
+#endif // NOFP16
+
 typedef __attribute__(( ext_vector_type(4) )) float float4;
 
 float spscalardiv(float a, float b) {
@@ -30,14 +66,14 @@ float spscalarsqrt(float a) {
 
 float elementwise_sqrt_f32(float a) {
   // CHECK-LABEL: @elementwise_sqrt_f32
-  // NODIVOPT: call float @llvm.sqrt.f32(float %{{.+}}), !fpmath ![[MD_SQRT:[0-9]+]]
+  // NODIVOPT: call float @llvm.sqrt.f32(float %{{.+}}), !fpmath ![[MD_SQRT]]
   // DIVOPT: call float @llvm.sqrt.f32(float %{{.+}}){{$}}
   return __builtin_elementwise_sqrt(a);
 }
 
 float4 elementwise_sqrt_v4f32(float4 a) {
   // CHECK-LABEL: @elementwise_sqrt_v4f32
-  // NODIVOPT: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !fpmath ![[MD_SQRT:[0-9]+]]
+  // NODIVOPT: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !fpmath ![[MD_SQRT]]
   // DIVOPT: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}){{$}}
   return __builtin_elementwise_sqrt(a);
 }
@@ -90,5 +126,7 @@ double4 elementwise_sqrt_v4f64(double4 a) {
 
 #endif
 
+// NODIVOPT: ![[MD_HFDIV]] = !{float 1.000000e+00}
+// NODIVOPT: ![[MD_HSQRT]] = !{float 1.500000e+00}
 // NODIVOPT: ![[MD_FDIV]] = !{float 2.500000e+00}
 // NODIVOPT: ![[MD_SQRT]] = !{float 3.000000e+00}
diff --git a/clang/test/CodeGenOpenCL/sqrt-fpmath.cl b/clang/test/CodeGenOpenCL/sqrt-fpmath.cl
index d0637283a7ec1..6f4adf56930ff 100644
--- a/clang/test/CodeGenOpenCL/sqrt-fpmath.cl
+++ b/clang/test/CodeGenOpenCL/sqrt-fpmath.cl
@@ -134,46 +134,52 @@ double16 call_sqrt_v16f64(double16 x) {
 }
 
 
-// Not for f16
 // CHECK-LABEL: define {{.*}} half @call_sqrt_f16(
-// CHECK: call {{.*}} half @_Z4sqrtDh(half noundef %{{.+}}) #{{[0-9]+$}}{{$}}
+// DEFAULT: call {{.*}} half @_Z4sqrtDh(half noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[HFPMATH:\![0-9]+]]{{$}}
+// CORRECTLYROUNDED: call {{.*}} half @_Z4sqrtDh(half noundef %{{.+}}) #{{[0-9]+$}}{{$}}
 half call_sqrt_f16(half x) {
   return sqrt(x);
 }
 
 
 // CHECK-LABEL: define {{.*}} <2 x half> @call_sqrt_v2f16(
-// CHECK: call {{.*}} <2 x half> @_Z4sqrtDv2_Dh(<2 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
+// DEFAULT: call {{.*}} <2 x half> @_Z4sqrtDv2_Dh(<2 x half> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[HFPMATH]]{{$}}
+// CORRECTLYROUNDED: call {{.*}} <2 x half> @_Z4sqrtDv2_Dh(<2 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
 half2 call_sqrt_v2f16(half2 x) {
   return sqrt(x);
 }
 
 
 // CHECK-LABEL: define {{.*}} <3 x half> @call_sqrt_v3f16(
-// CHECK: call {{.*}} <3 x half> @_Z4sqrtDv3_Dh(<3 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
+// DEFAULT: call {{.*}} <3 x half> @_Z4sqrtDv3_Dh(<3 x half> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[HFPMATH]]{{$}}
+// CORRECTLYROUNDED: call {{.*}} <3 x half> @_Z4sqrtDv3_Dh(<3 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
 half3 call_sqrt_v3f16(half3 x) {
   return sqrt(x);
 }
 
 
 // CHECK-LABEL: define {{.*}} <4 x half> @call_sqrt_v4f16(
-// CHECK: call {{.*}} <4 x half> @_Z4sqrtDv4_Dh(<4 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
+// DEFAULT: call {{.*}} <4 x half> @_Z4sqrtDv4_Dh(<4 x half> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[HFPMATH]]{{$}}
+// CORRECTLYROUNDED: call {{.*}} <4 x half> @_Z4sqrtDv4_Dh(<4 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
 half4 call_sqrt_v4f16(half4 x) {
   return sqrt(x);
 }
 
 
 // CHECK-LABEL: define {{.*}} <8 x half> @call_sqrt_v8f16(
-// CHECK: call {{.*}} <8 x half> @_Z4sqrtDv8_Dh(<8 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
+// DEFAULT: call {{.*}} <8 x half> @_Z4sqrtDv8_Dh(<8 x half> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[HFPMATH]]{{$}}
+// CORRECTLYROUNDED: call {{.*}} <8 x half> @_Z4sqrtDv8_Dh(<8 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
 half8 call_sqrt_v8f16(half8 x) {
   return sqrt(x);
 }
 
 
 // CHECK-LABEL: define {{.*}} <16 x half> @call_sqrt_v16f16(
-// CHECK: call {{.*}} <16 x half> @_Z4sqrtDv16_Dh(<16 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
+// DEFAULT: call {{.*}} <16 x half> @_Z4sqrtDv16_Dh(<16 x half> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[HFPMATH]]{{$}}
+// CORRECTLYROUNDED: call {{.*}} <16 x half> @_Z4sqrtDv16_Dh(<16 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
 half16 call_sqrt_v16f16(half16 x) {
   return sqrt(x);
 }
 
 // DEFAULT: [[FPMATH]] = !{float 3.000000e+00}
+// DEFAULT: [[HFPMATH]] = !{float 1.500000e+00}

``````````

</details>


https://github.com/llvm/llvm-project/pull/179621