[clang] [OpenCL] Set half-precision Div and Sqrt accuracy (PR #179621)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Feb 3 23:36:00 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-codegen
Author: Wenju He (wenju-he)
<details>
<summary>Changes</summary>
The OpenCL spec relaxed half-precision divide to 1 ULP and sqrt to 1.5 ULP in https://github.com/KhronosGroup/OpenCL-Docs/pull/1293 and https://github.com/KhronosGroup/OpenCL-Docs/pull/1386
---
Full diff: https://github.com/llvm/llvm-project/pull/179621.diff
3 Files Affected:
- (modified) clang/lib/CodeGen/CGExpr.cpp (+8-6)
- (modified) clang/test/CodeGenOpenCL/fpmath.cl (+41-3)
- (modified) clang/test/CodeGenOpenCL/sqrt-fpmath.cl (+13-7)
``````````diff
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 339314ecff9cd..71a14d65c1bfe 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -6979,14 +6979,15 @@ void CodeGenFunction::SetFPAccuracy(llvm::Value *Val, float Accuracy) {
void CodeGenFunction::SetSqrtFPAccuracy(llvm::Value *Val) {
llvm::Type *EltTy = Val->getType()->getScalarType();
- if (!EltTy->isFloatTy())
+ if (!EltTy->isFloatTy() && !EltTy->isHalfTy())
return;
if ((getLangOpts().OpenCL &&
!CGM.getCodeGenOpts().OpenCLCorrectlyRoundedDivSqrt) ||
(getLangOpts().HIP && getLangOpts().CUDAIsDevice &&
!CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt)) {
- // OpenCL v1.1 s7.4: minimum accuracy of single precision / is 3ulp
+ // OpenCL v1.1 s7.4: minimum accuracy of single precision sqrt is 3 ulp.
+ // OpenCL v3.0 s7.4: minimum accuracy of half precision sqrt is 1.5 ulp.
//
// OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt
// build option allows an application to specify that single precision
@@ -6994,20 +6995,21 @@ void CodeGenFunction::SetSqrtFPAccuracy(llvm::Value *Val) {
// source are correctly rounded.
//
// TODO: CUDA has a prec-sqrt flag
- SetFPAccuracy(Val, 3.0f);
+ SetFPAccuracy(Val, EltTy->isFloatTy() ? 3.0f : 1.5f);
}
}
void CodeGenFunction::SetDivFPAccuracy(llvm::Value *Val) {
llvm::Type *EltTy = Val->getType()->getScalarType();
- if (!EltTy->isFloatTy())
+ if (!EltTy->isFloatTy() && !EltTy->isHalfTy())
return;
if ((getLangOpts().OpenCL &&
!CGM.getCodeGenOpts().OpenCLCorrectlyRoundedDivSqrt) ||
(getLangOpts().HIP && getLangOpts().CUDAIsDevice &&
!CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt)) {
- // OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5ulp
+ // OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5 ulp.
+ // OpenCL v3.0 s7.4: minimum accuracy of half precision / is 1 ulp.
//
// OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt
// build option allows an application to specify that single precision
@@ -7015,7 +7017,7 @@ void CodeGenFunction::SetDivFPAccuracy(llvm::Value *Val) {
// source are correctly rounded.
//
// TODO: CUDA has a prec-div flag
- SetFPAccuracy(Val, 2.5f);
+ SetFPAccuracy(Val, EltTy->isFloatTy() ? 2.5f : 1.f);
}
}
diff --git a/clang/test/CodeGenOpenCL/fpmath.cl b/clang/test/CodeGenOpenCL/fpmath.cl
index f3649d52e0091..5915496b3963d 100644
--- a/clang/test/CodeGenOpenCL/fpmath.cl
+++ b/clang/test/CodeGenOpenCL/fpmath.cl
@@ -1,8 +1,44 @@
// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown | FileCheck --check-prefix=CHECK --check-prefix=NODIVOPT %s
// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown -cl-fp32-correctly-rounded-divide-sqrt | FileCheck --check-prefix=CHECK --check-prefix=DIVOPT %s
-// RUN: %clang_cc1 %s -emit-llvm -o - -DNOFP64 -cl-std=CL1.2 -triple r600-unknown-unknown -target-cpu r600 -pedantic | FileCheck --check-prefix=CHECK-FLT %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -DNOFP16 -DNOFP64 -cl-std=CL1.2 -triple r600-unknown-unknown -target-cpu r600 -pedantic | FileCheck --check-prefix=CHECK-FLT %s
// RUN: %clang_cc1 %s -emit-llvm -o - -DFP64 -cl-std=CL1.2 -triple spir-unknown-unknown -pedantic | FileCheck --check-prefix=CHECK-DBL %s
+#ifndef NOFP16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+typedef __attribute__(( ext_vector_type(4) )) half half4;
+
+half hpscalardiv(half a, half b) {
+ // CHECK: @hpscalardiv
+ // CHECK: fdiv{{.*}},
+ // NODIVOPT: !fpmath ![[MD_HFDIV:[0-9]+]]
+ // DIVOPT-NOT: !fpmath !{{[0-9]+}}
+ return a / b;
+}
+
+half4 hpvectordiv(half4 a, half4 b) {
+ // CHECK: @hpvectordiv
+ // CHECK: fdiv{{.*}},
+ // NODIVOPT: !fpmath ![[MD_HFDIV]]
+ // DIVOPT-NOT: !fpmath !{{[0-9]+}}
+ return a / b;
+}
+
+half elementwise_sqrt_f16(half a) {
+ // CHECK-LABEL: @elementwise_sqrt_f16
+ // NODIVOPT: call half @llvm.sqrt.f16(half %{{.+}}), !fpmath ![[MD_HSQRT:[0-9]+]]
+ // DIVOPT: call half @llvm.sqrt.f16(half %{{.+}}){{$}}
+ return __builtin_elementwise_sqrt(a);
+}
+
+half4 elementwise_sqrt_v4f16(half4 a) {
+ // CHECK-LABEL: @elementwise_sqrt_v4f16
+ // NODIVOPT: call <4 x half> @llvm.sqrt.v4f16(<4 x half> %{{.+}}), !fpmath ![[MD_HSQRT]]
+ // DIVOPT: call <4 x half> @llvm.sqrt.v4f16(<4 x half> %{{.+}}){{$}}
+ return __builtin_elementwise_sqrt(a);
+}
+
+#endif // NOFP16
+
typedef __attribute__(( ext_vector_type(4) )) float float4;
float spscalardiv(float a, float b) {
@@ -30,14 +66,14 @@ float spscalarsqrt(float a) {
float elementwise_sqrt_f32(float a) {
// CHECK-LABEL: @elementwise_sqrt_f32
- // NODIVOPT: call float @llvm.sqrt.f32(float %{{.+}}), !fpmath ![[MD_SQRT:[0-9]+]]
+ // NODIVOPT: call float @llvm.sqrt.f32(float %{{.+}}), !fpmath ![[MD_SQRT]]
// DIVOPT: call float @llvm.sqrt.f32(float %{{.+}}){{$}}
return __builtin_elementwise_sqrt(a);
}
float4 elementwise_sqrt_v4f32(float4 a) {
// CHECK-LABEL: @elementwise_sqrt_v4f32
- // NODIVOPT: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !fpmath ![[MD_SQRT:[0-9]+]]
+ // NODIVOPT: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !fpmath ![[MD_SQRT]]
// DIVOPT: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}){{$}}
return __builtin_elementwise_sqrt(a);
}
@@ -90,5 +126,7 @@ double4 elementwise_sqrt_v4f64(double4 a) {
#endif
+// NODIVOPT: ![[MD_HFDIV]] = !{float 1.000000e+00}
+// NODIVOPT: ![[MD_HSQRT]] = !{float 1.500000e+00}
// NODIVOPT: ![[MD_FDIV]] = !{float 2.500000e+00}
// NODIVOPT: ![[MD_SQRT]] = !{float 3.000000e+00}
diff --git a/clang/test/CodeGenOpenCL/sqrt-fpmath.cl b/clang/test/CodeGenOpenCL/sqrt-fpmath.cl
index d0637283a7ec1..6f4adf56930ff 100644
--- a/clang/test/CodeGenOpenCL/sqrt-fpmath.cl
+++ b/clang/test/CodeGenOpenCL/sqrt-fpmath.cl
@@ -134,46 +134,52 @@ double16 call_sqrt_v16f64(double16 x) {
}
-// Not for f16
// CHECK-LABEL: define {{.*}} half @call_sqrt_f16(
-// CHECK: call {{.*}} half @_Z4sqrtDh(half noundef %{{.+}}) #{{[0-9]+$}}{{$}}
+// DEFAULT: call {{.*}} half @_Z4sqrtDh(half noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[HFPMATH:\![0-9]+]]{{$}}
+// CORRECTLYROUNDED: call {{.*}} half @_Z4sqrtDh(half noundef %{{.+}}) #{{[0-9]+$}}{{$}}
half call_sqrt_f16(half x) {
return sqrt(x);
}
// CHECK-LABEL: define {{.*}} <2 x half> @call_sqrt_v2f16(
-// CHECK: call {{.*}} <2 x half> @_Z4sqrtDv2_Dh(<2 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
+// DEFAULT: call {{.*}} <2 x half> @_Z4sqrtDv2_Dh(<2 x half> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[HFPMATH]]{{$}}
+// CORRECTLYROUNDED: call {{.*}} <2 x half> @_Z4sqrtDv2_Dh(<2 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
half2 call_sqrt_v2f16(half2 x) {
return sqrt(x);
}
// CHECK-LABEL: define {{.*}} <3 x half> @call_sqrt_v3f16(
-// CHECK: call {{.*}} <3 x half> @_Z4sqrtDv3_Dh(<3 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
+// DEFAULT: call {{.*}} <3 x half> @_Z4sqrtDv3_Dh(<3 x half> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[HFPMATH]]{{$}}
+// CORRECTLYROUNDED: call {{.*}} <3 x half> @_Z4sqrtDv3_Dh(<3 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
half3 call_sqrt_v3f16(half3 x) {
return sqrt(x);
}
// CHECK-LABEL: define {{.*}} <4 x half> @call_sqrt_v4f16(
-// CHECK: call {{.*}} <4 x half> @_Z4sqrtDv4_Dh(<4 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
+// DEFAULT: call {{.*}} <4 x half> @_Z4sqrtDv4_Dh(<4 x half> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[HFPMATH]]{{$}}
+// CORRECTLYROUNDED: call {{.*}} <4 x half> @_Z4sqrtDv4_Dh(<4 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
half4 call_sqrt_v4f16(half4 x) {
return sqrt(x);
}
// CHECK-LABEL: define {{.*}} <8 x half> @call_sqrt_v8f16(
-// CHECK: call {{.*}} <8 x half> @_Z4sqrtDv8_Dh(<8 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
+// DEFAULT: call {{.*}} <8 x half> @_Z4sqrtDv8_Dh(<8 x half> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[HFPMATH]]{{$}}
+// CORRECTLYROUNDED: call {{.*}} <8 x half> @_Z4sqrtDv8_Dh(<8 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
half8 call_sqrt_v8f16(half8 x) {
return sqrt(x);
}
// CHECK-LABEL: define {{.*}} <16 x half> @call_sqrt_v16f16(
-// CHECK: call {{.*}} <16 x half> @_Z4sqrtDv16_Dh(<16 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
+// DEFAULT: call {{.*}} <16 x half> @_Z4sqrtDv16_Dh(<16 x half> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[HFPMATH]]{{$}}
+// CORRECTLYROUNDED: call {{.*}} <16 x half> @_Z4sqrtDv16_Dh(<16 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
half16 call_sqrt_v16f16(half16 x) {
return sqrt(x);
}
// DEFAULT: [[FPMATH]] = !{float 3.000000e+00}
+// DEFAULT: [[HFPMATH]] = !{float 1.500000e+00}
``````````
</details>
https://github.com/llvm/llvm-project/pull/179621
More information about the cfe-commits mailing list