[clang] clang/OpenCL: set sqrt fp accuracy on call to Z4sqrt (PR #66651)

Mon Sep 18 09:35:36 PDT 2023

https://github.com/rjodinchr updated https://github.com/llvm/llvm-project/pull/66651

>From b6df142239256e979a70896f324f9ed3547c640c Mon Sep 17 00:00:00 2001
From: Romaric Jodin <rjodin at chromium.org>
Date: Mon, 18 Sep 2023 09:34:56 +0200
Subject: [PATCH 1/2] Revert "clang/OpenCL: Add inline implementations of sqrt
 in builtin header"

This reverts commit 15e0fe0b6122e32657b98daf74a1fce028d2e5bf.
---
 clang/lib/Headers/opencl-c-base.h       |  58 -------
 clang/lib/Headers/opencl-c.h            |  26 +++
 clang/lib/Sema/OpenCLBuiltins.td        |   5 +-
 clang/test/CodeGenOpenCL/sqrt-fpmath.cl | 201 ------------------------
 4 files changed, 27 insertions(+), 263 deletions(-)
 delete mode 100644 clang/test/CodeGenOpenCL/sqrt-fpmath.cl

diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h
index d56e5ceae652ad5..2494f6213fc5695 100644
--- a/clang/lib/Headers/opencl-c-base.h
+++ b/clang/lib/Headers/opencl-c-base.h
@@ -819,64 +819,6 @@ int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2)))
 
 #endif // cl_intel_device_side_avc_motion_estimation
 
-/**
- * Compute square root.
- *
- * Provide inline implementations using the builtin so that we get appropriate
- * !fpmath based on -cl-fp32-correctly-rounded-divide-sqrt, attached to
- * llvm.sqrt. The implementation should still provide an external definition.
- */
-#define __ovld __attribute__((overloadable))
-#define __cnfn __attribute__((const))
-
-inline float __ovld __cnfn sqrt(float __x) {
-  return __builtin_elementwise_sqrt(__x);
-}
-
-inline float2 __ovld __cnfn sqrt(float2 __x) {
-  return __builtin_elementwise_sqrt(__x);
-}
-
-inline float3 __ovld __cnfn sqrt(float3 __x) {
-  return __builtin_elementwise_sqrt(__x);
-}
-
-inline float4 __ovld __cnfn sqrt(float4 __x) {
-  return __builtin_elementwise_sqrt(__x);
-}
-
-inline float8 __ovld __cnfn sqrt(float8 __x) {
-  return __builtin_elementwise_sqrt(__x);
-}
-
-inline float16 __ovld __cnfn sqrt(float16 __x) {
-  return __builtin_elementwise_sqrt(__x);
-}
-
-// We only really want to define the float variants here. However
-// -fdeclare-opencl-builtins will not work if some overloads are already
- // provided in the base header, so provide all overloads here.
-
-#ifdef cl_khr_fp64
-double __ovld __cnfn sqrt(double);
-double2 __ovld __cnfn sqrt(double2);
-double3 __ovld __cnfn sqrt(double3);
-double4 __ovld __cnfn sqrt(double4);
-double8 __ovld __cnfn sqrt(double8);
-double16 __ovld __cnfn sqrt(double16);
-#endif //cl_khr_fp64
-#ifdef cl_khr_fp16
-half __ovld __cnfn sqrt(half);
-half2 __ovld __cnfn sqrt(half2);
-half3 __ovld __cnfn sqrt(half3);
-half4 __ovld __cnfn sqrt(half4);
-half8 __ovld __cnfn sqrt(half8);
-half16 __ovld __cnfn sqrt(half16);
-#endif //cl_khr_fp16
-
-#undef __cnfn
-#undef __ovld
-
 // Disable any extensions we may have enabled previously.
 #pragma OPENCL EXTENSION all : disable
 
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index 1efbbf8f8ee6a01..288bb18bc654ebc 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -8496,6 +8496,32 @@ half8 __ovld __cnfn sinpi(half8);
 half16 __ovld __cnfn sinpi(half16);
 #endif //cl_khr_fp16
 
+/**
+ * Compute square root.
+ */
+float __ovld __cnfn sqrt(float);
+float2 __ovld __cnfn sqrt(float2);
+float3 __ovld __cnfn sqrt(float3);
+float4 __ovld __cnfn sqrt(float4);
+float8 __ovld __cnfn sqrt(float8);
+float16 __ovld __cnfn sqrt(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn sqrt(double);
+double2 __ovld __cnfn sqrt(double2);
+double3 __ovld __cnfn sqrt(double3);
+double4 __ovld __cnfn sqrt(double4);
+double8 __ovld __cnfn sqrt(double8);
+double16 __ovld __cnfn sqrt(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn sqrt(half);
+half2 __ovld __cnfn sqrt(half2);
+half3 __ovld __cnfn sqrt(half3);
+half4 __ovld __cnfn sqrt(half4);
+half8 __ovld __cnfn sqrt(half8);
+half16 __ovld __cnfn sqrt(half16);
+#endif //cl_khr_fp16
+
 /**
  * Compute tangent.
  */
diff --git a/clang/lib/Sema/OpenCLBuiltins.td b/clang/lib/Sema/OpenCLBuiltins.td
index 9db450281912d2f..0cceba090bd8f26 100644
--- a/clang/lib/Sema/OpenCLBuiltins.td
+++ b/clang/lib/Sema/OpenCLBuiltins.td
@@ -563,15 +563,12 @@ foreach name = ["acos", "acosh", "acospi",
                 "log", "log2", "log10", "log1p", "logb",
                 "rint", "round", "rsqrt",
                 "sin", "sinh", "sinpi",
+                "sqrt",
                 "tan", "tanh", "tanpi",
                 "tgamma", "trunc",
                 "lgamma"] in {
     def : Builtin<name, [FGenTypeN, FGenTypeN], Attr.Const>;
 }
-
-// sqrt is handled in opencl-c-base.h to handle
-// -cl-fp32-correctly-rounded-divide-sqrt.
-
 foreach name = ["nan"] in {
   def : Builtin<name, [GenTypeFloatVecAndScalar, GenTypeUIntVecAndScalar], Attr.Const>;
   def : Builtin<name, [GenTypeDoubleVecAndScalar, GenTypeULongVecAndScalar], Attr.Const>;
diff --git a/clang/test/CodeGenOpenCL/sqrt-fpmath.cl b/clang/test/CodeGenOpenCL/sqrt-fpmath.cl
deleted file mode 100644
index df30085cba2e7d5..000000000000000
--- a/clang/test/CodeGenOpenCL/sqrt-fpmath.cl
+++ /dev/null
@@ -1,201 +0,0 @@
-// Test that float variants of sqrt are emitted as available_externally inline
-// definitions that call the sqrt intrinsic with appropriate !fpmath metadata
-// depending on -cl-fp32-correctly-rounded-divide-sqrt
-
-// Test with -fdeclare-opencl-builtins
-// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT %s
-// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED %s
-
-// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-unsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT-UNSAFE %s
-// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -cl-unsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED-UNSAFE %s
-
-// Test without -fdeclare-opencl-builtins
-// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT %s
-// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED %s
-
-// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -cl-unsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT-UNSAFE %s
-// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -cl-unsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED-UNSAFE %s
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-// CHECK-LABEL: define {{.*}} float @call_sqrt_f32(
-// CHECK: call {{.*}} float @_Z4sqrtf(float noundef %{{.+}}) #{{[0-9]+$}}
-float call_sqrt_f32(float x) {
-  return sqrt(x);
-}
-
-// CHECK-LABEL: define available_externally float @_Z4sqrtf(float noundef %__x)
-// DEFAULT: call float @llvm.sqrt.f32(float %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
-// CORRECTLYROUNDED: call float @llvm.sqrt.f32(float %{{.+}}){{$}}
-
-// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn float @llvm.sqrt.f32(float %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
-// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn float @llvm.sqrt.f32(float %{{.+}}){{$}}
-
-// CHECK-LABEL: define {{.*}} <2 x float> @call_sqrt_v2f32(
-// CHECK: call {{.*}} <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %{{.*}}) #{{[0-9]+$}}
-float2 call_sqrt_v2f32(float2 x) {
-  return sqrt(x);
-}
-
-// CHECK-LABEL: define available_externally <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %__x)
-// DEFAULT: call <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
-// CORRECTLYROUNDED: call <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.+}}){{$}}
-
-// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
-// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.+}}){{$}}
-
-// CHECK-LABEL: define {{.*}} <3 x float> @call_sqrt_v3f32(
-// CHECK: call {{.*}} <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %{{.*}}) #{{[0-9]+$}}
-float3 call_sqrt_v3f32(float3 x) {
-  return sqrt(x);
-}
-
-// CHECK-LABEL: define available_externally <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %__x)
-// DEFAULT: call <3 x float> @llvm.sqrt.v3f32(<3 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
-// CORRECTLYROUNDED: call <3 x float> @llvm.sqrt.v3f32(<3 x float> %{{.+}}){{$}}
-
-// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <3 x float> @llvm.sqrt.v3f32(<3 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
-// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <3 x float> @llvm.sqrt.v3f32(<3 x float> %{{.+}}){{$}}
-
-
-// CHECK-LABEL: define {{.*}} <4 x float> @call_sqrt_v4f32(
-// CHECK: call {{.*}} <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %{{.*}}) #{{[0-9]+$}}
-float4 call_sqrt_v4f32(float4 x) {
-  return sqrt(x);
-}
-
-// CHECK-LABEL: define available_externally <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %__x)
-// DEFAULT: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
-// CORRECTLYROUNDED: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}){{$}}
-
-// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
-// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}){{$}}
-
-// CHECK-LABEL: define {{.*}} <8 x float> @call_sqrt_v8f32(
-// CHECK: call {{.*}} <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %{{.*}}) #{{[0-9]+$}}
-float8 call_sqrt_v8f32(float8 x) {
-  return sqrt(x);
-}
-
-// CHECK-LABEL: define available_externally <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %__x)
-// DEFAULT: call <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
-// CORRECTLYROUNDED: call <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.+}}){{$}}
-
-// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
-// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.+}}){{$}}
-
-
-// CHECK-LABEL: define {{.*}} <16 x float> @call_sqrt_v16f32(
-// CHECK: call {{.*}} <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %{{.*}}) #{{[0-9]+$}}
-float16 call_sqrt_v16f32(float16 x) {
-  return sqrt(x);
-}
-
-// CHECK-LABEL: define available_externally <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %__x)
-// DEFAULT: call <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
-// CORRECTLYROUNDED: call <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.+}}){{$}}
-
-// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
-// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.+}}){{$}}
-
-
-// Not for f64
-// CHECK-LABEL: define {{.*}} double @call_sqrt_f64(
-// CHECK: call {{.*}} double @_Z4sqrtd(double noundef %{{.+}}) #{{[0-9]+$}}
-double call_sqrt_f64(double x) {
-  return sqrt(x);
-}
-
-// CHECK-NOT: define
-
-// Not for f64
-// CHECK-LABEL: define {{.*}} <2 x double> @call_sqrt_v2f64(
-// CHECK: call {{.*}} <2 x double> @_Z4sqrtDv2_d(<2 x double> noundef %{{.+}}) #{{[0-9]+$}}
-double2 call_sqrt_v2f64(double2 x) {
-  return sqrt(x);
-}
-
-// CHECK-NOT: define
-
-// CHECK-LABEL: define {{.*}} <3 x double> @call_sqrt_v3f64(
-// CHECK: call {{.*}} <3 x double> @_Z4sqrtDv3_d(<3 x double> noundef %{{.+}}) #{{[0-9]+$}}
-double3 call_sqrt_v3f64(double3 x) {
-  return sqrt(x);
-}
-
-// CHECK-NOT: define
-
-// CHECK-LABEL: define {{.*}} <4 x double> @call_sqrt_v4f64(
-// CHECK: call {{.*}} <4 x double> @_Z4sqrtDv4_d(<4 x double> noundef %{{.+}}) #{{[0-9]+$}}
-double4 call_sqrt_v4f64(double4 x) {
-  return sqrt(x);
-}
-
-// CHECK-NOT: define
-
-// CHECK-LABEL: define {{.*}} <8 x double> @call_sqrt_v8f64(
-// CHECK: call {{.*}} <8 x double> @_Z4sqrtDv8_d(<8 x double> noundef %{{.+}}) #{{[0-9]+$}}
-double8 call_sqrt_v8f64(double8 x) {
-  return sqrt(x);
-}
-
-// CHECK-NOT: define
-
-// CHECK-LABEL: define {{.*}} <16 x double> @call_sqrt_v16f64(
-// CHECK: call {{.*}} <16 x double> @_Z4sqrtDv16_d(<16 x double> noundef %{{.+}}) #{{[0-9]+$}}
-double16 call_sqrt_v16f64(double16 x) {
-  return sqrt(x);
-}
-
-// CHECK-NOT: define
-
-// Not for f16
-// CHECK-LABEL: define {{.*}} half @call_sqrt_f16(
-// CHECK: call {{.*}} half @_Z4sqrtDh(half noundef %{{.+}}) #{{[0-9]+$}}
-half call_sqrt_f16(half x) {
-  return sqrt(x);
-}
-
-// CHECK-NOT: define
-
-// CHECK-LABEL: define {{.*}} <2 x half> @call_sqrt_v2f16(
-// CHECK: call {{.*}} <2 x half> @_Z4sqrtDv2_Dh(<2 x half> noundef %{{.+}}) #{{[0-9]+$}}
-half2 call_sqrt_v2f16(half2 x) {
-  return sqrt(x);
-}
-
-// CHECK-NOT: define
-
-// CHECK-LABEL: define {{.*}} <3 x half> @call_sqrt_v3f16(
-// CHECK: call {{.*}} <3 x half> @_Z4sqrtDv3_Dh(<3 x half> noundef %{{.+}}) #{{[0-9]+$}}
-half3 call_sqrt_v3f16(half3 x) {
-  return sqrt(x);
-}
-
-// CHECK-NOT: define
-
-// CHECK-LABEL: define {{.*}} <4 x half> @call_sqrt_v4f16(
-// CHECK: call {{.*}} <4 x half> @_Z4sqrtDv4_Dh(<4 x half> noundef %{{.+}}) #{{[0-9]+$}}
-half4 call_sqrt_v4f16(half4 x) {
-  return sqrt(x);
-}
-
-// CHECK-NOT: define
-
-// CHECK-LABEL: define {{.*}} <8 x half> @call_sqrt_v8f16(
-// CHECK: call {{.*}} <8 x half> @_Z4sqrtDv8_Dh(<8 x half> noundef %{{.+}}) #{{[0-9]+$}}
-half8 call_sqrt_v8f16(half8 x) {
-  return sqrt(x);
-}
-
-// CHECK-NOT: define
-
-// CHECK-LABEL: define {{.*}} <16 x half> @call_sqrt_v16f16(
-// CHECK: call {{.*}} <16 x half> @_Z4sqrtDv16_Dh(<16 x half> noundef %{{.+}}) #{{[0-9]+$}}
-half16 call_sqrt_v16f16(half16 x) {
-  return sqrt(x);
-}
-
-// CHECK-NOT: define
-
-// DEFAULT: [[$FPMATH]] = !{float 3.000000e+00}

>From 1b37c7feb97ef47d7b06512f82b46b3df8a0085f Mon Sep 17 00:00:00 2001
From: Romaric Jodin <rjodin at chromium.org>
Date: Mon, 18 Sep 2023 16:15:06 +0200
Subject: [PATCH 2/2] clang/OpenCL: set sqrt fp accuracy on call to Z4sqrt

---
 clang/lib/CodeGen/CGCall.cpp            |   4 +
 clang/test/CodeGenOpenCL/sqrt-fpmath.cl | 179 ++++++++++++++++++++++++
 2 files changed, 183 insertions(+)
 create mode 100644 clang/test/CodeGenOpenCL/sqrt-fpmath.cl

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index e15a4634b1d041b..0829e93ba304962 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5612,6 +5612,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
                               BundleList);
     EmitBlock(Cont);
   }
+  if (CI->getCalledFunction() && CI->getCalledFunction()->hasName() &&
+      CI->getCalledFunction()->getName().startswith("_Z4sqrt")) {
+    SetSqrtFPAccuracy(CI);
+  }
   if (callOrInvoke)
     *callOrInvoke = CI;
 
diff --git a/clang/test/CodeGenOpenCL/sqrt-fpmath.cl b/clang/test/CodeGenOpenCL/sqrt-fpmath.cl
new file mode 100644
index 000000000000000..7afde7f91bdfeb5
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/sqrt-fpmath.cl
@@ -0,0 +1,179 @@
+// Test that float variants of sqrt are emitted as available_externally inline
+// definitions that call the sqrt intrinsic with appropriate !fpmath metadata
+// depending on -cl-fp32-correctly-rounded-divide-sqrt
+
+// Test with -fdeclare-opencl-builtins
+// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -S -emit-llvm -o %t.ll %s
+// RUN: FileCheck -check-prefixes=CHECK,DEFAULT %s < %t.ll
+// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -S -emit-llvm -o %t.ll %s
+// RUN: FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED %s < %t.ll
+
+// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-unsafe-math-optimizations -S -emit-llvm -o %t.ll %s
+// RUN: FileCheck -check-prefixes=CHECK,DEFAULT-UNSAFE %s < %t.ll
+// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -cl-unsafe-math-optimizations -S -emit-llvm -o %t.ll %s
+// RUN: FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED-UNSAFE %s < %t.ll
+
+// Test without -fdeclare-opencl-builtins
+// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT %s
+// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED %s
+
+// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -cl-unsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT-UNSAFE %s
+// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -cl-unsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED-UNSAFE %s
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// CHECK-LABEL: define {{.*}} float @call_sqrt_f32(
+// DEFAULT: call float @_Z4sqrtf(float noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH:\![0-9]+]]{{$}}
+// CORRECTLYROUNDED: call float @_Z4sqrtf(float noundef %{{.+}}) #{{[0-9]+}}{{$}}
+
+// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn float @_Z4sqrtf(float noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH:\![0-9]+]]{{$}}
+// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn float @_Z4sqrtf(float noundef %{{.+}}) #{{[0-9]+}}{{$}}
+float call_sqrt_f32(float x) {
+  return sqrt(x);
+}
+
+// CHECK-LABEL: define {{.*}} <2 x float> @call_sqrt_v2f32(
+// DEFAULT: call <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}}
+// CORRECTLYROUNDED: call <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}}
+
+// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}}
+// CORRECTLY-UNSAFE: call reassoc nsz arcp contract afn <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}}
+float2 call_sqrt_v2f32(float2 x) {
+  return sqrt(x);
+}
+
+
+// CHECK-LABEL: define {{.*}} <3 x float> @call_sqrt_v3f32(
+// DEFAULT: call <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}}
+// CORRECTLYROUNDED: call <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}}
+
+// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}}
+// CORRECTLY-UNSAFE: call reassoc nsz arcp contract afn <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}}
+float3 call_sqrt_v3f32(float3 x) {
+  return sqrt(x);
+}
+
+
+
+// CHECK-LABEL: define {{.*}} <4 x float> @call_sqrt_v4f32(
+// DEFAULT: call <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}}
+// CORRECTLYROUNDED: call <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}}
+
+// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}}
+// CORRECTLY-UNSAFE: call reassoc nsz arcp contract afn <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}}
+float4 call_sqrt_v4f32(float4 x) {
+  return sqrt(x);
+}
+
+
+// CHECK-LABEL: define {{.*}} <8 x float> @call_sqrt_v8f32(
+// DEFAULT: call <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}}
+// CORRECTLYROUNDED: call <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}}
+
+// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}}
+// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}}
+float8 call_sqrt_v8f32(float8 x) {
+  return sqrt(x);
+}
+
+
+
+// CHECK-LABEL: define {{.*}} <16 x float> @call_sqrt_v16f32(
+// DEFAULT: call <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}}
+// CORRECTLYROUNDED: call <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}}
+
+// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}}
+// CORRECTLY-UNSAFE: call reassoc nsz arcp contract afn <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}}
+float16 call_sqrt_v16f32(float16 x) {
+  return sqrt(x);
+}
+
+
+
+// Not for f64
+// CHECK-LABEL: define {{.*}} double @call_sqrt_f64(
+// CHECK: call {{.*}} double @_Z4sqrtd(double noundef %{{.+}}) #{{[0-9]+$}}{{$}}
+double call_sqrt_f64(double x) {
+  return sqrt(x);
+}
+
+
+// Not for f64
+// CHECK-LABEL: define {{.*}} <2 x double> @call_sqrt_v2f64(
+// CHECK: call {{.*}} <2 x double> @_Z4sqrtDv2_d(<2 x double> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
+double2 call_sqrt_v2f64(double2 x) {
+  return sqrt(x);
+}
+
+
+// CHECK-LABEL: define {{.*}} <3 x double> @call_sqrt_v3f64(
+// CHECK: call {{.*}} <3 x double> @_Z4sqrtDv3_d(<3 x double> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
+double3 call_sqrt_v3f64(double3 x) {
+  return sqrt(x);
+}
+
+
+// CHECK-LABEL: define {{.*}} <4 x double> @call_sqrt_v4f64(
+// CHECK: call {{.*}} <4 x double> @_Z4sqrtDv4_d(<4 x double> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
+double4 call_sqrt_v4f64(double4 x) {
+  return sqrt(x);
+}
+
+
+// CHECK-LABEL: define {{.*}} <8 x double> @call_sqrt_v8f64(
+// CHECK: call {{.*}} <8 x double> @_Z4sqrtDv8_d(<8 x double> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
+double8 call_sqrt_v8f64(double8 x) {
+  return sqrt(x);
+}
+
+
+// CHECK-LABEL: define {{.*}} <16 x double> @call_sqrt_v16f64(
+// CHECK: call {{.*}} <16 x double> @_Z4sqrtDv16_d(<16 x double> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
+double16 call_sqrt_v16f64(double16 x) {
+  return sqrt(x);
+}
+
+
+// Not for f16
+// CHECK-LABEL: define {{.*}} half @call_sqrt_f16(
+// CHECK: call {{.*}} half @_Z4sqrtDh(half noundef %{{.+}}) #{{[0-9]+$}}{{$}}
+half call_sqrt_f16(half x) {
+  return sqrt(x);
+}
+
+
+// CHECK-LABEL: define {{.*}} <2 x half> @call_sqrt_v2f16(
+// CHECK: call {{.*}} <2 x half> @_Z4sqrtDv2_Dh(<2 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
+half2 call_sqrt_v2f16(half2 x) {
+  return sqrt(x);
+}
+
+
+// CHECK-LABEL: define {{.*}} <3 x half> @call_sqrt_v3f16(
+// CHECK: call {{.*}} <3 x half> @_Z4sqrtDv3_Dh(<3 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
+half3 call_sqrt_v3f16(half3 x) {
+  return sqrt(x);
+}
+
+
+// CHECK-LABEL: define {{.*}} <4 x half> @call_sqrt_v4f16(
+// CHECK: call {{.*}} <4 x half> @_Z4sqrtDv4_Dh(<4 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
+half4 call_sqrt_v4f16(half4 x) {
+  return sqrt(x);
+}
+
+
+// CHECK-LABEL: define {{.*}} <8 x half> @call_sqrt_v8f16(
+// CHECK: call {{.*}} <8 x half> @_Z4sqrtDv8_Dh(<8 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
+half8 call_sqrt_v8f16(half8 x) {
+  return sqrt(x);
+}
+
+
+// CHECK-LABEL: define {{.*}} <16 x half> @call_sqrt_v16f16(
+// CHECK: call {{.*}} <16 x half> @_Z4sqrtDv16_Dh(<16 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
+half16 call_sqrt_v16f16(half16 x) {
+  return sqrt(x);
+}
+
+// DEFAULT: [[FPMATH]] = !{float 3.000000e+00}