[libclc] libclc: Add div_cr utility function (PR #185730)

Tue Mar 10 23:21:35 PDT 2026

https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/185730

>From c3155feadc903cab96c1b1917db96f762782041f Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Tue, 10 Mar 2026 20:16:14 +0100
Subject: [PATCH 1/2] libclc: Add div_cr utility function

This is a workaround for the modal div operator precision. The
OpenCL default is not correctly rounded, so this provides a backdoor
to get a correctly rounded fdiv. Ideally clang would have a builtin
or some other mechanism to control the precision.
---
 libclc/clc/include/clc/math/clc_div_cr.h   | 26 ++++++++++++++++++++++
 libclc/clc/lib/generic/CMakeLists.txt      |  4 ++++
 libclc/clc/lib/generic/math/clc_div_cr.cl  | 11 +++++++++
 libclc/clc/lib/generic/math/clc_div_cr.inc | 12 ++++++++++
 4 files changed, 53 insertions(+)
 create mode 100644 libclc/clc/include/clc/math/clc_div_cr.h
 create mode 100644 libclc/clc/lib/generic/math/clc_div_cr.cl
 create mode 100644 libclc/clc/lib/generic/math/clc_div_cr.inc

diff --git a/libclc/clc/include/clc/math/clc_div_cr.h b/libclc/clc/include/clc/math/clc_div_cr.h
new file mode 100644
index 0000000000000..cf3f2ce1224f3
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_div_cr.h
@@ -0,0 +1,26 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_DIV_CR_H__
+#define __CLC_MATH_DIV_CR_H__
+
+// Declare overloads of __clc_div_cr. This is a wrapper around the
+// floating-point / operator. This is a utility to deal with the language
+// default division not being correctly rounded, and requires the
+// -cl-fp32-correctly-rounded-divide-sqrt flag. This will just be the operator
+// compiled with that option. Ideally clang would expose a direct way to get the
+// correctly rounded and opencl precision versions.
+
+#define __CLC_BODY <clc/shared/binary_decl.inc>
+#define __CLC_FUNCTION __clc_div_cr
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_DIV_CR_H__
diff --git a/libclc/clc/lib/generic/CMakeLists.txt b/libclc/clc/lib/generic/CMakeLists.txt
index ce7614726e21e..2bd8bf64fc9c3 100644
--- a/libclc/clc/lib/generic/CMakeLists.txt
+++ b/libclc/clc/lib/generic/CMakeLists.txt
@@ -72,6 +72,7 @@ libclc_configure_source_list(CLC_GENERIC_SOURCES
   math/clc_cos.cl
   math/clc_cosh.cl
   math/clc_cospi.cl
+  math/clc_div_cr.cl
   math/clc_ep_log.cl
   math/clc_erf.cl
   math/clc_erfc.cl
@@ -206,3 +207,6 @@ libclc_configure_source_options(${CMAKE_CURRENT_SOURCE_DIR} -fapprox-func
   math/clc_native_sqrt.cl
   math/clc_native_tan.cl
 )
+
+libclc_configure_source_options(${CMAKE_CURRENT_SOURCE_DIR} -cl-fp32-correctly-rounded-divide-sqrt
+  math/clc_div_cr.cl)
diff --git a/libclc/clc/lib/generic/math/clc_div_cr.cl b/libclc/clc/lib/generic/math/clc_div_cr.cl
new file mode 100644
index 0000000000000..00af7708e28a0
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_div_cr.cl
@@ -0,0 +1,11 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/math/clc_div_cr.h"
+#define __CLC_BODY <clc_div_cr.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/math/clc_div_cr.inc b/libclc/clc/lib/generic/math/clc_div_cr.inc
new file mode 100644
index 0000000000000..79e70996ef4fa
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_div_cr.inc
@@ -0,0 +1,12 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_div_cr(__CLC_GENTYPE x,
+                                                  __CLC_GENTYPE y) {
+  return x / y;
+}

>From 03bb4303474fe485d7f35d11443e9b98e57de8be Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 11 Mar 2026 07:21:15 +0100
Subject: [PATCH 2/2] Reorder

---
 libclc/clc/include/clc/math/clc_div_cr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libclc/clc/include/clc/math/clc_div_cr.h b/libclc/clc/include/clc/math/clc_div_cr.h
index cf3f2ce1224f3..c88dc7573506f 100644
--- a/libclc/clc/include/clc/math/clc_div_cr.h
+++ b/libclc/clc/include/clc/math/clc_div_cr.h
@@ -16,8 +16,8 @@
 // compiled with that option. Ideally clang would expose a direct way to get the
 // correctly rounded and opencl precision versions.
 
-#define __CLC_BODY <clc/shared/binary_decl.inc>
 #define __CLC_FUNCTION __clc_div_cr
+#define __CLC_BODY <clc/shared/binary_decl.inc>
 
 #include <clc/math/gentype.inc>