[llvm-branch-commits] [libclc] libclc: Use fshr builtin in sincos helpers (PR #186427)

Fri Mar 13 08:53:19 PDT 2026

https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/186427

None

>From 2bc56d677af3466a905d52d8fb552884b46db250 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 13 Mar 2026 09:54:20 +0100
Subject: [PATCH] libclc: Use fshr builtin in sincos helpers

---
 libclc/clc/lib/generic/math/clc_sincos_helpers.cl  |  2 +-
 libclc/clc/lib/generic/math/clc_sincos_helpers.inc | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers.cl b/libclc/clc/lib/generic/math/clc_sincos_helpers.cl
index 19705c42f6f07..aeba3c14dd9b9 100644
--- a/libclc/clc/lib/generic/math/clc_sincos_helpers.cl
+++ b/libclc/clc/lib/generic/math/clc_sincos_helpers.cl
@@ -16,7 +16,7 @@
 #include <clc/math/clc_trunc.h>
 #include <clc/math/math.h>
 
-#define bitalign(hi, lo, shift) ((hi) << (32 - (shift))) | ((lo) >> (shift));
+#define bitalign(hi, lo, shift) __builtin_elementwise_fshr(hi, lo, shift)
 
 #define __CLC_FULL_MUL(A, B, HI, LO)                                           \
   LO = A * B;                                                                  \
diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
index 2a71b5626ccc5..d945438b6ae1d 100644
--- a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
+++ b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
@@ -275,9 +275,9 @@ _CLC_DEF _CLC_OVERLOAD __CLC_INTN __clc_argReductionLargeS(
   __CLC_INTN i = __CLC_AS_INTN(p7 >> 29U);
 
   // Scoot up 2 more bits so only fraction remains
-  p7 = bitalign(p7, p6, 30);
-  p6 = bitalign(p6, p5, 30);
-  p5 = bitalign(p5, p4, 30);
+  p7 = bitalign(p7, p6, (__CLC_UINTN)30u);
+  p6 = bitalign(p6, p5, (__CLC_UINTN)30u);
+  p5 = bitalign(p5, p4, (__CLC_UINTN)30u);
 
   // Subtract 1 if msb of fraction is 1, i.e. fraction >= 0.5
   __CLC_UINTN flip = (i & 1) != 0 ? 0xFFFFFFFFU : 0U;
@@ -297,12 +297,12 @@ _CLC_DEF _CLC_OVERLOAD __CLC_INTN __clc_argReductionLargeS(
       __CLC_AS_FLOATN(sign | ((127U - __CLC_AS_UINTN(xe)) << 23U) | p7 >> 9);
 
   // Shift out bits we captured on q1
-  p7 = bitalign(p7, p6, 32 - 23);
+  p7 = bitalign(p7, p6, (__CLC_UINTN)(32u - 23u));
 
   // Get 24 more bits of fraction in another float, there are not long strings
   // of zeroes here
   __CLC_INTN xxe = __CLC_AS_INTN(__clc_clz(p7)) + 1;
-  p7 = bitalign(p7, p6, 32 - xxe);
+  p7 = bitalign(p7, p6, __CLC_CONVERT_UINTN(32 - xxe));
   __CLC_FLOATN q0 = __CLC_AS_FLOATN(
       sign | ((127U - __CLC_AS_UINTN(xe + 23 + xxe)) << 23U) | p7 >> 9);