[llvm-branch-commits] [libclc] libclc: Use fshr builtin in sincos helpers (PR #186427)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Mar 13 08:53:19 PDT 2026
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/186427
None
From 2bc56d677af3466a905d52d8fb552884b46db250 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 13 Mar 2026 09:54:20 +0100
Subject: [PATCH] libclc: Use fshr builtin in sincos helpers
---
libclc/clc/lib/generic/math/clc_sincos_helpers.cl | 2 +-
libclc/clc/lib/generic/math/clc_sincos_helpers.inc | 10 +++++-----
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers.cl b/libclc/clc/lib/generic/math/clc_sincos_helpers.cl
index 19705c42f6f07..aeba3c14dd9b9 100644
--- a/libclc/clc/lib/generic/math/clc_sincos_helpers.cl
+++ b/libclc/clc/lib/generic/math/clc_sincos_helpers.cl
@@ -16,7 +16,7 @@
#include <clc/math/clc_trunc.h>
#include <clc/math/math.h>
-#define bitalign(hi, lo, shift) ((hi) << (32 - (shift))) | ((lo) >> (shift));
+#define bitalign(hi, lo, shift) __builtin_elementwise_fshr(hi, lo, shift)
#define __CLC_FULL_MUL(A, B, HI, LO) \
LO = A * B; \
diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
index 2a71b5626ccc5..d945438b6ae1d 100644
--- a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
+++ b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
@@ -275,9 +275,9 @@ _CLC_DEF _CLC_OVERLOAD __CLC_INTN __clc_argReductionLargeS(
__CLC_INTN i = __CLC_AS_INTN(p7 >> 29U);
// Scoot up 2 more bits so only fraction remains
- p7 = bitalign(p7, p6, 30);
- p6 = bitalign(p6, p5, 30);
- p5 = bitalign(p5, p4, 30);
+ p7 = bitalign(p7, p6, (__CLC_UINTN)30u);
+ p6 = bitalign(p6, p5, (__CLC_UINTN)30u);
+ p5 = bitalign(p5, p4, (__CLC_UINTN)30u);
// Subtract 1 if msb of fraction is 1, i.e. fraction >= 0.5
__CLC_UINTN flip = (i & 1) != 0 ? 0xFFFFFFFFU : 0U;
@@ -297,12 +297,12 @@ _CLC_DEF _CLC_OVERLOAD __CLC_INTN __clc_argReductionLargeS(
__CLC_AS_FLOATN(sign | ((127U - __CLC_AS_UINTN(xe)) << 23U) | p7 >> 9);
// Shift out bits we captured on q1
- p7 = bitalign(p7, p6, 32 - 23);
+ p7 = bitalign(p7, p6, (__CLC_UINTN)(32u - 23u));
// Get 24 more bits of fraction in another float, there are not long strings
// of zeroes here
__CLC_INTN xxe = __CLC_AS_INTN(__clc_clz(p7)) + 1;
- p7 = bitalign(p7, p6, 32 - xxe);
+ p7 = bitalign(p7, p6, __CLC_CONVERT_UINTN(32 - xxe));
__CLC_FLOATN q0 = __CLC_AS_FLOATN(
sign | ((127U - __CLC_AS_UINTN(xe + 23 + xxe)) << 23U) | p7 >> 9);
More information about the llvm-branch-commits mailing list