[libclc][amdgpu] Implement native_exp2 via builtin (PR #133696)
Fraser Cormack via cfe-commits
cfe-commits at lists.llvm.org
Mon Mar 31 03:51:36 PDT 2025
https://github.com/frasercrmck updated https://github.com/llvm/llvm-project/pull/133696
>From b927766ee9d6e8a19af3cfcd5a05f7dcd2197ced Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser at codeplay.com>
Date: Mon, 31 Mar 2025 11:37:43 +0100
Subject: [PATCH] [libclc][amdgpu] Implement native_exp2 via AMD builtin
This change came up during the discussion on #129679 and has been split
out of it as a preparatory commit.
An example of the resulting AMDGPU codegen for native_exp and native_exp2 is:
define <2 x float> @_Z10native_expDv2_f(<2 x float> %val) {
entry:
%mul = fmul afn <2 x float> %val, splat (float 0x3FF7154760000000)
%0 = extractelement <2 x float> %mul, i64 0
%1 = tail call float @llvm.amdgcn.exp2.f32(float %0)
%vecinit.i = insertelement <2 x float> poison, float %1, i64 0
%2 = extractelement <2 x float> %mul, i64 1
%3 = tail call float @llvm.amdgcn.exp2.f32(float %2)
%vecinit2.i = insertelement <2 x float> %vecinit.i, float %3, i64 1
ret <2 x float> %vecinit2.i
}
define <2 x float> @_Z11native_exp2Dv2_f(<2 x float> %x) {
entry:
%0 = extractelement <2 x float> %x, i64 0
%1 = tail call float @llvm.amdgcn.exp2.f32(float %0)
%vecinit = insertelement <2 x float> poison, float %1, i64 0
%2 = extractelement <2 x float> %x, i64 1
%3 = tail call float @llvm.amdgcn.exp2.f32(float %2)
%vecinit2 = insertelement <2 x float> %vecinit, float %3, i64 1
ret <2 x float> %vecinit2
}
---
libclc/amdgpu/lib/SOURCES | 1 +
libclc/amdgpu/lib/math/native_exp2.cl | 16 ++++++++++++++++
2 files changed, 17 insertions(+)
create mode 100644 libclc/amdgpu/lib/math/native_exp2.cl
diff --git a/libclc/amdgpu/lib/SOURCES b/libclc/amdgpu/lib/SOURCES
index d7782a2ae14dc..ed5e45a37c18d 100644
--- a/libclc/amdgpu/lib/SOURCES
+++ b/libclc/amdgpu/lib/SOURCES
@@ -1,4 +1,5 @@
math/native_exp.cl
+math/native_exp2.cl
math/native_log.cl
math/native_log10.cl
math/half_exp.cl
diff --git a/libclc/amdgpu/lib/math/native_exp2.cl b/libclc/amdgpu/lib/math/native_exp2.cl
new file mode 100644
index 0000000000000..39ae914b19634
--- /dev/null
+++ b/libclc/amdgpu/lib/math/native_exp2.cl
@@ -0,0 +1,16 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/clc.h>
+#include <clc/clcmacro.h>
+
+_CLC_OVERLOAD _CLC_DEF float native_exp2(float val) {
+ return __builtin_amdgcn_exp2f(val);
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, native_exp2, float)
More information about the cfe-commits
mailing list