[clang] [AMDGPU][clang] provide device implementation for __builtin_logb and … (PR #129347)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Mar 25 22:36:54 PDT 2025
https://github.com/choikwa updated https://github.com/llvm/llvm-project/pull/129347
>From 54076ba363db12cd04aefe58b385834a5fb986c2 Mon Sep 17 00:00:00 2001
From: Kevin Choi <kevin.choi at amd.com>
Date: Fri, 28 Feb 2025 16:52:03 -0600
Subject: [PATCH] [AMDGPU][clang] provide device implementation for
__builtin_logb and __builtin_scalbn
Clang generates library calls for __builtin_* functions, which is a problem for GPU targets that cannot handle such calls.
This patch generates device implementations for __builtin_logb and __builtin_scalbn.
---
clang/lib/CodeGen/CGBuiltin.cpp | 19 ++++++++-
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 45 +++++++++++++++++++++
clang/test/CodeGen/logb_scalbn.c | 29 +++++++++++++
3 files changed, 92 insertions(+), 1 deletion(-)
create mode 100644 clang/test/CodeGen/logb_scalbn.c
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index f0ba52fa41ce8..b5c5b5e55efed 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6011,10 +6011,27 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
}
}
+ // Returns true only on AMDGCN targets and only for logb/scalbn (library or __builtin_ spelling); those IDs skip the library-call path below so they can be emitted as intrinsics later.
+ auto NeedsDeviceOverload = [&](unsigned BuiltinID) {
+ if (getTarget().getTriple().isAMDGCN()) {
+ switch (BuiltinID) {
+ default:
+ return false;
+ case Builtin::BIlogb:
+ case Builtin::BI__builtin_logb:
+ case Builtin::BIscalbn:
+ case Builtin::BI__builtin_scalbn:
+ return true;
+ }
+ }
+ return false;
+ };
+
// If this is an alias for a lib function (e.g. __builtin_sin), emit
// the call using the normal call path, but using the unmangled
// version of the function name.
- if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
+ if (!NeedsDeviceOverload(BuiltinID) &&
+ getContext().BuiltinInfo.isLibFunction(BuiltinID))
return emitLibraryCall(*this, FD, E,
CGM.getBuiltinLibFunction(FD, BuiltinID));
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index f94917c905081..b0e7679b69043 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -31,6 +31,27 @@ using namespace CodeGen;
using namespace llvm;
namespace {
+
+// Emits a call to a two-operand intrinsic whose overload is selected by the types of BOTH operands (the second argument's type is mangled too); ConstrainedIntrinsicID is used when the builder is in constrained-FP mode, IntrinsicID otherwise.
+static Value *
+emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E,
+ Intrinsic::ID IntrinsicID,
+ Intrinsic::ID ConstrainedIntrinsicID) {
+ llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
+ llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
+
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); // scoped to the rest of this function; created after both operands are emitted
+ if (CGF.Builder.getIsFPConstrained()) {
+ Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
+ {Src0->getType(), Src1->getType()});
+ return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
+ }
+
+ Function *F =
+ CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
+ return CGF.Builder.CreateCall(F, {Src0, Src1});
+}
+
// If \p E is not null pointer, insert address space cast to match return
// type of \p E if necessary.
Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
@@ -1876,6 +1897,30 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
return emitBuiltinWithOneOverloadedType<2>(
*this, E, Intrinsic::amdgcn_s_prefetch_data);
+ case Builtin::BIlogb:
+ case Builtin::BI__builtin_logb: {
+ // Expand logb(x) inline instead of emitting a library call:
+ //   x == 0      -> -inf
+ //   |x| == inf  -> +inf   (fabs(x) is returned)
+ //   x is NaN    -> NaN    (the ordered compare is false, so fabs(x) is
+ //                          returned)
+ //   otherwise   -> (double)(frexp_exp(x) - 1)
+ // The operand is emitted exactly once and the resulting value is reused for
+ // every use below, so side effects in the argument expression are not
+ // duplicated.
+ auto *Src0 = EmitScalarExpr(E->getArg(0));
+ auto *FrExpFunc = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
+ {Builder.getInt32Ty(), Src0->getType()});
+ auto *FrExp = Builder.CreateCall(FrExpFunc, Src0);
+ // Subtract one from the frexp exponent; the add is flagged nsw (not nuw).
+ auto *Add = Builder.CreateAdd(
+ FrExp, ConstantInt::getSigned(FrExp->getType(), -1), "", false, true);
+ auto *SIToFP = Builder.CreateSIToFP(Add, Builder.getDoubleTy());
+ // Build fabs from the already-emitted operand rather than re-emitting
+ // E->getArg(0).
+ auto *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Src0);
+ auto *FCmpONE = Builder.CreateFCmpONE(
+ Fabs, ConstantFP::getInfinity(Builder.getDoubleTy()));
+ auto *Sel1 = Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
+ auto *FCmpOEQ =
+ Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(Builder.getDoubleTy()));
+ auto *Sel2 = Builder.CreateSelect(
+ FCmpOEQ, ConstantFP::getInfinity(Builder.getDoubleTy(), /*Neg*/ true),
+ Sel1);
+ return Sel2;
+ }
+ case Builtin::BIscalbn:
+ case Builtin::BI__builtin_scalbn:
+ return emitBinaryExpMaybeConstrainedFPBuiltin(
+ *this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp);
default:
return nullptr;
}
diff --git a/clang/test/CodeGen/logb_scalbn.c b/clang/test/CodeGen/logb_scalbn.c
new file mode 100644
index 0000000000000..ed4c7bc56cd6b
--- /dev/null
+++ b/clang/test/CodeGen/logb_scalbn.c
@@ -0,0 +1,29 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang -cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+
+// CHECK-LABEL: define dso_local void @my_kernel(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[D1:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[D2:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D1]] to ptr
+// CHECK-NEXT: [[D2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D2]] to ptr
+// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f64(double 1.600000e+01)
+// CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[TMP0]], -1
+// CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[TMP1]] to double
+// CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.fabs.f64(double 1.600000e+01)
+// CHECK-NEXT: [[TMP4:%.*]] = fcmp one double [[TMP3]], 0x7FF0000000000000
+// CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], double [[TMP2]], double [[TMP3]]
+// CHECK-NEXT: [[TMP6:%.*]] = select i1 false, double 0xFFF0000000000000, double [[TMP5]]
+// CHECK-NEXT: [[CONV:%.*]] = fptrunc double [[TMP6]] to float
+// CHECK-NEXT: store float [[CONV]], ptr [[D1_ASCAST]], align 4
+// CHECK-NEXT: [[TMP7:%.*]] = call double @llvm.ldexp.f64.i32(double 1.600000e+01, i32 10)
+// CHECK-NEXT: [[CONV1:%.*]] = fptrunc double [[TMP7]] to float
+// CHECK-NEXT: store float [[CONV1]], ptr [[D2_ASCAST]], align 4
+// CHECK-NEXT: ret void
+//
+void my_kernel(){
+ float D1 = __builtin_logb((float)16); // CHECK lines expect llvm.amdgcn.frexp.exp.i32.f64 plus selects on a double operand
+ float D2 = __builtin_scalbn((float)16, 10); // CHECK lines expect a call to llvm.ldexp.f64.i32
+}
+
More information about the cfe-commits
mailing list