[clang] ed181ef - [HIP][AMDGPU] expand printf when compiling HIP to AMDGPU
Sameer Sahasrabuddhe via cfe-commits
cfe-commits at lists.llvm.org
Thu Jan 16 01:55:35 PST 2020
Author: Sameer Sahasrabuddhe
Date: 2020-01-16T15:15:38+05:30
New Revision: ed181efa175d3e0acc134e6cd161914e64c7195e
URL: https://github.com/llvm/llvm-project/commit/ed181efa175d3e0acc134e6cd161914e64c7195e
DIFF: https://github.com/llvm/llvm-project/commit/ed181efa175d3e0acc134e6cd161914e64c7195e.diff
LOG: [HIP][AMDGPU] expand printf when compiling HIP to AMDGPU
Summary:
This change implements the expansion in two parts:
- Add a utility function emitAMDGPUPrintfCall() in LLVM.
- Invoke the above function from Clang CodeGen, when processing a HIP
program for the AMDGPU target.
The printf expansion has undefined behaviour if the format string is
not a compile-time constant. As a sufficient condition, the HIP
ToolChain now emits -Werror=format-nonliteral.
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D71365
Added:
clang/test/CodeGenHIP/printf-aggregate.cpp
clang/test/CodeGenHIP/printf.cpp
clang/test/Driver/hip-printf.hip
llvm/include/llvm/Transforms/Utils/AMDGPUEmitPrintf.h
llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
Modified:
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/CodeGen/CGGPUBuiltin.cpp
clang/lib/CodeGen/CodeGenFunction.h
clang/lib/Driver/ToolChains/HIP.cpp
llvm/lib/Transforms/Utils/CMakeLists.txt
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 09fd3087b494..4decaa593a59 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4115,6 +4115,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BIprintf:
if (getTarget().getTriple().isNVPTX())
return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
+ if (getTarget().getTriple().getArch() == Triple::amdgcn &&
+ getLangOpts().HIP)
+ return EmitAMDGPUDevicePrintfCallExpr(E, ReturnValue);
break;
case Builtin::BI__builtin_canonicalize:
case Builtin::BI__builtin_canonicalizef:
diff --git a/clang/lib/CodeGen/CGGPUBuiltin.cpp b/clang/lib/CodeGen/CGGPUBuiltin.cpp
index d7e267630762..bccce7dd7ff4 100644
--- a/clang/lib/CodeGen/CGGPUBuiltin.cpp
+++ b/clang/lib/CodeGen/CGGPUBuiltin.cpp
@@ -16,6 +16,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
using namespace clang;
using namespace CodeGen;
@@ -120,3 +121,36 @@ CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
return RValue::get(Builder.CreateCall(
VprintfFunc, {Args[0].getRValue(*this).getScalarVal(), BufferPtr}));
}
+
+RValue
+CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E,
+ ReturnValueSlot ReturnValue) {
+ assert(getTarget().getTriple().getArch() == llvm::Triple::amdgcn);
+ assert(E->getBuiltinCallee() == Builtin::BIprintf ||
+ E->getBuiltinCallee() == Builtin::BI__builtin_printf);
+ assert(E->getNumArgs() >= 1); // printf always has at least one arg.
+
+ CallArgList CallArgs;
+ EmitCallArgs(CallArgs,
+ E->getDirectCallee()->getType()->getAs<FunctionProtoType>(),
+ E->arguments(), E->getDirectCallee(),
+ /* ParamsToSkip = */ 0);
+
+ SmallVector<llvm::Value *, 8> Args;
+ for (auto A : CallArgs) {
+ // We don't know how to emit non-scalar varargs.
+ if (!A.getRValue(*this).isScalar()) {
+ CGM.ErrorUnsupported(E, "non-scalar arg to printf");
+ return RValue::get(llvm::ConstantInt::get(IntTy, -1));
+ }
+
+ llvm::Value *Arg = A.getRValue(*this).getScalarVal();
+ Args.push_back(Arg);
+ }
+
+ llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint());
+ IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation());
+ auto Printf = llvm::emitAMDGPUPrintfCall(IRB, Args);
+ Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint());
+ return RValue::get(Printf);
+}
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 3d8bc93eb965..5ab15bf74a23 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3722,6 +3722,8 @@ class CodeGenFunction : public CodeGenTypeCache {
RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
ReturnValueSlot ReturnValue);
+ RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E,
+ ReturnValueSlot ReturnValue);
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
const CallExpr *E, ReturnValueSlot ReturnValue);
diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp
index f89e648948ab..d4b015a7e873 100644
--- a/clang/lib/Driver/ToolChains/HIP.cpp
+++ b/clang/lib/Driver/ToolChains/HIP.cpp
@@ -436,6 +436,7 @@ Tool *HIPToolChain::buildLinker() const {
void HIPToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
HostTC.addClangWarningOptions(CC1Args);
+ CC1Args.push_back("-Werror=format-nonliteral");
}
ToolChain::CXXStdlibType
diff --git a/clang/test/CodeGenHIP/printf-aggregate.cpp b/clang/test/CodeGenHIP/printf-aggregate.cpp
new file mode 100644
index 000000000000..83e8f899c412
--- /dev/null
+++ b/clang/test/CodeGenHIP/printf-aggregate.cpp
@@ -0,0 +1,20 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -fcuda-is-device \
+// RUN: -verify -emit-llvm %s
+
+#define __device__ __attribute__((device))
+extern "C" __device__ int printf(const char *format, ...);
+
+// Check that we don't crash when asked to printf a non-scalar arg.
+struct Struct {
+ int x;
+ int y;
+};
+
+__device__ void PrintfNonScalar(const char *fmt) {
+ printf(fmt, 1);
+ // Ignore the warning about the %d not matching the struct argument
+ // expected-warning at +2 {{}}
+ // expected-error at +1 {{cannot compile this non-scalar arg to printf}}
+ printf("%d", Struct());
+}
diff --git a/clang/test/CodeGenHIP/printf.cpp b/clang/test/CodeGenHIP/printf.cpp
new file mode 100644
index 000000000000..450a77c63bba
--- /dev/null
+++ b/clang/test/CodeGenHIP/printf.cpp
@@ -0,0 +1,44 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -fcuda-is-device \
+// RUN: -o - %s | FileCheck --enable-var-scope %s
+
+#define __device__ __attribute__((device))
+
+extern "C" __device__ int printf(const char *format, ...);
+
+__device__ int foo1() {
+ const char *s = "hello world";
+ return printf("%.*f %*.*s %p\n", 8, 3.14159, 8, 4, s, s);
+}
+
+// CHECK-LABEL: @_Z4foo1v()
+// CHECK: [[BEGIN:%.*]] = call i64 @__ockl_printf_begin(i64 0)
+// CHECK: [[STRLEN1:%.*]] = phi i64 [ %{{[^,]*}}, %{{[^ ]*}} ], [ 0, %{{[^ ]*}} ]
+// CHECK: [[APPEND1:%.*]] = call i64 @__ockl_printf_append_string_n(i64 [[BEGIN]], {{.*}}, i64 [[STRLEN1]], i32 0)
+// CHECK: [[APPEND2:%.*]] = call i64 @__ockl_printf_append_args(i64 [[APPEND1]], i32 1, i64 8, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i32 0)
+// CHECK: [[APPEND3:%.*]] = call i64 @__ockl_printf_append_args(i64 [[APPEND2]], i32 1, i64 4614256650576692846, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i32 0)
+// CHECK: [[APPEND4:%.*]] = call i64 @__ockl_printf_append_args(i64 [[APPEND3]], i32 1, i64 8, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i32 0)
+// CHECK: [[APPEND5:%.*]] = call i64 @__ockl_printf_append_args(i64 [[APPEND4]], i32 1, i64 4, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i32 0)
+// CHECK: [[STRLEN2:%.*]] = phi i64 [ %{{[^,]*}}, %{{[^ ]*}} ], [ 0, %{{[^ ]*}} ]
+// CHECK: [[APPEND6:%.*]] = call i64 @__ockl_printf_append_string_n(i64 [[APPEND5]], {{.*}}, i64 [[STRLEN2]], i32 0)
+// CHECK: [[PTR2INT:%.*]] = ptrtoint i8* %{{.*}} to i64
+// CHECK: [[APPEND7:%.*]] = call i64 @__ockl_printf_append_args(i64 [[APPEND6]], i32 1, i64 [[PTR2INT]], i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i32 1)
+// CHECK: [[RETURN:%.*]] = trunc i64 [[APPEND7]] to i32
+// CHECK: ret i32 [[RETURN]]
+
+__device__ char *dstr;
+
+__device__ int foo2() {
+ return printf("%s %p\n", dstr, dstr);
+}
+
+// CHECK-LABEL: @_Z4foo2v()
+// CHECK: [[BEGIN:%.*]] = call i64 @__ockl_printf_begin(i64 0)
+// CHECK: [[STRLEN1:%.*]] = phi i64 [ %{{[^,]*}}, %{{[^ ]*}} ], [ 0, %{{[^ ]*}} ]
+// CHECK: [[APPEND1:%.*]] = call i64 @__ockl_printf_append_string_n(i64 [[BEGIN]], {{.*}}, i64 [[STRLEN1]], i32 0)
+// CHECK: [[STRLEN2:%.*]] = phi i64 [ %{{[^,]*}}, %{{[^ ]*}} ], [ 0, %{{[^ ]*}} ]
+// CHECK: [[APPEND2:%.*]] = call i64 @__ockl_printf_append_string_n(i64 [[APPEND1]], {{.*}}, i64 [[STRLEN2]], i32 0)
+// CHECK: [[PTR2INT:%.*]] = ptrtoint i8* %{{.*}} to i64
+// CHECK: [[APPEND3:%.*]] = call i64 @__ockl_printf_append_args(i64 [[APPEND2]], i32 1, i64 [[PTR2INT]], i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i32 1)
+// CHECK: [[RETURN:%.*]] = trunc i64 [[APPEND3]] to i32
+// CHECK: ret i32 [[RETURN]]
diff --git a/clang/test/Driver/hip-printf.hip b/clang/test/Driver/hip-printf.hip
new file mode 100644
index 000000000000..2df344f8fb2e
--- /dev/null
+++ b/clang/test/Driver/hip-printf.hip
@@ -0,0 +1,9 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu -x hip --cuda-gpu-arch=gfx900 \
+// RUN: %s 2>&1 | FileCheck %s
+
+// CHECK: [[CLANG:".*clang.*"]] "-cc1"
+// CHECK-SAME: "-Werror=format-nonliteral"
diff --git a/llvm/include/llvm/Transforms/Utils/AMDGPUEmitPrintf.h b/llvm/include/llvm/Transforms/Utils/AMDGPUEmitPrintf.h
new file mode 100644
index 000000000000..65dbf47e9bbc
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/AMDGPUEmitPrintf.h
@@ -0,0 +1,25 @@
+//===- AMDGPUEmitPrintf.h ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Utility function to lower a printf call into a series of device
+// library calls on the AMDGPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_AMDGPUEMITPRINTF_H
+#define LLVM_TRANSFORMS_UTILS_AMDGPUEMITPRINTF_H
+
+#include "llvm/IR/IRBuilder.h"
+
+namespace llvm {
+
+Value *emitAMDGPUPrintfCall(IRBuilder<> &Builder, ArrayRef<Value *> Args);
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_AMDGPUEMITPRINTF_H
diff --git a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
new file mode 100644
index 000000000000..976956ca4a09
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -0,0 +1,246 @@
+//===- AMDGPUEmitPrintf.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Utility function to lower a printf call into a series of device
+// library calls on the AMDGPU target.
+//
+// WARNING: This file knows about certain library functions. It recognizes them
+// by name, and hardwires knowledge of their semantics.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+
+#include <iostream>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-emit-printf"
+
+static bool isCString(const Value *Arg) {
+ auto Ty = Arg->getType();
+ auto PtrTy = dyn_cast<PointerType>(Ty);
+ if (!PtrTy)
+ return false;
+
+ auto IntTy = dyn_cast<IntegerType>(PtrTy->getElementType());
+ if (!IntTy)
+ return false;
+
+ return IntTy->getBitWidth() == 8;
+}
+
+static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) {
+ auto Int64Ty = Builder.getInt64Ty();
+ auto Ty = Arg->getType();
+
+ if (auto IntTy = dyn_cast<IntegerType>(Ty)) {
+ switch (IntTy->getBitWidth()) {
+ case 32:
+ return Builder.CreateZExt(Arg, Int64Ty);
+ case 64:
+ return Arg;
+ }
+ }
+
+ if (Ty->getTypeID() == Type::DoubleTyID) {
+ return Builder.CreateBitCast(Arg, Int64Ty);
+ }
+
+ if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
+ return Builder.CreatePtrToInt(Arg, Int64Ty);
+ }
+
+ llvm_unreachable("unexpected type");
+}
+
+static Value *callPrintfBegin(IRBuilder<> &Builder, Value *Version) {
+ auto Int64Ty = Builder.getInt64Ty();
+ auto M = Builder.GetInsertBlock()->getModule();
+ auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty);
+ return Builder.CreateCall(Fn, Version);
+}
+
+static Value *callAppendArgs(IRBuilder<> &Builder, Value *Desc, int NumArgs,
+ Value *Arg0, Value *Arg1, Value *Arg2, Value *Arg3,
+ Value *Arg4, Value *Arg5, Value *Arg6,
+ bool IsLast) {
+ auto Int64Ty = Builder.getInt64Ty();
+ auto Int32Ty = Builder.getInt32Ty();
+ auto M = Builder.GetInsertBlock()->getModule();
+ auto Fn = M->getOrInsertFunction("__ockl_printf_append_args", Int64Ty,
+ Int64Ty, Int32Ty, Int64Ty, Int64Ty, Int64Ty,
+ Int64Ty, Int64Ty, Int64Ty, Int64Ty, Int32Ty);
+ auto IsLastValue = Builder.getInt32(IsLast);
+ auto NumArgsValue = Builder.getInt32(NumArgs);
+ return Builder.CreateCall(Fn, {Desc, NumArgsValue, Arg0, Arg1, Arg2, Arg3,
+ Arg4, Arg5, Arg6, IsLastValue});
+}
+
+static Value *appendArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
+ bool IsLast) {
+ auto Arg0 = fitArgInto64Bits(Builder, Arg);
+ auto Zero = Builder.getInt64(0);
+ return callAppendArgs(Builder, Desc, 1, Arg0, Zero, Zero, Zero, Zero, Zero,
+ Zero, IsLast);
+}
+
+// The device library does not provide strlen, so we build our own loop
+// here. While we are at it, we also include the terminating null in the length.
+static Value *getStrlenWithNull(IRBuilder<> &Builder, Value *Str) {
+ auto *Prev = Builder.GetInsertBlock();
+ Module *M = Prev->getModule();
+
+ auto CharZero = Builder.getInt8(0);
+ auto One = Builder.getInt64(1);
+ auto Zero = Builder.getInt64(0);
+ auto Int64Ty = Builder.getInt64Ty();
+
+ // The length is either zero for a null pointer, or the computed value for an
+ // actual string. We need a join block for a phi that represents the final
+ // value.
+ //
+ // Strictly speaking, the zero does not matter since
+ // __ockl_printf_append_string_n ignores the length if the pointer is null.
+ BasicBlock *Join = nullptr;
+ if (Prev->getTerminator()) {
+ Join = Prev->splitBasicBlock(Builder.GetInsertPoint(),
+ "strlen.join");
+ Prev->getTerminator()->eraseFromParent();
+ } else {
+ Join = BasicBlock::Create(M->getContext(), "strlen.join",
+ Prev->getParent());
+ }
+ BasicBlock *While =
+ BasicBlock::Create(M->getContext(), "strlen.while",
+ Prev->getParent(), Join);
+ BasicBlock *WhileDone = BasicBlock::Create(
+ M->getContext(), "strlen.while.done",
+ Prev->getParent(), Join);
+
+ // Emit an early return for when the pointer is null.
+ Builder.SetInsertPoint(Prev);
+ auto CmpNull =
+ Builder.CreateICmpEQ(Str, Constant::getNullValue(Str->getType()));
+ BranchInst::Create(Join, While, CmpNull, Prev);
+
+ // Entry to the while loop.
+ Builder.SetInsertPoint(While);
+
+ auto PtrPhi = Builder.CreatePHI(Str->getType(), 2);
+ PtrPhi->addIncoming(Str, Prev);
+ auto PtrNext = Builder.CreateGEP(PtrPhi, One);
+ PtrPhi->addIncoming(PtrNext, While);
+
+ // Condition for the while loop.
+ auto Data = Builder.CreateLoad(PtrPhi);
+ auto Cmp = Builder.CreateICmpEQ(Data, CharZero);
+ Builder.CreateCondBr(Cmp, WhileDone, While);
+
+ // Add one to the computed length.
+ Builder.SetInsertPoint(WhileDone, WhileDone->begin());
+ auto Begin = Builder.CreatePtrToInt(Str, Int64Ty);
+ auto End = Builder.CreatePtrToInt(PtrPhi, Int64Ty);
+ auto Len = Builder.CreateSub(End, Begin);
+ Len = Builder.CreateAdd(Len, One);
+
+ // Final join.
+ BranchInst::Create(Join, WhileDone);
+ Builder.SetInsertPoint(Join, Join->begin());
+ auto LenPhi = Builder.CreatePHI(Len->getType(), 2);
+ LenPhi->addIncoming(Len, WhileDone);
+ LenPhi->addIncoming(Zero, Prev);
+
+ return LenPhi;
+}
+
+static Value *callAppendStringN(IRBuilder<> &Builder, Value *Desc, Value *Str,
+ Value *Length, bool isLast) {
+ auto Int64Ty = Builder.getInt64Ty();
+ auto CharPtrTy = Builder.getInt8PtrTy();
+ auto Int32Ty = Builder.getInt32Ty();
+ auto M = Builder.GetInsertBlock()->getModule();
+ auto Fn = M->getOrInsertFunction("__ockl_printf_append_string_n", Int64Ty,
+ Int64Ty, CharPtrTy, Int64Ty, Int32Ty);
+ auto IsLastInt32 = Builder.getInt32(isLast);
+ return Builder.CreateCall(Fn, {Desc, Str, Length, IsLastInt32});
+}
+
+static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg,
+ bool IsLast) {
+ auto Length = getStrlenWithNull(Builder, Arg);
+ return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
+}
+
+static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
+ bool SpecIsCString, bool IsLast) {
+ if (SpecIsCString && isCString(Arg)) {
+ return appendString(Builder, Desc, Arg, IsLast);
+ }
+ // If the format specifies a string but the argument is not, the frontend will
+ // have printed a warning. We just rely on undefined behaviour and send the
+ // argument anyway.
+ return appendArg(Builder, Desc, Arg, IsLast);
+}
+
+// Scan the format string to locate all specifiers, and mark the ones that
+// specify a string, i.e, the "%s" specifier with optional '*' characters.
+static void locateCStrings(SparseBitVector<8> &BV, Value *Fmt) {
+ StringRef Str;
+ if (!getConstantStringInfo(Fmt, Str) || Str.empty())
+ return;
+
+ static const char ConvSpecifiers[] = "diouxXfFeEgGaAcspn";
+ size_t SpecPos = 0;
+ // Skip the first argument, the format string.
+ unsigned ArgIdx = 1;
+
+ while ((SpecPos = Str.find_first_of('%', SpecPos)) != StringRef::npos) {
+ if (Str[SpecPos + 1] == '%') {
+ SpecPos += 2;
+ continue;
+ }
+ auto SpecEnd = Str.find_first_of(ConvSpecifiers, SpecPos);
+ if (SpecEnd == StringRef::npos)
+ return;
+ auto Spec = Str.slice(SpecPos, SpecEnd + 1);
+ ArgIdx += Spec.count('*');
+ if (Str[SpecEnd] == 's') {
+ BV.set(ArgIdx);
+ }
+ SpecPos = SpecEnd + 1;
+ ++ArgIdx;
+ }
+}
+
+Value *llvm::emitAMDGPUPrintfCall(IRBuilder<> &Builder,
+ ArrayRef<Value *> Args) {
+ auto NumOps = Args.size();
+ assert(NumOps >= 1);
+
+ auto Fmt = Args[0];
+ SparseBitVector<8> SpecIsCString;
+ locateCStrings(SpecIsCString, Fmt);
+
+ auto Desc = callPrintfBegin(Builder, Builder.getIntN(64, 0));
+ Desc = appendString(Builder, Desc, Fmt, NumOps == 1);
+
+ // FIXME: This invokes hostcall once for each argument. We can pack up to
+ // seven scalar printf arguments in a single hostcall. See the signature of
+ // callAppendArgs().
+ for (unsigned int i = 1; i != NumOps; ++i) {
+ bool IsLast = i == NumOps - 1;
+ bool IsCString = SpecIsCString.test(i);
+ Desc = processArg(Builder, Desc, Args[i], IsCString, IsLast);
+ }
+
+ return Builder.CreateTrunc(Desc, Builder.getInt32Ty());
+}
diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt
index 67bc6fbd1809..7c0864ad1ccd 100644
--- a/llvm/lib/Transforms/Utils/CMakeLists.txt
+++ b/llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -1,4 +1,5 @@
add_llvm_component_library(LLVMTransformUtils
+ AMDGPUEmitPrintf.cpp
ASanStackFrameLayout.cpp
AddDiscriminators.cpp
BasicBlockUtils.cpp
More information about the cfe-commits
mailing list