[llvm] ed181ef - [HIP][AMDGPU] expand printf when compiling HIP to AMDGPU

Sameer Sahasrabuddhe via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 16 01:55:36 PST 2020


Author: Sameer Sahasrabuddhe
Date: 2020-01-16T15:15:38+05:30
New Revision: ed181efa175d3e0acc134e6cd161914e64c7195e

URL: https://github.com/llvm/llvm-project/commit/ed181efa175d3e0acc134e6cd161914e64c7195e
DIFF: https://github.com/llvm/llvm-project/commit/ed181efa175d3e0acc134e6cd161914e64c7195e.diff

LOG: [HIP][AMDGPU] expand printf when compiling HIP to AMDGPU

Summary:
This change implements the expansion in two parts:
- Add a utility function emitAMDGPUPrintfCall() in LLVM.
- Invoke the above function from Clang CodeGen, when processing a HIP
  program for the AMDGPU target.

The printf expansion has undefined behaviour if the format string is
not a compile-time constant. As a sufficient condition, the HIP
ToolChain now emits -Werror=format-nonliteral.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D71365

Added: 
    clang/test/CodeGenHIP/printf-aggregate.cpp
    clang/test/CodeGenHIP/printf.cpp
    clang/test/Driver/hip-printf.hip
    llvm/include/llvm/Transforms/Utils/AMDGPUEmitPrintf.h
    llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp

Modified: 
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/CodeGen/CGGPUBuiltin.cpp
    clang/lib/CodeGen/CodeGenFunction.h
    clang/lib/Driver/ToolChains/HIP.cpp
    llvm/lib/Transforms/Utils/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 09fd3087b494..4decaa593a59 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4115,6 +4115,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BIprintf:
     if (getTarget().getTriple().isNVPTX())
       return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
+    if (getTarget().getTriple().getArch() == Triple::amdgcn &&
+        getLangOpts().HIP)
+      return EmitAMDGPUDevicePrintfCallExpr(E, ReturnValue);
     break;
   case Builtin::BI__builtin_canonicalize:
   case Builtin::BI__builtin_canonicalizef:

diff  --git a/clang/lib/CodeGen/CGGPUBuiltin.cpp b/clang/lib/CodeGen/CGGPUBuiltin.cpp
index d7e267630762..bccce7dd7ff4 100644
--- a/clang/lib/CodeGen/CGGPUBuiltin.cpp
+++ b/clang/lib/CodeGen/CGGPUBuiltin.cpp
@@ -16,6 +16,7 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
 
 using namespace clang;
 using namespace CodeGen;
@@ -120,3 +121,36 @@ CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
   return RValue::get(Builder.CreateCall(
       VprintfFunc, {Args[0].getRValue(*this).getScalarVal(), BufferPtr}));
 }
+
+RValue
+CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E,
+                                                ReturnValueSlot ReturnValue) {
+  assert(getTarget().getTriple().getArch() == llvm::Triple::amdgcn);
+  assert(E->getBuiltinCallee() == Builtin::BIprintf ||
+         E->getBuiltinCallee() == Builtin::BI__builtin_printf);
+  assert(E->getNumArgs() >= 1); // printf always has at least one arg.
+
+  CallArgList CallArgs;
+  EmitCallArgs(CallArgs,
+               E->getDirectCallee()->getType()->getAs<FunctionProtoType>(),
+               E->arguments(), E->getDirectCallee(),
+               /* ParamsToSkip = */ 0);
+
+  SmallVector<llvm::Value *, 8> Args;
+  for (auto A : CallArgs) {
+    // We don't know how to emit non-scalar varargs.
+    if (!A.getRValue(*this).isScalar()) {
+      CGM.ErrorUnsupported(E, "non-scalar arg to printf");
+      return RValue::get(llvm::ConstantInt::get(IntTy, -1));
+    }
+
+    llvm::Value *Arg = A.getRValue(*this).getScalarVal();
+    Args.push_back(Arg);
+  }
+
+  llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint());
+  IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation());
+  auto Printf = llvm::emitAMDGPUPrintfCall(IRB, Args);
+  Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint());
+  return RValue::get(Printf);
+}

diff  --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 3d8bc93eb965..5ab15bf74a23 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3722,6 +3722,8 @@ class CodeGenFunction : public CodeGenTypeCache {
 
   RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
                                        ReturnValueSlot ReturnValue);
+  RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E,
+                                        ReturnValueSlot ReturnValue);
 
   RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
                          const CallExpr *E, ReturnValueSlot ReturnValue);

diff  --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp
index f89e648948ab..d4b015a7e873 100644
--- a/clang/lib/Driver/ToolChains/HIP.cpp
+++ b/clang/lib/Driver/ToolChains/HIP.cpp
@@ -436,6 +436,7 @@ Tool *HIPToolChain::buildLinker() const {
 
 void HIPToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
   HostTC.addClangWarningOptions(CC1Args);
+  CC1Args.push_back("-Werror=format-nonliteral");
 }
 
 ToolChain::CXXStdlibType

diff  --git a/clang/test/CodeGenHIP/printf-aggregate.cpp b/clang/test/CodeGenHIP/printf-aggregate.cpp
new file mode 100644
index 000000000000..83e8f899c412
--- /dev/null
+++ b/clang/test/CodeGenHIP/printf-aggregate.cpp
@@ -0,0 +1,20 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -fcuda-is-device \
+// RUN:    -verify -emit-llvm %s
+
+#define __device__ __attribute__((device))
+extern "C" __device__ int printf(const char *format, ...);
+
+// Check that we don't crash when asked to printf a non-scalar arg.
+struct Struct {
+  int x;
+  int y;
+};
+
+__device__ void PrintfNonScalar(const char *fmt) {
+  printf(fmt, 1);
+  // Ignore the warning about the %d not matching the struct argument
+  // expected-warning at +2 {{}}
+  // expected-error at +1 {{cannot compile this non-scalar arg to printf}}
+  printf("%d", Struct());
+}

diff  --git a/clang/test/CodeGenHIP/printf.cpp b/clang/test/CodeGenHIP/printf.cpp
new file mode 100644
index 000000000000..450a77c63bba
--- /dev/null
+++ b/clang/test/CodeGenHIP/printf.cpp
@@ -0,0 +1,44 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -fcuda-is-device \
+// RUN:   -o - %s | FileCheck --enable-var-scope %s
+
+#define __device__ __attribute__((device))
+
+extern "C" __device__ int printf(const char *format, ...);
+
+__device__ int foo1() {
+  const char *s = "hello world";
+  return printf("%.*f %*.*s %p\n", 8, 3.14159, 8, 4, s, s);
+}
+
+// CHECK-LABEL: @_Z4foo1v()
+// CHECK: [[BEGIN:%.*]]   = call i64 @__ockl_printf_begin(i64 0)
+// CHECK: [[STRLEN1:%.*]] = phi i64 [ %{{[^,]*}}, %{{[^ ]*}} ], [ 0, %{{[^ ]*}} ]
+// CHECK: [[APPEND1:%.*]] = call i64 @__ockl_printf_append_string_n(i64 [[BEGIN]], {{.*}}, i64 [[STRLEN1]], i32 0)
+// CHECK: [[APPEND2:%.*]] = call i64 @__ockl_printf_append_args(i64 [[APPEND1]], i32 1, i64 8, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i32 0)
+// CHECK: [[APPEND3:%.*]] = call i64 @__ockl_printf_append_args(i64 [[APPEND2]], i32 1, i64 4614256650576692846, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i32 0)
+// CHECK: [[APPEND4:%.*]] = call i64 @__ockl_printf_append_args(i64 [[APPEND3]], i32 1, i64 8, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i32 0)
+// CHECK: [[APPEND5:%.*]] = call i64 @__ockl_printf_append_args(i64 [[APPEND4]], i32 1, i64 4, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i32 0)
+// CHECK: [[STRLEN2:%.*]] = phi i64 [ %{{[^,]*}}, %{{[^ ]*}} ], [ 0, %{{[^ ]*}} ]
+// CHECK: [[APPEND6:%.*]] = call i64 @__ockl_printf_append_string_n(i64 [[APPEND5]], {{.*}}, i64 [[STRLEN2]], i32 0)
+// CHECK: [[PTR2INT:%.*]] = ptrtoint i8* %{{.*}} to i64
+// CHECK: [[APPEND7:%.*]] = call i64 @__ockl_printf_append_args(i64 [[APPEND6]], i32 1, i64 [[PTR2INT]], i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i32 1)
+// CHECK: [[RETURN:%.*]]  = trunc i64 [[APPEND7]] to i32
+// CHECK: ret i32 [[RETURN]]
+
+__device__ char *dstr;
+
+__device__ int foo2() {
+  return printf("%s %p\n", dstr, dstr);
+}
+
+// CHECK-LABEL: @_Z4foo2v()
+// CHECK: [[BEGIN:%.*]]   = call i64 @__ockl_printf_begin(i64 0)
+// CHECK: [[STRLEN1:%.*]] = phi i64 [ %{{[^,]*}}, %{{[^ ]*}} ], [ 0, %{{[^ ]*}} ]
+// CHECK: [[APPEND1:%.*]] = call i64 @__ockl_printf_append_string_n(i64 [[BEGIN]], {{.*}}, i64 [[STRLEN1]], i32 0)
+// CHECK: [[STRLEN2:%.*]] = phi i64 [ %{{[^,]*}}, %{{[^ ]*}} ], [ 0, %{{[^ ]*}} ]
+// CHECK: [[APPEND2:%.*]] = call i64 @__ockl_printf_append_string_n(i64 [[APPEND1]], {{.*}}, i64 [[STRLEN2]], i32 0)
+// CHECK: [[PTR2INT:%.*]] = ptrtoint i8* %{{.*}} to i64
+// CHECK: [[APPEND3:%.*]] = call i64 @__ockl_printf_append_args(i64 [[APPEND2]], i32 1, i64 [[PTR2INT]], i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i32 1)
+// CHECK: [[RETURN:%.*]]  = trunc i64 [[APPEND3]] to i32
+// CHECK: ret i32 [[RETURN]]

diff  --git a/clang/test/Driver/hip-printf.hip b/clang/test/Driver/hip-printf.hip
new file mode 100644
index 000000000000..2df344f8fb2e
--- /dev/null
+++ b/clang/test/Driver/hip-printf.hip
@@ -0,0 +1,9 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu -x hip --cuda-gpu-arch=gfx900 \
+// RUN:   %s 2>&1 | FileCheck %s
+
+// CHECK: [[CLANG:".*clang.*"]] "-cc1"
+// CHECK-SAME: "-Werror=format-nonliteral"

diff  --git a/llvm/include/llvm/Transforms/Utils/AMDGPUEmitPrintf.h b/llvm/include/llvm/Transforms/Utils/AMDGPUEmitPrintf.h
new file mode 100644
index 000000000000..65dbf47e9bbc
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/AMDGPUEmitPrintf.h
@@ -0,0 +1,25 @@
+//===- AMDGPUEmitPrintf.h ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Utility function to lower a printf call into a series of device
+// library calls on the AMDGPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_AMDGPUEMITPRINTF_H
+#define LLVM_TRANSFORMS_UTILS_AMDGPUEMITPRINTF_H
+
+#include "llvm/IR/IRBuilder.h"
+
+namespace llvm {
+
+Value *emitAMDGPUPrintfCall(IRBuilder<> &Builder, ArrayRef<Value *> Args);
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_AMDGPUEMITPRINTF_H

diff  --git a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
new file mode 100644
index 000000000000..976956ca4a09
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -0,0 +1,246 @@
+//===- AMDGPUEmitPrintf.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Utility function to lower a printf call into a series of device
+// library calls on the AMDGPU target.
+//
+// WARNING: This file knows about certain library functions. It recognizes them
+// by name, and hardwires knowledge of their semantics.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+
+#include <iostream>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-emit-printf"
+
+static bool isCString(const Value *Arg) {
+  auto Ty = Arg->getType();
+  auto PtrTy = dyn_cast<PointerType>(Ty);
+  if (!PtrTy)
+    return false;
+
+  auto IntTy = dyn_cast<IntegerType>(PtrTy->getElementType());
+  if (!IntTy)
+    return false;
+
+  return IntTy->getBitWidth() == 8;
+}
+
+static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) {
+  auto Int64Ty = Builder.getInt64Ty();
+  auto Ty = Arg->getType();
+
+  if (auto IntTy = dyn_cast<IntegerType>(Ty)) {
+    switch (IntTy->getBitWidth()) {
+    case 32:
+      return Builder.CreateZExt(Arg, Int64Ty);
+    case 64:
+      return Arg;
+    }
+  }
+
+  if (Ty->getTypeID() == Type::DoubleTyID) {
+    return Builder.CreateBitCast(Arg, Int64Ty);
+  }
+
+  if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
+    return Builder.CreatePtrToInt(Arg, Int64Ty);
+  }
+
+  llvm_unreachable("unexpected type");
+}
+
+static Value *callPrintfBegin(IRBuilder<> &Builder, Value *Version) {
+  auto Int64Ty = Builder.getInt64Ty();
+  auto M = Builder.GetInsertBlock()->getModule();
+  auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty);
+  return Builder.CreateCall(Fn, Version);
+}
+
+static Value *callAppendArgs(IRBuilder<> &Builder, Value *Desc, int NumArgs,
+                             Value *Arg0, Value *Arg1, Value *Arg2, Value *Arg3,
+                             Value *Arg4, Value *Arg5, Value *Arg6,
+                             bool IsLast) {
+  auto Int64Ty = Builder.getInt64Ty();
+  auto Int32Ty = Builder.getInt32Ty();
+  auto M = Builder.GetInsertBlock()->getModule();
+  auto Fn = M->getOrInsertFunction("__ockl_printf_append_args", Int64Ty,
+                                   Int64Ty, Int32Ty, Int64Ty, Int64Ty, Int64Ty,
+                                   Int64Ty, Int64Ty, Int64Ty, Int64Ty, Int32Ty);
+  auto IsLastValue = Builder.getInt32(IsLast);
+  auto NumArgsValue = Builder.getInt32(NumArgs);
+  return Builder.CreateCall(Fn, {Desc, NumArgsValue, Arg0, Arg1, Arg2, Arg3,
+                                 Arg4, Arg5, Arg6, IsLastValue});
+}
+
+static Value *appendArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
+                        bool IsLast) {
+  auto Arg0 = fitArgInto64Bits(Builder, Arg);
+  auto Zero = Builder.getInt64(0);
+  return callAppendArgs(Builder, Desc, 1, Arg0, Zero, Zero, Zero, Zero, Zero,
+                        Zero, IsLast);
+}
+
+// The device library does not provide strlen, so we build our own loop
+// here. While we are at it, we also include the terminating null in the length.
+static Value *getStrlenWithNull(IRBuilder<> &Builder, Value *Str) {
+  auto *Prev = Builder.GetInsertBlock();
+  Module *M = Prev->getModule();
+
+  auto CharZero = Builder.getInt8(0);
+  auto One = Builder.getInt64(1);
+  auto Zero = Builder.getInt64(0);
+  auto Int64Ty = Builder.getInt64Ty();
+
+  // The length is either zero for a null pointer, or the computed value for an
+  // actual string. We need a join block for a phi that represents the final
+  // value.
+  //
+  //  Strictly speaking, the zero does not matter since
+  // __ockl_printf_append_string_n ignores the length if the pointer is null.
+  BasicBlock *Join = nullptr;
+  if (Prev->getTerminator()) {
+    Join = Prev->splitBasicBlock(Builder.GetInsertPoint(),
+                                 "strlen.join");
+    Prev->getTerminator()->eraseFromParent();
+  } else {
+    Join = BasicBlock::Create(M->getContext(), "strlen.join",
+                              Prev->getParent());
+  }
+  BasicBlock *While =
+      BasicBlock::Create(M->getContext(), "strlen.while",
+                         Prev->getParent(), Join);
+  BasicBlock *WhileDone = BasicBlock::Create(
+      M->getContext(), "strlen.while.done",
+      Prev->getParent(), Join);
+
+  // Emit an early return for when the pointer is null.
+  Builder.SetInsertPoint(Prev);
+  auto CmpNull =
+      Builder.CreateICmpEQ(Str, Constant::getNullValue(Str->getType()));
+  BranchInst::Create(Join, While, CmpNull, Prev);
+
+  // Entry to the while loop.
+  Builder.SetInsertPoint(While);
+
+  auto PtrPhi = Builder.CreatePHI(Str->getType(), 2);
+  PtrPhi->addIncoming(Str, Prev);
+  auto PtrNext = Builder.CreateGEP(PtrPhi, One);
+  PtrPhi->addIncoming(PtrNext, While);
+
+  // Condition for the while loop.
+  auto Data = Builder.CreateLoad(PtrPhi);
+  auto Cmp = Builder.CreateICmpEQ(Data, CharZero);
+  Builder.CreateCondBr(Cmp, WhileDone, While);
+
+  // Add one to the computed length.
+  Builder.SetInsertPoint(WhileDone, WhileDone->begin());
+  auto Begin = Builder.CreatePtrToInt(Str, Int64Ty);
+  auto End = Builder.CreatePtrToInt(PtrPhi, Int64Ty);
+  auto Len = Builder.CreateSub(End, Begin);
+  Len = Builder.CreateAdd(Len, One);
+
+  // Final join.
+  BranchInst::Create(Join, WhileDone);
+  Builder.SetInsertPoint(Join, Join->begin());
+  auto LenPhi = Builder.CreatePHI(Len->getType(), 2);
+  LenPhi->addIncoming(Len, WhileDone);
+  LenPhi->addIncoming(Zero, Prev);
+
+  return LenPhi;
+}
+
+static Value *callAppendStringN(IRBuilder<> &Builder, Value *Desc, Value *Str,
+                                Value *Length, bool isLast) {
+  auto Int64Ty = Builder.getInt64Ty();
+  auto CharPtrTy = Builder.getInt8PtrTy();
+  auto Int32Ty = Builder.getInt32Ty();
+  auto M = Builder.GetInsertBlock()->getModule();
+  auto Fn = M->getOrInsertFunction("__ockl_printf_append_string_n", Int64Ty,
+                                   Int64Ty, CharPtrTy, Int64Ty, Int32Ty);
+  auto IsLastInt32 = Builder.getInt32(isLast);
+  return Builder.CreateCall(Fn, {Desc, Str, Length, IsLastInt32});
+}
+
+static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg,
+                           bool IsLast) {
+  auto Length = getStrlenWithNull(Builder, Arg);
+  return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
+}
+
+static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
+                         bool SpecIsCString, bool IsLast) {
+  if (SpecIsCString && isCString(Arg)) {
+    return appendString(Builder, Desc, Arg, IsLast);
+  }
+  // If the format specifies a string but the argument is not, the frontend will
+  // have printed a warning. We just rely on undefined behaviour and send the
+  // argument anyway.
+  return appendArg(Builder, Desc, Arg, IsLast);
+}
+
+// Scan the format string to locate all specifiers, and mark the ones that
+// specify a string, i.e, the "%s" specifier with optional '*' characters.
+static void locateCStrings(SparseBitVector<8> &BV, Value *Fmt) {
+  StringRef Str;
+  if (!getConstantStringInfo(Fmt, Str) || Str.empty())
+    return;
+
+  static const char ConvSpecifiers[] = "diouxXfFeEgGaAcspn";
+  size_t SpecPos = 0;
+  // Skip the first argument, the format string.
+  unsigned ArgIdx = 1;
+
+  while ((SpecPos = Str.find_first_of('%', SpecPos)) != StringRef::npos) {
+    if (Str[SpecPos + 1] == '%') {
+      SpecPos += 2;
+      continue;
+    }
+    auto SpecEnd = Str.find_first_of(ConvSpecifiers, SpecPos);
+    if (SpecEnd == StringRef::npos)
+      return;
+    auto Spec = Str.slice(SpecPos, SpecEnd + 1);
+    ArgIdx += Spec.count('*');
+    if (Str[SpecEnd] == 's') {
+      BV.set(ArgIdx);
+    }
+    SpecPos = SpecEnd + 1;
+    ++ArgIdx;
+  }
+}
+
+Value *llvm::emitAMDGPUPrintfCall(IRBuilder<> &Builder,
+                                  ArrayRef<Value *> Args) {
+  auto NumOps = Args.size();
+  assert(NumOps >= 1);
+
+  auto Fmt = Args[0];
+  SparseBitVector<8> SpecIsCString;
+  locateCStrings(SpecIsCString, Fmt);
+
+  auto Desc = callPrintfBegin(Builder, Builder.getIntN(64, 0));
+  Desc = appendString(Builder, Desc, Fmt, NumOps == 1);
+
+  // FIXME: This invokes hostcall once for each argument. We can pack up to
+  // seven scalar printf arguments in a single hostcall. See the signature of
+  // callAppendArgs().
+  for (unsigned int i = 1; i != NumOps; ++i) {
+    bool IsLast = i == NumOps - 1;
+    bool IsCString = SpecIsCString.test(i);
+    Desc = processArg(Builder, Desc, Args[i], IsCString, IsLast);
+  }
+
+  return Builder.CreateTrunc(Desc, Builder.getInt32Ty());
+}

diff  --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt
index 67bc6fbd1809..7c0864ad1ccd 100644
--- a/llvm/lib/Transforms/Utils/CMakeLists.txt
+++ b/llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_llvm_component_library(LLVMTransformUtils
+  AMDGPUEmitPrintf.cpp
   ASanStackFrameLayout.cpp
   AddDiscriminators.cpp
   BasicBlockUtils.cpp


        


More information about the llvm-commits mailing list