[compiler-rt] [clang] [HIP] support 128 bit int division (PR #71978)

Yaxun Liu via cfe-commits cfe-commits at lists.llvm.org
Thu Dec 7 14:29:26 PST 2023


https://github.com/yxsamliu updated https://github.com/llvm/llvm-project/pull/71978

>From f2a6e4e2bb2554d55243385ddbae9cceebd081c9 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu at amd.com>
Date: Thu, 7 Dec 2023 17:10:23 -0500
Subject: [PATCH] [HIP] support 128 bit int division

Currently nvcc supports 128 bit int division in device code.
This patch adds support of 128 bit int division to HIP.

It builds lib functions for 128 bit division in compiler-rt
for amdgcn target.

Then links compiler-rt with -mlink-bitcode-file.

It adds support of archive of bitcode to -mlink-bitcode-file.

Fixes: #71223

Fixes: SWDEV-426193
---
 clang/include/clang/CodeGen/CodeGenAction.h   |  11 ++
 clang/lib/CodeGen/CodeGenAction.cpp           | 110 ++++++++++++++++--
 clang/lib/Driver/ToolChains/HIPAMD.cpp        |   5 +
 .../test/CodeGenCUDA/link-bitcode-archive.cu  |  63 ++++++++++
 clang/test/Driver/hip-device-compiler-rt.hip  |  15 +++
 .../cmake/Modules/CompilerRTUtils.cmake       |   2 +
 compiler-rt/cmake/base-config-ix.cmake        |   5 +
 compiler-rt/cmake/builtin-config-ix.cmake     |   3 +-
 compiler-rt/lib/builtins/CMakeLists.txt       |  16 +++
 compiler-rt/lib/builtins/int_lib.h            |   2 +
 10 files changed, 220 insertions(+), 12 deletions(-)
 create mode 100644 clang/test/CodeGenCUDA/link-bitcode-archive.cu
 create mode 100644 clang/test/Driver/hip-device-compiler-rt.hip

diff --git a/clang/include/clang/CodeGen/CodeGenAction.h b/clang/include/clang/CodeGen/CodeGenAction.h
index 7ad2988e589eb2..f701808fa4b212 100644
--- a/clang/include/clang/CodeGen/CodeGenAction.h
+++ b/clang/include/clang/CodeGen/CodeGenAction.h
@@ -9,12 +9,16 @@
 #ifndef LLVM_CLANG_CODEGEN_CODEGENACTION_H
 #define LLVM_CLANG_CODEGEN_CODEGENACTION_H
 
+#include "clang/Basic/CodeGenOptions.h"
 #include "clang/Frontend/FrontendAction.h"
 #include <memory>
 
 namespace llvm {
   class LLVMContext;
   class Module;
+  namespace object {
+  class Archive;
+  }
 }
 
 namespace clang {
@@ -54,8 +58,15 @@ class CodeGenAction : public ASTFrontendAction {
   std::unique_ptr<llvm::Module> loadModule(llvm::MemoryBufferRef MBRef);
 
   /// Load bitcode modules to link into our module from the options.
+  /// \returns true if error happens.
   bool loadLinkModules(CompilerInstance &CI);
 
+  /// Add bitcode modules in an archive to LinkModules.
+  /// \returns true if error happens.
+  bool addArchiveToLinkModules(llvm::object::Archive *Archive,
+                               const CodeGenOptions::BitcodeFileToLink &F,
+                               CompilerInstance &CI);
+
 protected:
   /// Create a new code generation action.  If the optional \p _VMContext
   /// parameter is supplied, the action uses it without taking ownership,
diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp
index bb6b1a3bc228cf..d682cb08a4f67c 100644
--- a/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/clang/lib/CodeGen/CodeGenAction.cpp
@@ -41,6 +41,7 @@
 #include "llvm/IRReader/IRReader.h"
 #include "llvm/LTO/LTOBackend.h"
 #include "llvm/Linker/Linker.h"
+#include "llvm/Object/Archive.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
@@ -940,33 +941,120 @@ CodeGenAction::~CodeGenAction() {
     delete VMContext;
 }
 
+bool CodeGenAction::addArchiveToLinkModules(
+    llvm::object::Archive *Archive, const CodeGenOptions::BitcodeFileToLink &F,
+    CompilerInstance &CI) {
+  Error Err = Error::success();
+
+  for (auto &Child : Archive->children(Err)) {
+    Expected<llvm::MemoryBufferRef> ChildBufOrErr = Child.getMemoryBufferRef();
+    if (!ChildBufOrErr) {
+      handleAllErrors(ChildBufOrErr.takeError(),
+                      [&](const llvm::ErrorInfoBase &EIB) {
+                        CI.getDiagnostics().Report(diag::err_cannot_open_file)
+                            << F.Filename << EIB.message();
+                      });
+      LinkModules.clear();
+      return true;
+    }
+    auto ChildBuffer = llvm::MemoryBuffer::getMemBufferCopy(
+        ChildBufOrErr->getBuffer(), ChildBufOrErr->getBufferIdentifier());
+
+    if (!ChildBuffer) {
+      handleAllErrors(ChildBufOrErr.takeError(),
+                      [&](const llvm::ErrorInfoBase &EIB) {
+                        CI.getDiagnostics().Report(diag::err_cannot_open_file)
+                            << F.Filename << EIB.message();
+                      });
+      LinkModules.clear();
+      return true;
+    }
+
+    Expected<std::unique_ptr<llvm::Module>> ChildModuleOrErr =
+        getOwningLazyBitcodeModule(std::move(ChildBuffer), *VMContext);
+    if (!ChildModuleOrErr) {
+      handleAllErrors(ChildModuleOrErr.takeError(),
+                      [&](const llvm::ErrorInfoBase &EIB) {
+                        CI.getDiagnostics().Report(diag::err_cannot_open_file)
+                            << F.Filename << EIB.message();
+                      });
+      LinkModules.clear();
+      return true;
+    }
+
+    LinkModules.push_back({std::move(ChildModuleOrErr.get()), F.PropagateAttrs,
+                           F.Internalize, F.LinkFlags});
+  }
+  if (Err) {
+    CI.getDiagnostics().Report(diag::err_cannot_open_file)
+        << F.Filename << toString(std::move(Err));
+    LinkModules.clear();
+    return true;
+  }
+  return false;
+}
+
 bool CodeGenAction::loadLinkModules(CompilerInstance &CI) {
   if (!LinkModules.empty())
     return false;
 
   for (const CodeGenOptions::BitcodeFileToLink &F :
        CI.getCodeGenOpts().LinkBitcodeFiles) {
-    auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename);
-    if (!BCBuf) {
+
+    auto BCBufOrErr = CI.getFileManager().getBufferForFile(F.Filename);
+    if (!BCBufOrErr) {
       CI.getDiagnostics().Report(diag::err_cannot_open_file)
-          << F.Filename << BCBuf.getError().message();
+          << F.Filename << BCBufOrErr.getError().message();
       LinkModules.clear();
       return true;
     }
 
+    auto &BCBuf = *BCBufOrErr;
+
     Expected<std::unique_ptr<llvm::Module>> ModuleOrErr =
-        getOwningLazyBitcodeModule(std::move(*BCBuf), *VMContext);
-    if (!ModuleOrErr) {
-      handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
-        CI.getDiagnostics().Report(diag::err_cannot_open_file)
-            << F.Filename << EIB.message();
-      });
+        getOwningLazyBitcodeModule(std::move(BCBuf), *VMContext);
+
+    if (ModuleOrErr) {
+      LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs,
+                             F.Internalize, F.LinkFlags});
+      continue;
+    }
+
+    // If parsing as bitcode failed, clear the error and try to parse as an
+    // archive.
+    handleAllErrors(ModuleOrErr.takeError(),
+                    [&](const llvm::ErrorInfoBase &EIB) {});
+
+    Expected<std::unique_ptr<llvm::object::Binary>> BinOrErr =
+        llvm::object::createBinary(BCBuf->getMemBufferRef(), VMContext);
+
+    if (!BinOrErr) {
+      handleAllErrors(BinOrErr.takeError(),
+                      [&](const llvm::ErrorInfoBase &EIB) {
+                        CI.getDiagnostics().Report(diag::err_cannot_open_file)
+                            << F.Filename << EIB.message();
+                      });
+      LinkModules.clear();
+      return true;
+    }
+
+    std::unique_ptr<llvm::object::Binary> &Bin = *BinOrErr;
+
+    if (Bin->isArchive()) {
+      llvm::object::Archive *Archive =
+          llvm::cast<llvm::object::Archive>(Bin.get());
+      if (addArchiveToLinkModules(Archive, F, CI))
+        return true;
+    } else {
+      // It's not an archive, and we failed to parse it as bitcode, so report
+      // an error.
+      CI.getDiagnostics().Report(diag::err_cannot_open_file)
+          << F.Filename << "Unrecognized file format";
       LinkModules.clear();
       return true;
     }
-    LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs,
-                           F.Internalize, F.LinkFlags});
   }
+
   return false;
 }
 
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp
index ccb36a6c846c80..2ea3c97136c227 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Support/Alignment.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/TargetParser/TargetParser.h"
 
 using namespace clang::driver;
@@ -403,6 +404,10 @@ HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
         BCLibs.emplace_back(AsanRTL, /*ShouldInternalize=*/false);
     }
 
+    auto BuiltinCRT = getCompilerRT(DriverArgs, "builtins");
+    if (getVFS().exists(BuiltinCRT))
+      BCLibs.emplace_back(BuiltinCRT, /*ShouldInternalize=*/false);
+
     // Add the HIP specific bitcode library.
     BCLibs.push_back(RocmInstallation->getHIPPath());
 
diff --git a/clang/test/CodeGenCUDA/link-bitcode-archive.cu b/clang/test/CodeGenCUDA/link-bitcode-archive.cu
new file mode 100644
index 00000000000000..1da92feeac31b7
--- /dev/null
+++ b/clang/test/CodeGenCUDA/link-bitcode-archive.cu
@@ -0,0 +1,63 @@
+// Prepare archive of bitcode file.
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm-bc \
+// RUN:    -fcuda-is-device \
+// RUN:    -disable-llvm-passes -DIS_LIB -o %t.bc -xhip %s
+
+// RUN: rm -f %t.a
+// RUN: llvm-ar rcs %t.a %t.bc
+
+// Link archive of bitcode file.
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN:    -mlink-builtin-bitcode %t.a  -emit-llvm \
+// RUN:    -disable-llvm-passes -o - -xhip %s \
+// RUN:    | FileCheck %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN:    -mlink-bitcode-file %t.a  -emit-llvm \
+// RUN:    -disable-llvm-passes -o - -xhip %s \
+// RUN:    | FileCheck %s
+
+// Test empty file as arhive.
+
+// RUN: rm -f %t.a
+// RUN: touch %t.a
+
+// RUN: not %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN:    -mlink-builtin-bitcode %t.a  -emit-llvm \
+// RUN:    -disable-llvm-passes -o - -xhip %s 2>&1\
+// RUN:    | FileCheck %s -check-prefix=INVLID
+
+// Test invalid arhive.
+
+// RUN: echo "!<arch>\nfake" >%t.a
+// RUN: not %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN:    -mlink-builtin-bitcode %t.a  -emit-llvm \
+// RUN:    -disable-llvm-passes -o - -xhip %s 2>&1 \
+// RUN:    | FileCheck %s -check-prefix=INVLID
+
+// Test archive of invalid bitcode file.
+
+// RUN: echo "BC\xC0\xDE" >%t.bc
+// RUN: rm -f %t.a
+// RUN: llvm-ar rcs %t.a %t.bc
+// RUN: not %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN:    -mlink-builtin-bitcode %t.a  -emit-llvm \
+// RUN:    -disable-llvm-passes -o - -xhip %s 2>&1 \
+// RUN:    | FileCheck %s -check-prefix=INVLID-BC
+
+#include "Inputs/cuda.h"
+
+#ifdef IS_LIB
+__device__ void libfun() {}
+#else
+__device__ void libfun();
+__global__ void kern() {
+ libfun();
+}
+#endif
+
+// CHECK: define {{.*}}void @_Z6libfunv()
+// INVLID: fatal error: cannot open file {{.*}}: The file was not recognized as a valid object file
+// INVLID-BC: fatal error: cannot open file {{.*}}: Invalid bitcode signature
diff --git a/clang/test/Driver/hip-device-compiler-rt.hip b/clang/test/Driver/hip-device-compiler-rt.hip
new file mode 100644
index 00000000000000..1cc919314adc4e
--- /dev/null
+++ b/clang/test/Driver/hip-device-compiler-rt.hip
@@ -0,0 +1,15 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// Check device compiler-rt is linked when available.
+
+// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx1010 \
+// RUN:   -nogpuinc -nostdinc --offload-device-only --rocm-path=%S/Inputs/rocm \
+// RUN:   -resource-dir=%S/Inputs/device_compiler_rt_resource_dir \
+// RUN:   %s 2>&1 | FileCheck %s
+
+// CHECK: "-mlink-bitcode-file" "{{[^"]+(/|\\\\)device_compiler_rt_resource_dir(/|\\\\)lib(64)?(/|\\\\)amdgcn-amd-amdhsa(/|\\\\).*}}libclang_rt.builtins.a"
+// CHECK-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR:[^"]+(/|\\\\)rocm(/|\\\\)amdgcn(/|\\\\).*]]hip.bc"
+// CHECK-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR]]ocml.bc"
+// CHECK-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR]]ockl.bc"
+// CHECK-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR]]oclc_isa_version_{{[0-9]+}}.bc"
diff --git a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
index 25e7823716fc2f..d0596a11c26a69 100644
--- a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
+++ b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
@@ -456,6 +456,8 @@ function(get_compiler_rt_target arch variable)
       endif()
     endif()
     set(target "${arch}${triple_suffix}")
+  elseif(${arch} STREQUAL "amdgcn")
+    set(target "amdgcn-amd-amdhsa")
   else()
     set(target "${arch}${triple_suffix}")
   endif()
diff --git a/compiler-rt/cmake/base-config-ix.cmake b/compiler-rt/cmake/base-config-ix.cmake
index 908c8a40278cf0..54adb48f445d96 100644
--- a/compiler-rt/cmake/base-config-ix.cmake
+++ b/compiler-rt/cmake/base-config-ix.cmake
@@ -194,6 +194,11 @@ macro(test_targets)
     endif()
   endif()
 
+  set(COMPILER_RT_ENABLE_TARGET_AMDGCN OFF CACHE BOOL "Option to enable AMDGCN in Compiler RT")
+  if (COMPILER_RT_ENABLE_TARGET_AMDGCN)
+    add_default_target_arch("amdgcn")
+  endif()
+
   # Generate the COMPILER_RT_SUPPORTED_ARCH list.
   if(ANDROID)
     # Examine compiler output to determine target architecture.
diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake
index b40138aa011f8f..8b794f5a9a9249 100644
--- a/compiler-rt/cmake/builtin-config-ix.cmake
+++ b/compiler-rt/cmake/builtin-config-ix.cmake
@@ -65,6 +65,7 @@ set(SPARCV9 sparcv9)
 set(WASM32 wasm32)
 set(WASM64 wasm64)
 set(VE ve)
+set(AMDGCN amdgcn)
 
 if(APPLE)
   set(ARM64 arm64 arm64e)
@@ -76,7 +77,7 @@ set(ALL_BUILTIN_SUPPORTED_ARCH
   ${X86} ${X86_64} ${ARM32} ${ARM64} ${AVR}
   ${HEXAGON} ${MIPS32} ${MIPS64} ${PPC32} ${PPC64}
   ${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9}
-  ${WASM32} ${WASM64} ${VE} ${LOONGARCH64})
+  ${WASM32} ${WASM64} ${VE} ${LOONGARCH64} ${AMDGCN})
 
 include(CompilerRTUtils)
 include(CompilerRTDarwinUtils)
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index ea72c595a9b807..8c3c4d787a29be 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -560,6 +560,13 @@ set(aarch64_SOURCES
   aarch64/fp_mode.c
 )
 
+set(amdgcn_SOURCES
+  divti3.c
+  udivmodti4.c
+  truncdfbf2.c
+  truncsfbf2.c
+)
+
 if(COMPILER_RT_HAS_ASM_SME AND (COMPILER_RT_HAS_AUXV OR COMPILER_RT_BAREMETAL_BUILD))
   list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-abi-init.c)
   message(STATUS "AArch64 SME ABI routines enabled")
@@ -846,6 +853,15 @@ else ()
         list(APPEND BUILTIN_CFLAGS_${arch} -fomit-frame-pointer -DCOMPILER_RT_ARMHF_TARGET)
       endif()
 
+      if (${arch} STREQUAL "amdgcn")
+        list(APPEND BUILTIN_CFLAGS_${arch}
+             --target=amdgcn-amd-amdhsa
+             -emit-llvm
+             -nogpuinc
+             -nogpulib
+             -Xclang -mcode-object-version=none )
+      endif()
+
       # For RISCV32, we must force enable int128 for compiling long
       # double routines.
       if(COMPILER_RT_ENABLE_SOFTWARE_INT128 OR "${arch}" STREQUAL "riscv32")
diff --git a/compiler-rt/lib/builtins/int_lib.h b/compiler-rt/lib/builtins/int_lib.h
index 04ea2d910574bc..203d395dbded5c 100644
--- a/compiler-rt/lib/builtins/int_lib.h
+++ b/compiler-rt/lib/builtins/int_lib.h
@@ -26,6 +26,8 @@
 #else
 #define COMPILER_RT_ABI __attribute__((__pcs__("aapcs")))
 #endif
+#elif __AMDGPU__
+#define COMPILER_RT_ABI __attribute__((amdgpu_lib_fun, weak))
 #else
 #define COMPILER_RT_ABI
 #endif



More information about the cfe-commits mailing list