[compiler-rt] [clang] [llvm] [HIP] support 128 bit int division (PR #71978)

Yaxun Liu via cfe-commits cfe-commits at lists.llvm.org
Fri Nov 10 12:36:04 PST 2023


https://github.com/yxsamliu created https://github.com/llvm/llvm-project/pull/71978

Currently nvcc supports 128-bit integer division in device code. This patch adds support for 128-bit integer division to HIP.

It builds the library functions for 128-bit division in compiler-rt for the amdgcn target.

It then links compiler-rt with -mlink-bitcode-file.

It adds support for archives of bitcode files to -mlink-bitcode-file.

It adds support for calls to library functions in the amdgcn backend.

Fixes: https://github.com/llvm/llvm-project/issues/71223

Fixes: SWDEV-426193

>From 39ec509a5e8ca93c64949653d57d959d87de8da6 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu at amd.com>
Date: Fri, 10 Nov 2023 13:51:58 -0500
Subject: [PATCH] [HIP] support 128 bit int division

Currently nvcc supports 128-bit integer division in device code.
This patch adds support for 128-bit integer division to HIP.

It builds the library functions for 128-bit division in compiler-rt
for the amdgcn target.

It then links compiler-rt with -mlink-bitcode-file.

It adds support for archives of bitcode files to -mlink-bitcode-file.

It adds support for calls to library functions in the amdgcn backend.

Fixes: https://github.com/llvm/llvm-project/issues/71223

Fixes: SWDEV-426193
---
 clang/lib/CodeGen/CodeGenAction.cpp           | 103 ++++++++++++++++--
 clang/lib/Driver/ToolChains/HIPAMD.cpp        |   5 +
 .../cmake/Modules/CompilerRTUtils.cmake       |   2 +
 compiler-rt/cmake/base-config-ix.cmake        |   5 +
 compiler-rt/cmake/builtin-config-ix.cmake     |   3 +-
 compiler-rt/lib/builtins/CMakeLists.txt       |  16 +++
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp |   4 +-
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |   1 +
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  14 ++-
 9 files changed, 136 insertions(+), 17 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp
index a31a271ed77d1ca..f5a3274fbdd2c3e 100644
--- a/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/clang/lib/CodeGen/CodeGenAction.cpp
@@ -41,6 +41,7 @@
 #include "llvm/IRReader/IRReader.h"
 #include "llvm/LTO/LTOBackend.h"
 #include "llvm/Linker/Linker.h"
+#include "llvm/Object/Archive.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
@@ -937,27 +938,105 @@ bool CodeGenAction::loadLinkModules(CompilerInstance &CI) {
 
   for (const CodeGenOptions::BitcodeFileToLink &F :
        CI.getCodeGenOpts().LinkBitcodeFiles) {
-    auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename);
-    if (!BCBuf) {
+
+    auto BCBufOrErr = CI.getFileManager().getBufferForFile(F.Filename);
+    if (!BCBufOrErr) {
       CI.getDiagnostics().Report(diag::err_cannot_open_file)
-          << F.Filename << BCBuf.getError().message();
+          << F.Filename << BCBufOrErr.getError().message();
       LinkModules.clear();
       return true;
     }
 
+    auto &BCBuf = *BCBufOrErr;
+
     Expected<std::unique_ptr<llvm::Module>> ModuleOrErr =
-        getOwningLazyBitcodeModule(std::move(*BCBuf), *VMContext);
-    if (!ModuleOrErr) {
-      handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
+        getOwningLazyBitcodeModule(std::move(BCBuf), *VMContext);
+
+    if (ModuleOrErr) {
+      LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs,
+                             F.Internalize, F.LinkFlags});
+      continue;
+    } else {
+      // If parsing as bitcode failed, clear the error and try to parse as an
+      // archive.
+      handleAllErrors(ModuleOrErr.takeError(),
+                      [&](const llvm::ErrorInfoBase &EIB) {});
+
+      Expected<std::unique_ptr<llvm::object::Binary>> BinOrErr =
+          llvm::object::createBinary(BCBuf->getMemBufferRef(), VMContext);
+
+      if (!BinOrErr) {
+        handleAllErrors(BinOrErr.takeError(),
+                        [&](const llvm::ErrorInfoBase &EIB) {
+                          CI.getDiagnostics().Report(diag::err_cannot_open_file)
+                              << F.Filename << EIB.message();
+                        });
+        LinkModules.clear();
+        return true;
+      }
+
+      std::unique_ptr<llvm::object::Binary> &Bin = *BinOrErr;
+
+      if (Bin->isArchive()) {
+        llvm::object::Archive *Archive =
+            llvm::cast<llvm::object::Archive>(Bin.get());
+        Error Err = Error::success();
+
+        for (auto &Child : Archive->children(Err)) {
+          Expected<llvm::MemoryBufferRef> ChildBufOrErr =
+              Child.getMemoryBufferRef();
+          if (!ChildBufOrErr) {
+            handleAllErrors(
+                ChildBufOrErr.takeError(), [&](const llvm::ErrorInfoBase &EIB) {
+                  CI.getDiagnostics().Report(diag::err_cannot_open_file)
+                      << F.Filename << EIB.message();
+                });
+            continue;
+          }
+          auto ChildBuffer = llvm::MemoryBuffer::getMemBufferCopy(
+              ChildBufOrErr->getBuffer(), ChildBufOrErr->getBufferIdentifier());
+
+          if (!ChildBuffer) {
+            handleAllErrors(
+                ChildBufOrErr.takeError(), [&](const llvm::ErrorInfoBase &EIB) {
+                  CI.getDiagnostics().Report(diag::err_cannot_open_file)
+                      << F.Filename << EIB.message();
+                });
+            continue;
+          }
+
+          Expected<std::unique_ptr<llvm::Module>> ChildModuleOrErr =
+              getOwningLazyBitcodeModule(std::move(ChildBuffer), *VMContext);
+          if (!ChildModuleOrErr) {
+            handleAllErrors(
+                ChildModuleOrErr.takeError(),
+                [&](const llvm::ErrorInfoBase &EIB) {
+                  CI.getDiagnostics().Report(diag::err_cannot_open_file)
+                      << F.Filename << EIB.message();
+                });
+            continue;
+          }
+
+          LinkModules.push_back({std::move(ChildModuleOrErr.get()),
+                                 F.PropagateAttrs, F.Internalize, F.LinkFlags});
+        }
+        if (Err) {
+          CI.getDiagnostics().Report(diag::err_cannot_open_file)
+              << F.Filename << toString(std::move(Err));
+          LinkModules.clear();
+          return true;
+        }
+      } else {
+        // It's not an archive, and we failed to parse it as bitcode, so report
+        // an error.
         CI.getDiagnostics().Report(diag::err_cannot_open_file)
-            << F.Filename << EIB.message();
-      });
-      LinkModules.clear();
-      return true;
+            << F.Filename << "Unrecognized file format";
+        LinkModules.clear();
+        return true;
+      }
     }
-    LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs,
-                           F.Internalize, F.LinkFlags});
   }
+
   return false;
 }
 
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp
index ccb36a6c846c806..2ea3c97136c2272 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Support/Alignment.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/TargetParser/TargetParser.h"
 
 using namespace clang::driver;
@@ -403,6 +404,10 @@ HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
         BCLibs.emplace_back(AsanRTL, /*ShouldInternalize=*/false);
     }
 
+    auto BuiltinCRT = getCompilerRT(DriverArgs, "builtins");
+    if (getVFS().exists(BuiltinCRT))
+      BCLibs.emplace_back(BuiltinCRT, /*ShouldInternalize=*/false);
+
     // Add the HIP specific bitcode library.
     BCLibs.push_back(RocmInstallation->getHIPPath());
 
diff --git a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
index 25e7823716fc2f4..d0596a11c26a69a 100644
--- a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
+++ b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
@@ -456,6 +456,8 @@ function(get_compiler_rt_target arch variable)
       endif()
     endif()
     set(target "${arch}${triple_suffix}")
+  elseif(${arch} STREQUAL "amdgcn")
+    set(target "amdgcn-amd-amdhsa")
   else()
     set(target "${arch}${triple_suffix}")
   endif()
diff --git a/compiler-rt/cmake/base-config-ix.cmake b/compiler-rt/cmake/base-config-ix.cmake
index 908c8a40278cf0c..54adb48f445d96f 100644
--- a/compiler-rt/cmake/base-config-ix.cmake
+++ b/compiler-rt/cmake/base-config-ix.cmake
@@ -194,6 +194,11 @@ macro(test_targets)
     endif()
   endif()
 
+  set(COMPILER_RT_ENABLE_TARGET_AMDGCN OFF CACHE BOOL "Option to enable AMDGCN in Compiler RT")
+  if (COMPILER_RT_ENABLE_TARGET_AMDGCN)
+    add_default_target_arch("amdgcn")
+  endif()
+
   # Generate the COMPILER_RT_SUPPORTED_ARCH list.
   if(ANDROID)
     # Examine compiler output to determine target architecture.
diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake
index 5ccc5d7a559b2ac..02db2f9ae2264f2 100644
--- a/compiler-rt/cmake/builtin-config-ix.cmake
+++ b/compiler-rt/cmake/builtin-config-ix.cmake
@@ -64,6 +64,7 @@ set(SPARCV9 sparcv9)
 set(WASM32 wasm32)
 set(WASM64 wasm64)
 set(VE ve)
+set(AMDGCN amdgcn)
 
 if(APPLE)
   set(ARM64 arm64 arm64e)
@@ -75,7 +76,7 @@ set(ALL_BUILTIN_SUPPORTED_ARCH
   ${X86} ${X86_64} ${ARM32} ${ARM64} ${AVR}
   ${HEXAGON} ${MIPS32} ${MIPS64} ${PPC32} ${PPC64}
   ${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9}
-  ${WASM32} ${WASM64} ${VE} ${LOONGARCH64})
+  ${WASM32} ${WASM64} ${VE} ${LOONGARCH64} ${AMDGCN})
 
 include(CompilerRTUtils)
 include(CompilerRTDarwinUtils)
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 360fdb0e99b57be..e7f2b370f50b271 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -553,6 +553,13 @@ set(aarch64_SOURCES
   aarch64/fp_mode.c
 )
 
+set(amdgcn_SOURCES
+  divti3.c
+  udivmodti4.c
+  truncdfbf2.c
+  truncsfbf2.c
+)
+
 if(COMPILER_RT_HAS_ASM_SME AND (COMPILER_RT_HAS_AUXV OR COMPILER_RT_BAREMETAL_BUILD))
   list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-abi-init.c)
   message(STATUS "AArch64 SME ABI routines enabled")
@@ -838,6 +845,15 @@ else ()
         list(APPEND BUILTIN_CFLAGS_${arch} -fomit-frame-pointer -DCOMPILER_RT_ARMHF_TARGET)
       endif()
 
+      if (${arch} STREQUAL "amdgcn")
+        list(APPEND BUILTIN_CFLAGS_${arch}
+             --target=amdgcn-amd-amdhsa
+             -emit-llvm
+             -nogpuinc
+             -nogpulib
+             -Xclang -mcode-object-version=none )
+      endif()
+
       # For RISCV32, we must force enable int128 for compiling long
       # double routines.
       if(COMPILER_RT_ENABLE_SOFTWARE_INT128 OR "${arch}" STREQUAL "riscv32")
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index d4df9b6f49eb11c..e28d66fddad5aea 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -501,9 +501,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::SELECT, MVT::v12f32, Promote);
   AddPromotedToType(ISD::SELECT, MVT::v12f32, MVT::v12i32);
 
-  // There are no libcalls of any kind.
   for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I)
     setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
+  // Supported compiler-rt libcalls should be enabled in compiler-rt for
+  // amdgcn first then added here.
+  setLibcallName(RTLIB::SDIV_I128, "__divti3");
 
   setSchedulingPreference(Sched::RegPressure);
   setJumpIsExpensive(true);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 951ed9420594b19..c78a031b624ce82 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -596,6 +596,7 @@ static bool mustPreserveGV(const GlobalValue &GV) {
   if (const Function *F = dyn_cast<Function>(&GV))
     return F->isDeclaration() || F->getName().startswith("__asan_") ||
            F->getName().startswith("__sanitizer_") ||
+           F->getName() == "__divti3" ||
            AMDGPU::isEntryFunctionCC(F->getCallingConv());
 
   GV.removeDeadConstantUsers();
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 4681004d3ba74ff..56425d631291ef6 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3426,8 +3426,9 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
                               "unsupported call to variadic function ");
   }
 
-  if (!CLI.CB)
-    report_fatal_error("unsupported libcall legalization");
+  if (!CLI.CB && Callee.getNode()->getOpcode() != ISD::ExternalSymbol)
+    report_fatal_error(
+        "unsupported libcall legalization: Callee function is unknown");
 
   if (IsTailCall && MF.getTarget().Options.GuaranteedTailCallOpt) {
     return lowerUnhandledCall(CLI, InVals,
@@ -3630,10 +3631,17 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
 
   std::vector<SDValue> Ops;
   Ops.push_back(Chain);
+  bool AddTargetGlobalAddr = true;
+  // Try to find the callee in the current module.
+  if (isa<ExternalSymbolSDNode>(Callee)) {
+    Callee = DAG.getSymbolFunctionGlobalAddress(Callee);
+    AddTargetGlobalAddr = false;
+  }
   Ops.push_back(Callee);
   // Add a redundant copy of the callee global which will not be legalized, as
   // we need direct access to the callee later.
-  if (GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(Callee)) {
+  GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(Callee);
+  if (GSD && AddTargetGlobalAddr) {
     const GlobalValue *GV = GSD->getGlobal();
     Ops.push_back(DAG.getTargetGlobalAddress(GV, DL, MVT::i64));
   } else {



More information about the cfe-commits mailing list