[clang] dd5d65a - [HIP][Clang][CodeGen] Add CodeGen support for `hipstdpar`

Alex Voicu via cfe-commits cfe-commits at lists.llvm.org
Tue Oct 17 03:41:45 PDT 2023


Author: Alex Voicu
Date: 2023-10-17T11:41:36+01:00
New Revision: dd5d65adb6413122a5ba1ed04c5c2c0b4951b76c

URL: https://github.com/llvm/llvm-project/commit/dd5d65adb6413122a5ba1ed04c5c2c0b4951b76c
DIFF: https://github.com/llvm/llvm-project/commit/dd5d65adb6413122a5ba1ed04c5c2c0b4951b76c.diff

LOG: [HIP][Clang][CodeGen] Add CodeGen support for `hipstdpar`

This patch adds the CodeGen changes needed for enabling HIP parallel algorithm offload on AMDGPU targets. This change relaxes restrictions on what gets emitted on the device path, when compiling in `hipstdpar` mode:

1. Unless a function is explicitly marked `__host__`, it will get emitted, whereas before only `__device__` and `__global__` functions would be emitted;
2. Unsupported builtins are ignored as opposed to being marked as an error, as the decision on their validity is deferred to the `hipstdpar` specific code selection pass;
3. We add a `hipstdpar` specific pass to the opt pipeline, independent of optimisation level:
    - When compiling for the host, iff the user requested it via the `--hipstdpar-interpose-alloc` flag, we add a pass which replaces canonical allocation / deallocation functions with accelerator aware equivalents.

A test to validate that unannotated functions get correctly emitted is added as well.

Reviewed by: yaxunl, efriedma

Differential Revision: https://reviews.llvm.org/D155850

Added: 
    clang/test/CodeGenHipStdPar/unannotated-functions-get-emitted.cpp
    clang/test/CodeGenHipStdPar/unsupported-ASM.cpp
    clang/test/CodeGenHipStdPar/unsupported-builtins.cpp

Modified: 
    clang/lib/CodeGen/BackendUtil.cpp
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/CodeGen/CGStmt.cpp
    clang/lib/CodeGen/CMakeLists.txt
    clang/lib/CodeGen/CodeGenFunction.cpp
    clang/lib/CodeGen/CodeGenModule.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index d066819871dfde3..70accce456d3c07 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -78,6 +78,7 @@
 #include "llvm/Transforms/Scalar/EarlyCSE.h"
 #include "llvm/Transforms/Scalar/GVN.h"
 #include "llvm/Transforms/Scalar/JumpThreading.h"
+#include "llvm/Transforms/HipStdPar/HipStdPar.h"
 #include "llvm/Transforms/Utils/Debugify.h"
 #include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -1108,6 +1109,10 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
     return;
   }
 
+  if (LangOpts.HIPStdPar && !LangOpts.CUDAIsDevice &&
+      LangOpts.HIPStdParInterposeAlloc)
+    MPM.addPass(HipStdParAllocationInterpositionPass());
+
   // Now that we have all of the passes ready, run them.
   {
     PrettyStackTraceString CrashInfo("Optimizer");

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 4d86e8a769846c4..43ace3e11e6109f 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2327,6 +2327,19 @@ static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
   return nullptr;
 }
 
+static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
+                                              const FunctionDecl *FD) {
+  auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";
+  auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);
+  auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
+
+  SmallVector<Value *, 16> Args;
+  for (auto &&FormalTy : FnTy->params())
+    Args.push_back(llvm::PoisonValue::get(FormalTy));
+
+  return RValue::get(CGF->Builder.CreateCall(UBF, Args));
+}
+
 RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
                                         const CallExpr *E,
                                         ReturnValueSlot ReturnValue) {
@@ -5765,6 +5778,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
   }
 
+  if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
+    return EmitHipStdParUnsupportedBuiltin(this, FD);
+
   ErrorUnsupported(E, "builtin function");
 
   // Unknown builtin, for now just dump it out and return undef.
@@ -5775,6 +5791,16 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
                                         unsigned BuiltinID, const CallExpr *E,
                                         ReturnValueSlot ReturnValue,
                                         llvm::Triple::ArchType Arch) {
+  // When compiling in HipStdPar mode we have to be conservative in rejecting
+  // target specific features in the FE, and defer the possible error to the
+  // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
+  // referenced by an accelerator executable function, we emit an error.
+  // Returning nullptr here leads to the builtin being handled in
+  // EmitStdParUnsupportedBuiltin.
+  if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&
+      Arch != CGF->getTarget().getTriple().getArch())
+    return nullptr;
+
   switch (Arch) {
   case llvm::Triple::arm:
   case llvm::Triple::armeb:

diff  --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 6674aa2409a5947..c719df1bfa05036 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -2420,6 +2420,24 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
   }
 }
 
+static void EmitHipStdParUnsupportedAsm(CodeGenFunction *CGF,
+                                        const AsmStmt &S) {
+  constexpr auto Name = "__ASM__hipstdpar_unsupported";
+
+  StringRef Asm;
+  if (auto GCCAsm = dyn_cast<GCCAsmStmt>(&S))
+    Asm = GCCAsm->getAsmString()->getString();
+
+  auto &Ctx = CGF->CGM.getLLVMContext();
+
+  auto StrTy = llvm::ConstantDataArray::getString(Ctx, Asm);
+  auto FnTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx),
+                                      {StrTy->getType()}, false);
+  auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
+
+  CGF->Builder.CreateCall(UBF, {StrTy});
+}
+
 void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
   // Pop all cleanup blocks at the end of the asm statement.
   CodeGenFunction::RunCleanupsScope Cleanups(*this);
@@ -2431,27 +2449,38 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
   SmallVector<TargetInfo::ConstraintInfo, 4> OutputConstraintInfos;
   SmallVector<TargetInfo::ConstraintInfo, 4> InputConstraintInfos;
 
-  for (unsigned i = 0, e = S.getNumOutputs(); i != e; i++) {
+  bool IsHipStdPar = getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice;
+  bool IsValidTargetAsm = true;
+  for (unsigned i = 0, e = S.getNumOutputs(); i != e && IsValidTargetAsm; i++) {
     StringRef Name;
     if (const GCCAsmStmt *GAS = dyn_cast<GCCAsmStmt>(&S))
       Name = GAS->getOutputName(i);
     TargetInfo::ConstraintInfo Info(S.getOutputConstraint(i), Name);
     bool IsValid = getTarget().validateOutputConstraint(Info); (void)IsValid;
-    assert(IsValid && "Failed to parse output constraint");
+    if (IsHipStdPar && !IsValid)
+      IsValidTargetAsm = false;
+    else
+      assert(IsValid && "Failed to parse output constraint");
     OutputConstraintInfos.push_back(Info);
   }
 
-  for (unsigned i = 0, e = S.getNumInputs(); i != e; i++) {
+  for (unsigned i = 0, e = S.getNumInputs(); i != e && IsValidTargetAsm; i++) {
     StringRef Name;
     if (const GCCAsmStmt *GAS = dyn_cast<GCCAsmStmt>(&S))
       Name = GAS->getInputName(i);
     TargetInfo::ConstraintInfo Info(S.getInputConstraint(i), Name);
     bool IsValid =
       getTarget().validateInputConstraint(OutputConstraintInfos, Info);
-    assert(IsValid && "Failed to parse input constraint"); (void)IsValid;
+    if (IsHipStdPar && !IsValid)
+      IsValidTargetAsm = false;
+    else
+      assert(IsValid && "Failed to parse input constraint");
     InputConstraintInfos.push_back(Info);
   }
 
+  if (!IsValidTargetAsm)
+    return EmitHipStdParUnsupportedAsm(this, S);
+
   std::string Constraints;
 
   std::vector<LValue> ResultRegDests;

diff  --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt
index 1debeb6d9cce9e0..9fab15abe6404c6 100644
--- a/clang/lib/CodeGen/CMakeLists.txt
+++ b/clang/lib/CodeGen/CMakeLists.txt
@@ -11,6 +11,7 @@ set(LLVM_LINK_COMPONENTS
   Extensions
   FrontendHLSL
   FrontendOpenMP
+  HIPStdPar
   IPO
   IRPrinter
   IRReader

diff  --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 42777194cc76dc0..3682a2c6ae859ea 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -2594,10 +2594,15 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc,
   std::string MissingFeature;
   llvm::StringMap<bool> CallerFeatureMap;
   CGM.getContext().getFunctionFeatureMap(CallerFeatureMap, FD);
+  // When compiling in HipStdPar mode we have to be conservative in rejecting
+  // target specific features in the FE, and defer the possible error to the
+  // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
+  // referenced by an accelerator executable function, we emit an error.
+  bool IsHipStdPar = getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice;
   if (BuiltinID) {
     StringRef FeatureList(CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
     if (!Builtin::evaluateRequiredTargetFeatures(
-        FeatureList, CallerFeatureMap)) {
+        FeatureList, CallerFeatureMap) && !IsHipStdPar) {
       CGM.getDiags().Report(Loc, diag::err_builtin_needs_feature)
           << TargetDecl->getDeclName()
           << FeatureList;
@@ -2630,7 +2635,7 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc,
         return false;
       }
       return true;
-    }))
+    }) && !IsHipStdPar)
       CGM.getDiags().Report(Loc, diag::err_function_needs_feature)
           << FD->getDeclName() << TargetDecl->getDeclName() << MissingFeature;
   } else if (!FD->isMultiVersion() && FD->hasAttr<TargetAttr>()) {
@@ -2639,7 +2644,8 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc,
 
     for (const auto &F : CalleeFeatureMap) {
       if (F.getValue() && (!CallerFeatureMap.lookup(F.getKey()) ||
-                           !CallerFeatureMap.find(F.getKey())->getValue()))
+                           !CallerFeatureMap.find(F.getKey())->getValue()) &&
+          !IsHipStdPar)
         CGM.getDiags().Report(Loc, diag::err_function_needs_feature)
             << FD->getDeclName() << TargetDecl->getDeclName() << F.getKey();
     }

diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 754377bed7f7eef..b1a6683a66bd052 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -3526,7 +3526,7 @@ ConstantAddress CodeGenModule::GetAddrOfTemplateParamObject(
     GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
   Emitter.finalize(GV);
 
-  return ConstantAddress(GV, GV->getValueType(), Alignment);
+    return ConstantAddress(GV, GV->getValueType(), Alignment);
 }
 
 ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) {
@@ -3585,7 +3585,10 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
           !Global->hasAttr<CUDAConstantAttr>() &&
           !Global->hasAttr<CUDASharedAttr>() &&
           !Global->getType()->isCUDADeviceBuiltinSurfaceType() &&
-          !Global->getType()->isCUDADeviceBuiltinTextureType())
+          !Global->getType()->isCUDADeviceBuiltinTextureType() &&
+          !(LangOpts.HIPStdPar &&
+            isa<FunctionDecl>(Global) &&
+            !Global->hasAttr<CUDAHostAttr>()))
         return;
     } else {
       // We need to emit host-side 'shadows' for all global

diff  --git a/clang/test/CodeGenHipStdPar/unannotated-functions-get-emitted.cpp b/clang/test/CodeGenHipStdPar/unannotated-functions-get-emitted.cpp
new file mode 100644
index 000000000000000..1fa37ea6c342ff7
--- /dev/null
+++ b/clang/test/CodeGenHipStdPar/unannotated-functions-get-emitted.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -x hip -emit-llvm -fcuda-is-device \
+// RUN:   -o - %s | FileCheck --check-prefix=NO-HIPSTDPAR-DEV %s
+
+// RUN: %clang_cc1 --hipstdpar -emit-llvm -fcuda-is-device \
+// RUN:   -o - %s | FileCheck --check-prefix=HIPSTDPAR-DEV %s
+
+#define __device__ __attribute__((device))
+
+// NO-HIPSTDPAR-DEV-NOT: define {{.*}} void @foo({{.*}})
+// HIPSTDPAR-DEV: define {{.*}} void @foo({{.*}})
+extern "C" void foo(float *a, float b) {
+  *a = b;
+}
+
+// NO-HIPSTDPAR-DEV: define {{.*}} void @bar({{.*}})
+// HIPSTDPAR-DEV: define {{.*}} void @bar({{.*}})
+extern "C" __device__ void bar(float *a, float b) {
+  *a = b;
+}

diff  --git a/clang/test/CodeGenHipStdPar/unsupported-ASM.cpp b/clang/test/CodeGenHipStdPar/unsupported-ASM.cpp
new file mode 100644
index 000000000000000..485bf916c899f13
--- /dev/null
+++ b/clang/test/CodeGenHipStdPar/unsupported-ASM.cpp
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu \
+// RUN:   --hipstdpar -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
+
+#define __global__ __attribute__((global))
+
+__global__ void foo(int i) {
+    asm ("addl %2, %1; seto %b0" : "=q" (i), "+g" (i) : "r" (i));
+}
+
+// CHECK: declare void @__ASM__hipstdpar_unsupported([{{.*}}])

diff  --git a/clang/test/CodeGenHipStdPar/unsupported-builtins.cpp b/clang/test/CodeGenHipStdPar/unsupported-builtins.cpp
new file mode 100644
index 000000000000000..02355eca2672ebf
--- /dev/null
+++ b/clang/test/CodeGenHipStdPar/unsupported-builtins.cpp
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu \
+// RUN:   --hipstdpar -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
+
+#define __global__ __attribute__((global))
+
+__global__ void foo() { return __builtin_ia32_pause(); }
+
+// CHECK: declare void @__builtin_ia32_pause__hipstdpar_unsupported()


        


More information about the cfe-commits mailing list