[flang-commits] [flang] [mlir] [clang] [Flang][OpenMP][MLIR] Add support for -nogpulib option (PR #71045)
Dominik Adamski via flang-commits
flang-commits at lists.llvm.org
Thu Nov 2 03:54:22 PDT 2023
https://github.com/DominikAdamski created https://github.com/llvm/llvm-project/pull/71045
If -nogpulib option is passed by the user, then the OpenMP device runtime is not used and we should not emit globals to configure debugging at compile-time for the device runtime.
Link to -nogpulib flag implementation for Clang: https://reviews.llvm.org/D125314
>From 684ade39bda46edd6392521cc63902486659559b Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Tue, 31 Oct 2023 08:06:59 -0500
Subject: [PATCH 1/2] [MLIR][OpenMP] Add nogpulib parameter to OpenMP
attributes
Added an OpenMP attribute which will model nogpulib flag.
If nogpulib option is passed by the user, then the OpenMP device
runtime is not used and we should not emit globals to configure
debugging at compile-time for the device runtime.
Link to -nogpulib flag implementation for Clang:
https://reviews.llvm.org/D125314
---
flang/include/flang/Tools/CrossToolHelpers.h | 11 +++++++----
mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 1 +
.../mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td | 5 +++--
.../Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 8 ++++++--
mlir/test/Dialect/OpenMP/attr.mlir | 6 ++++++
mlir/test/Target/LLVMIR/openmp-llvm.mlir | 10 ++++++++++
6 files changed, 33 insertions(+), 8 deletions(-)
diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h
index ddec70fa9824c52..a1f5c871992a1f8 100644
--- a/flang/include/flang/Tools/CrossToolHelpers.h
+++ b/flang/include/flang/Tools/CrossToolHelpers.h
@@ -53,14 +53,16 @@ struct OffloadModuleOpts {
OffloadModuleOpts(uint32_t OpenMPTargetDebug, bool OpenMPTeamSubscription,
bool OpenMPThreadSubscription, bool OpenMPNoThreadState,
bool OpenMPNoNestedParallelism, bool OpenMPIsTargetDevice,
- bool OpenMPIsGPU, uint32_t OpenMPVersion, std::string OMPHostIRFile = {})
+ bool OpenMPIsGPU, uint32_t OpenMPVersion, std::string OMPHostIRFile = {},
+ bool NoGPULib = false)
: OpenMPTargetDebug(OpenMPTargetDebug),
OpenMPTeamSubscription(OpenMPTeamSubscription),
OpenMPThreadSubscription(OpenMPThreadSubscription),
OpenMPNoThreadState(OpenMPNoThreadState),
OpenMPNoNestedParallelism(OpenMPNoNestedParallelism),
OpenMPIsTargetDevice(OpenMPIsTargetDevice), OpenMPIsGPU(OpenMPIsGPU),
- OpenMPVersion(OpenMPVersion), OMPHostIRFile(OMPHostIRFile) {}
+ OpenMPVersion(OpenMPVersion), OMPHostIRFile(OMPHostIRFile),
+ NoGPULib(NoGPULib) {}
OffloadModuleOpts(Fortran::frontend::LangOptions &Opts)
: OpenMPTargetDebug(Opts.OpenMPTargetDebug),
@@ -70,7 +72,7 @@ struct OffloadModuleOpts {
OpenMPNoNestedParallelism(Opts.OpenMPNoNestedParallelism),
OpenMPIsTargetDevice(Opts.OpenMPIsTargetDevice),
OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPVersion(Opts.OpenMPVersion),
- OMPHostIRFile(Opts.OMPHostIRFile) {}
+ OMPHostIRFile(Opts.OMPHostIRFile), NoGPULib(false) {}
uint32_t OpenMPTargetDebug = 0;
bool OpenMPTeamSubscription = false;
@@ -81,6 +83,7 @@ struct OffloadModuleOpts {
bool OpenMPIsGPU = false;
uint32_t OpenMPVersion = 11;
std::string OMPHostIRFile = {};
+ bool NoGPULib = false;
};
// Shares assinging of the OpenMP OffloadModuleInterface and its assorted
@@ -95,7 +98,7 @@ void setOffloadModuleInterfaceAttributes(
if (Opts.OpenMPIsTargetDevice) {
offloadMod.setFlags(Opts.OpenMPTargetDebug, Opts.OpenMPTeamSubscription,
Opts.OpenMPThreadSubscription, Opts.OpenMPNoThreadState,
- Opts.OpenMPNoNestedParallelism, Opts.OpenMPVersion);
+ Opts.OpenMPNoNestedParallelism, Opts.OpenMPVersion, false);
if (!Opts.OMPHostIRFile.empty())
offloadMod.setHostIRFilePath(Opts.OMPHostIRFile);
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 72121ad4f9e47a5..ae2dd018059e3aa 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -65,6 +65,7 @@ def FlagsAttr : OpenMP_Attr<"Flags", "flags"> {
DefaultValuedParameter<"bool", "false">:$assume_threads_oversubscription,
DefaultValuedParameter<"bool", "false">:$assume_no_thread_state,
DefaultValuedParameter<"bool", "false">:$assume_no_nested_parallelism,
+ DefaultValuedParameter<"bool", "false">:$no_gpu_lib,
DefaultValuedParameter<"uint32_t", "50">:$openmp_device_version
);
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
index 77001fc816cf91f..89d04af64766fc2 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
@@ -198,11 +198,12 @@ def OffloadModuleInterface : OpInterface<"OffloadModuleInterface"> {
"bool":$assumeThreadsOversubscription,
"bool":$assumeNoThreadState,
"bool":$assumeNoNestedParallelism,
- "uint32_t":$openmpDeviceVersion), [{}], [{
+ "uint32_t":$openmpDeviceVersion,
+ "bool":$noGPULib), [{}], [{
$_op->setAttr(("omp." + mlir::omp::FlagsAttr::getMnemonic()).str(),
mlir::omp::FlagsAttr::get($_op->getContext(), debugKind,
assumeTeamsOversubscription, assumeThreadsOversubscription,
- assumeNoThreadState, assumeNoNestedParallelism, openmpDeviceVersion));
+ assumeNoThreadState, assumeNoNestedParallelism, noGPULib, openmpDeviceVersion));
}]>,
InterfaceMethod<
/*description=*/[{
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 1daf60b8659bb66..083b507f405f426 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2011,6 +2011,12 @@ LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+ ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
+ attribute.getOpenmpDeviceVersion());
+
+ if (attribute.getNoGpuLib())
+ return success();
+
ompBuilder->createGlobalFlag(
attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/,
"__omp_rtl_debug_kind");
@@ -2032,8 +2038,6 @@ LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,
.getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/
,
"__omp_rtl_assume_no_nested_parallelism");
- ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
- attribute.getOpenmpDeviceVersion());
return success();
}
diff --git a/mlir/test/Dialect/OpenMP/attr.mlir b/mlir/test/Dialect/OpenMP/attr.mlir
index fc89ebd01b6bf57..2e88dac8ff4d2b9 100644
--- a/mlir/test/Dialect/OpenMP/attr.mlir
+++ b/mlir/test/Dialect/OpenMP/attr.mlir
@@ -54,6 +54,12 @@ module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true,
// CHECK: module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true, assume_no_thread_state = true, openmp_device_version = 51>} {
module attributes {omp.flags = #omp.flags<assume_no_thread_state = true, assume_teams_oversubscription = true, openmp_device_version = 51>} {}
+// CHECK: module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true, assume_no_thread_state = true, no_gpu_lib = true, openmp_device_version = 51>} {
+module attributes {omp.flags = #omp.flags<assume_no_thread_state = true, assume_teams_oversubscription = true, no_gpu_lib = true, openmp_device_version = 51>} {}
+
+// CHECK: module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true, openmp_device_version = 51>} {
+module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true, no_gpu_lib = false, openmp_device_version = 51>} {}
+
// CHECK: module attributes {omp.version = #omp.version<version = 51>} {
module attributes {omp.version = #omp.version<version = 51>} {}
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 116c05f3747c6ae..8da17107466f817 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -2530,6 +2530,16 @@ module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true,
// -----
+// CHECK-NOT: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
+// CHECK-NOT: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 1
+// CHECK-NOT: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
+// CHECK-NOT: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 1
+// CHECK-NOT: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0
+module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true, assume_no_thread_state = true,
+ no_gpu_lib=true>} {}
+
+// -----
+
module attributes {omp.is_target_device = false} {
// CHECK: define void @filter_nohost
llvm.func @filter_nohost() -> ()
>From 30a2a7ba71c7e4b0e3eccf77f4c33c42872e75f5 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Thu, 2 Nov 2023 05:03:14 -0500
Subject: [PATCH 2/2] [Flang] Add -nogpulib flag to command line option
If nogpulib option is passed by the user, then the OpenMP device
runtime is not used and we should not emit globals to configure
debugging at compile-time for the device runtime.
Link to -nogpulib flag implementation for Clang:
https://reviews.llvm.org/D125314
---
clang/include/clang/Driver/Options.td | 2 +-
clang/lib/Driver/ToolChains/Flang.cpp | 2 ++
flang/include/flang/Frontend/LangOptions.def | 2 ++
flang/include/flang/Tools/CrossToolHelpers.h | 4 ++--
flang/lib/Frontend/CompilerInvocation.cpp | 2 ++
flang/test/Driver/driver-help-hidden.f90 | 1 +
flang/test/Driver/driver-help.f90 | 2 ++
flang/test/Lower/OpenMP/nogpulib.f90 | 12 ++++++++++++
flang/tools/bbc/bbc.cpp | 8 +++++++-
9 files changed, 31 insertions(+), 4 deletions(-)
create mode 100644 flang/test/Lower/OpenMP/nogpulib.f90
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index b1229b2f4562379..cc50b45b7933d5f 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5092,7 +5092,7 @@ def nohipwrapperinc : Flag<["-"], "nohipwrapperinc">, Group<IncludePath_Group>,
HelpText<"Do not include the default HIP wrapper headers and include paths">;
def : Flag<["-"], "nocudainc">, Alias<nogpuinc>;
def nogpulib : Flag<["-"], "nogpulib">, MarshallingInfoFlag<LangOpts<"NoGPULib">>,
- Visibility<[ClangOption, CC1Option]>,
+ Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
HelpText<"Do not link device library for CUDA/HIP device compilation">;
def : Flag<["-"], "nocudalib">, Alias<nogpulib>;
def gpulibc : Flag<["-"], "gpulibc">, Visibility<[ClangOption, CC1Option]>,
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 999039f83ddfb92..17d9ac7632128d6 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -295,6 +295,8 @@ void Flang::addOffloadOptions(Compilation &C, const InputInfoList &Inputs,
CmdArgs.push_back("-fopenmp-assume-no-thread-state");
if (Args.hasArg(options::OPT_fopenmp_assume_no_nested_parallelism))
CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism");
+ if (Args.hasArg(options::OPT_nogpulib))
+ CmdArgs.push_back("-nogpulib");
}
}
diff --git a/flang/include/flang/Frontend/LangOptions.def b/flang/include/flang/Frontend/LangOptions.def
index 3a1d44f7fb472d1..2bf10826120a8bd 100644
--- a/flang/include/flang/Frontend/LangOptions.def
+++ b/flang/include/flang/Frontend/LangOptions.def
@@ -21,6 +21,8 @@ LANGOPT(Name, Bits, Default)
ENUM_LANGOPT(FPContractMode, FPModeKind, 2, FPM_Fast) ///< FP Contract Mode (off/fast)
+/// Indicate a build without the standard GPU libraries.
+LANGOPT(NoGPULib , 1, false)
/// Permit floating point optimization without regard to infinities
LANGOPT(NoHonorInfs, 1, false)
/// Permit floating point optimization without regard to NaN
diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h
index a1f5c871992a1f8..3b51a3845625df3 100644
--- a/flang/include/flang/Tools/CrossToolHelpers.h
+++ b/flang/include/flang/Tools/CrossToolHelpers.h
@@ -72,7 +72,7 @@ struct OffloadModuleOpts {
OpenMPNoNestedParallelism(Opts.OpenMPNoNestedParallelism),
OpenMPIsTargetDevice(Opts.OpenMPIsTargetDevice),
OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPVersion(Opts.OpenMPVersion),
- OMPHostIRFile(Opts.OMPHostIRFile), NoGPULib(false) {}
+ OMPHostIRFile(Opts.OMPHostIRFile), NoGPULib(Opts.NoGPULib) {}
uint32_t OpenMPTargetDebug = 0;
bool OpenMPTeamSubscription = false;
@@ -98,7 +98,7 @@ void setOffloadModuleInterfaceAttributes(
if (Opts.OpenMPIsTargetDevice) {
offloadMod.setFlags(Opts.OpenMPTargetDebug, Opts.OpenMPTeamSubscription,
Opts.OpenMPThreadSubscription, Opts.OpenMPNoThreadState,
- Opts.OpenMPNoNestedParallelism, Opts.OpenMPVersion, false);
+ Opts.OpenMPNoNestedParallelism, Opts.OpenMPVersion, Opts.NoGPULib);
if (!Opts.OMPHostIRFile.empty())
offloadMod.setHostIRFilePath(Opts.OMPHostIRFile);
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index ba2ecab3742587a..ef0db22037283d3 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -881,6 +881,8 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args,
args.hasArg(clang::driver::options::OPT_fopenmp_target_debug))
res.getLangOpts().OpenMPTargetDebug = 1;
}
+ if (args.hasArg(clang::driver::options::OPT_nogpulib))
+ res.getLangOpts().NoGPULib = 1;
}
switch (llvm::Triple(res.getTargetOpts().triple).getArch()) {
diff --git a/flang/test/Driver/driver-help-hidden.f90 b/flang/test/Driver/driver-help-hidden.f90
index 6d399f1d179a022..2e65096d0c3fec4 100644
--- a/flang/test/Driver/driver-help-hidden.f90
+++ b/flang/test/Driver/driver-help-hidden.f90
@@ -119,6 +119,7 @@
! CHECK-NEXT: --no-offload-arch=<value>
! CHECK-NEXT: Remove CUDA/HIP offloading device architecture (e.g. sm_35, gfx906) from the list of devices to compile for. 'all' resets the list to its default value.
! CHECK-NEXT: -nocpp Disable predefined and command line preprocessor macros
+! CHECK-NEXT: -nogpulib Do not link device library for CUDA/HIP device compilation
! CHECK-NEXT: --offload-arch=<value> Specify an offloading device architecture for CUDA, HIP, or OpenMP. (e.g. sm_35). If 'native' is used the compiler will detect locally installed architectures. For HIP offloading, the device architecture can be followed by target ID features delimited by a colon (e.g. gfx908:xnack+:sramecc-). May be specified more than once.
! CHECK-NEXT: --offload-device-only Only compile for the offloading device.
! CHECK-NEXT: --offload-host-device Compile for both the offloading host and device (default).
diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90
index 31c9caa32ea8292..a29e8a0b69db46d 100644
--- a/flang/test/Driver/driver-help.f90
+++ b/flang/test/Driver/driver-help.f90
@@ -107,6 +107,7 @@
! HELP-NEXT: --no-offload-arch=<value>
! HELP-NEXT: Remove CUDA/HIP offloading device architecture (e.g. sm_35, gfx906) from the list of devices to compile for. 'all' resets the list to its default value.
! HELP-NEXT: -nocpp Disable predefined and command line preprocessor macros
+! HELP-NEXT: -nogpulib Do not link device library for CUDA/HIP device compilation
! HELP-NEXT: --offload-arch=<value> Specify an offloading device architecture for CUDA, HIP, or OpenMP. (e.g. sm_35). If 'native' is used the compiler will detect locally installed architectures. For HIP offloading, the device architecture can be followed by target ID features delimited by a colon (e.g. gfx908:xnack+:sramecc-). May be specified more than once.
! HELP-NEXT: --offload-device-only Only compile for the offloading device.
! HELP-NEXT: --offload-host-device Compile for both the offloading host and device (default).
@@ -239,6 +240,7 @@
! HELP-FC1-NEXT: -mvscale-max=<value> Specify the vscale maximum. Defaults to the vector length agnostic value of "0". (AArch64/RISC-V only)
! HELP-FC1-NEXT: -mvscale-min=<value> Specify the vscale minimum. Defaults to "1". (AArch64/RISC-V only)
! HELP-FC1-NEXT: -nocpp Disable predefined and command line preprocessor macros
+! HELP-FC1-NEXT: -nogpulib Do not link device library for CUDA/HIP device compilation
! HELP-FC1-NEXT: -opt-record-file <value>
! HELP-FC1-NEXT: File name to use for YAML optimization record output
! HELP-FC1-NEXT: -opt-record-format <value>
diff --git a/flang/test/Lower/OpenMP/nogpulib.f90 b/flang/test/Lower/OpenMP/nogpulib.f90
new file mode 100644
index 000000000000000..f2e67136ecd7481
--- /dev/null
+++ b/flang/test/Lower/OpenMP/nogpulib.f90
@@ -0,0 +1,12 @@
+!REQUIRES: amdgpu-registered-target
+
+!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-hlfir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s
+!RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-is-gpu -emit-hlfir -o - %s | FileCheck %s
+!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-hlfir -fopenmp -fopenmp-is-target-device -nogpulib %s -o - | FileCheck %s -check-prefix=FLAG_SET
+!RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-is-gpu -emit-hlfir -nogpulib -o - %s | FileCheck %s -check-prefix=FLAG_SET
+
+!CHECK-NOT: module attributes {{{.*}}no_gpu_lib
+!FLAG_SET: module attributes {{{.*}}no_gpu_lib = true
+subroutine omp_subroutine()
+end subroutine omp_subroutine
+
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index 378216bd1e51f8f..a66c41eed59651c 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -177,6 +177,12 @@ static llvm::cl::opt<bool> setOpenMPNoNestedParallelism(
"a parallel region."),
llvm::cl::init(false));
+static llvm::cl::opt<bool>
+ setNoGPULib("nogpulib",
+ llvm::cl::desc("Do not link device library for CUDA/HIP device "
+ "compilation"),
+ llvm::cl::init(false));
+
static llvm::cl::opt<bool> enableOpenACC("fopenacc",
llvm::cl::desc("enable openacc"),
llvm::cl::init(false));
@@ -316,7 +322,7 @@ static mlir::LogicalResult convertFortranSourceToMLIR(
OffloadModuleOpts(setOpenMPTargetDebug, setOpenMPTeamSubscription,
setOpenMPThreadSubscription, setOpenMPNoThreadState,
setOpenMPNoNestedParallelism, enableOpenMPDevice,
- enableOpenMPGPU, setOpenMPVersion);
+ enableOpenMPGPU, setOpenMPVersion, "", setNoGPULib);
setOffloadModuleInterfaceAttributes(mlirModule, offloadModuleOpts);
setOpenMPVersionAttribute(mlirModule, setOpenMPVersion);
}
More information about the flang-commits
mailing list