[clang] [flang] [Flang][OpenMP] Add -fopenmp-force-usm option to flang (PR #94359)
Sergio Afonso via cfe-commits
cfe-commits at lists.llvm.org
Tue Jun 4 07:36:42 PDT 2024
https://github.com/skatrak created https://github.com/llvm/llvm-project/pull/94359
This patch enables the `-fopenmp-force-usm` option to be passed to the flang driver, which forwards it to the compiler frontend. When set, this flag adds the `unified_shared_memory` bit to the `omp.requires` attribute of the top-level module operation.
This is later combined with any other target device-related REQUIRES clauses that may have been explicitly set in the compilation unit.
From 369850438197a4176c9fe2689ad9e8032ead5488 Mon Sep 17 00:00:00 2001
From: Sergio Afonso <safonsof at amd.com>
Date: Tue, 4 Jun 2024 15:26:38 +0100
Subject: [PATCH] [Flang][OpenMP] Add -fopenmp-force-usm option to flang
This patch enables the `-fopenmp-force-usm` option to be passed to the flang
driver, which forwards it to the compiler frontend. When set, this flag adds
the `unified_shared_memory` bit to the `omp.requires` attribute of the
top-level module operation.
This is later combined with any other target device-related REQUIRES clauses
that may have been explicitly set in the compilation unit.
---
clang/include/clang/Driver/Options.td | 2 +-
clang/lib/Driver/ToolChains/Flang.cpp | 2 ++
flang/include/flang/Frontend/LangOptions.def | 2 ++
flang/include/flang/Tools/CrossToolHelpers.h | 20 ++++++++++++-------
flang/lib/Frontend/CompilerInvocation.cpp | 3 +++
flang/lib/Lower/OpenMP/OpenMP.cpp | 4 +++-
flang/test/Driver/omp-driver-offload.f90 | 20 +++++++++++++++++++
flang/test/Lower/OpenMP/force-usm.f90 | 12 +++++++++++
.../test/Lower/OpenMP/requires-force-usm.f90 | 15 ++++++++++++++
flang/tools/bbc/bbc.cpp | 15 +++++++++-----
10 files changed, 81 insertions(+), 14 deletions(-)
create mode 100644 flang/test/Lower/OpenMP/force-usm.f90
create mode 100644 flang/test/Lower/OpenMP/requires-force-usm.f90
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 57f37c5023110..eefec33e6d333 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3574,7 +3574,7 @@ def fopenmp_offload_mandatory : Flag<["-"], "fopenmp-offload-mandatory">, Group<
HelpText<"Do not create a host fallback if offloading to the device fails.">,
MarshallingInfoFlag<LangOpts<"OpenMPOffloadMandatory">>;
def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group<f_Group>,
- Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option]>,
+ Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
HelpText<"Force behvaior as if the user specified pragma omp requires unified_shared_memory.">,
MarshallingInfoFlag<LangOpts<"OpenMPForceUSM">>;
def fopenmp_target_jit : Flag<["-"], "fopenmp-target-jit">, Group<f_Group>,
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 42ca060186fd8..b7abe7b1c19bc 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -766,6 +766,8 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ);
// FIXME: Clang supports a whole bunch more flags here.
+ if (Args.hasArg(options::OPT_fopenmp_force_usm))
+ CmdArgs.push_back("-fopenmp-force-usm");
break;
default:
// By default, if Clang doesn't know how to generate useful OpenMP code
diff --git a/flang/include/flang/Frontend/LangOptions.def b/flang/include/flang/Frontend/LangOptions.def
index 2bf10826120a8..d3e1e972d1519 100644
--- a/flang/include/flang/Frontend/LangOptions.def
+++ b/flang/include/flang/Frontend/LangOptions.def
@@ -42,6 +42,8 @@ LANGOPT(OpenMPVersion, 32, 0)
LANGOPT(OpenMPIsTargetDevice, 1, false)
/// Generate OpenMP target code only for GPUs
LANGOPT(OpenMPIsGPU, 1, false)
+/// Force behavior as if the user specified "requires unified_shared_memory"
+LANGOPT(OpenMPForceUSM, 1, false)
/// Enable debugging in the OpenMP offloading device RTL
LANGOPT(OpenMPTargetDebug, 32, 0)
/// Assume work-shared loops do not have more iterations than participating
diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h
index 77b68fc6187fa..26fbe51d329c7 100644
--- a/flang/include/flang/Tools/CrossToolHelpers.h
+++ b/flang/include/flang/Tools/CrossToolHelpers.h
@@ -130,16 +130,16 @@ struct OffloadModuleOpts {
OffloadModuleOpts(uint32_t OpenMPTargetDebug, bool OpenMPTeamSubscription,
bool OpenMPThreadSubscription, bool OpenMPNoThreadState,
bool OpenMPNoNestedParallelism, bool OpenMPIsTargetDevice,
- bool OpenMPIsGPU, uint32_t OpenMPVersion, std::string OMPHostIRFile = {},
- bool NoGPULib = false)
+ bool OpenMPIsGPU, bool OpenMPForceUSM, uint32_t OpenMPVersion,
+ std::string OMPHostIRFile = {}, bool NoGPULib = false)
: OpenMPTargetDebug(OpenMPTargetDebug),
OpenMPTeamSubscription(OpenMPTeamSubscription),
OpenMPThreadSubscription(OpenMPThreadSubscription),
OpenMPNoThreadState(OpenMPNoThreadState),
OpenMPNoNestedParallelism(OpenMPNoNestedParallelism),
OpenMPIsTargetDevice(OpenMPIsTargetDevice), OpenMPIsGPU(OpenMPIsGPU),
- OpenMPVersion(OpenMPVersion), OMPHostIRFile(OMPHostIRFile),
- NoGPULib(NoGPULib) {}
+ OpenMPForceUSM(OpenMPForceUSM), OpenMPVersion(OpenMPVersion),
+ OMPHostIRFile(OMPHostIRFile), NoGPULib(NoGPULib) {}
OffloadModuleOpts(Fortran::frontend::LangOptions &Opts)
: OpenMPTargetDebug(Opts.OpenMPTargetDebug),
@@ -148,8 +148,9 @@ struct OffloadModuleOpts {
OpenMPNoThreadState(Opts.OpenMPNoThreadState),
OpenMPNoNestedParallelism(Opts.OpenMPNoNestedParallelism),
OpenMPIsTargetDevice(Opts.OpenMPIsTargetDevice),
- OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPVersion(Opts.OpenMPVersion),
- OMPHostIRFile(Opts.OMPHostIRFile), NoGPULib(Opts.NoGPULib) {}
+ OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPForceUSM(Opts.OpenMPForceUSM),
+ OpenMPVersion(Opts.OpenMPVersion), OMPHostIRFile(Opts.OMPHostIRFile),
+ NoGPULib(Opts.NoGPULib) {}
uint32_t OpenMPTargetDebug = 0;
bool OpenMPTeamSubscription = false;
@@ -158,6 +159,7 @@ struct OffloadModuleOpts {
bool OpenMPNoNestedParallelism = false;
bool OpenMPIsTargetDevice = false;
bool OpenMPIsGPU = false;
+ bool OpenMPForceUSM = false;
uint32_t OpenMPVersion = 11;
std::string OMPHostIRFile = {};
bool NoGPULib = false;
@@ -172,13 +174,17 @@ struct OffloadModuleOpts {
module.getOperation())) {
offloadMod.setIsTargetDevice(Opts.OpenMPIsTargetDevice);
offloadMod.setIsGPU(Opts.OpenMPIsGPU);
+ if (Opts.OpenMPForceUSM) {
+ offloadMod.setRequires(mlir::omp::ClauseRequires::unified_shared_memory);
+ }
if (Opts.OpenMPIsTargetDevice) {
offloadMod.setFlags(Opts.OpenMPTargetDebug, Opts.OpenMPTeamSubscription,
Opts.OpenMPThreadSubscription, Opts.OpenMPNoThreadState,
Opts.OpenMPNoNestedParallelism, Opts.OpenMPVersion, Opts.NoGPULib);
- if (!Opts.OMPHostIRFile.empty())
+ if (!Opts.OMPHostIRFile.empty()) {
offloadMod.setHostIRFilePath(Opts.OMPHostIRFile);
+ }
}
}
}
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 50c3e8b0113b5..f64a939b785ef 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -906,6 +906,9 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args,
res.getLangOpts().OpenMPVersion, diags)) {
res.getLangOpts().OpenMPVersion = Version;
}
+ if (args.hasArg(clang::driver::options::OPT_fopenmp_force_usm)) {
+ res.getLangOpts().OpenMPForceUSM = 1;
+ }
if (args.hasArg(clang::driver::options::OPT_fopenmp_is_target_device)) {
res.getLangOpts().OpenMPIsTargetDevice = 1;
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 9598457d123cf..af9e2af24619b 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -2608,7 +2608,9 @@ void Fortran::lower::genOpenMPRequires(mlir::Operation *mod,
symbol->details());
}
- MlirRequires mlirFlags = MlirRequires::none;
+ // Use pre-populated omp.requires module attribute if it was set, so that
+ // the "-fopenmp-force-usm" compiler option is honored.
+ MlirRequires mlirFlags = offloadMod.getRequires();
if (semaFlags.test(SemaRequires::ReverseOffload))
mlirFlags = mlirFlags | MlirRequires::reverse_offload;
if (semaFlags.test(SemaRequires::UnifiedAddress))
diff --git a/flang/test/Driver/omp-driver-offload.f90 b/flang/test/Driver/omp-driver-offload.f90
index 8f48ca75114ce..6fb4f4eeeeca1 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -207,3 +207,23 @@
! RUN: --rocm-path=%S/Inputs/rocm %s 2>&1 \
! RUN: | FileCheck --check-prefix=ROCM-PATH %s
! ROCM-PATH: Found HIP installation: {{.*Inputs.*rocm}}, version 3.6.20214-a2917cd
+
+! Test -fopenmp-force-usm option without offload
+! RUN: %flang -S -### %s -o %t 2>&1 \
+! RUN: -fopenmp -fopenmp-force-usm \
+! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: | FileCheck %s --check-prefix=FORCE-USM-NO-OFFLOAD
+
+! FORCE-USM-NO-OFFLOAD: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu"
+! FORCE-USM-NO-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
+
+! Test -fopenmp-force-usm option with offload
+! RUN: %flang -S -### %s -o %t 2>&1 \
+! RUN: -fopenmp -fopenmp-force-usm --offload-arch=gfx90a \
+! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: | FileCheck %s --check-prefix=FORCE-USM-OFFLOAD
+
+! FORCE-USM-OFFLOAD: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu"
+! FORCE-USM-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
+! FORCE-USM-OFFLOAD-NEXT: "{{[^"]*}}flang-new" "-fc1" "-triple" "amdgcn-amd-amdhsa"
+! FORCE-USM-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
diff --git a/flang/test/Lower/OpenMP/force-usm.f90 b/flang/test/Lower/OpenMP/force-usm.f90
new file mode 100644
index 0000000000000..90bbf3c4d842f
--- /dev/null
+++ b/flang/test/Lower/OpenMP/force-usm.f90
@@ -0,0 +1,12 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-force-usm %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-target-device -fopenmp-force-usm %s -o - | FileCheck %s
+! RUN: bbc -fopenmp -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
+! RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
+
+! This test checks the addition of requires unified_shared_memory when
+! -fopenmp-force-usm is set
+
+!CHECK: module attributes {
+!CHECK-SAME: omp.requires = #omp<clause_requires unified_shared_memory>
+program requires
+end program requires
diff --git a/flang/test/Lower/OpenMP/requires-force-usm.f90 b/flang/test/Lower/OpenMP/requires-force-usm.f90
new file mode 100644
index 0000000000000..5f5cf9e64cd70
--- /dev/null
+++ b/flang/test/Lower/OpenMP/requires-force-usm.f90
@@ -0,0 +1,15 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-force-usm %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-target-device -fopenmp-force-usm %s -o - | FileCheck %s
+! RUN: bbc -fopenmp -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
+! RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
+
+! This test checks the addition of requires unified_shared_memory when
+! -fopenmp-force-usm is set, even when other requires directives are present
+
+!CHECK: module attributes {
+!CHECK-SAME: omp.requires = #omp<clause_requires reverse_offload|unified_shared_memory>
+program requires
+ !$omp requires reverse_offload
+ !$omp target
+ !$omp end target
+end program requires
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index bab21338cef26..3485c1499d3b6 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -144,6 +144,11 @@ static llvm::cl::opt<bool>
llvm::cl::desc("enable openmp GPU target codegen"),
llvm::cl::init(false));
+static llvm::cl::opt<bool> enableOpenMPForceUSM(
+ "fopenmp-force-usm",
+ llvm::cl::desc("force openmp unified shared memory mode"),
+ llvm::cl::init(false));
+
// A simplified subset of the OpenMP RTL Flags from Flang, only the primary
// positive options are available, no negative options e.g. fopen_assume* vs
// fno_open_assume*
@@ -374,11 +379,11 @@ static mlir::LogicalResult convertFortranSourceToMLIR(
"-fopenmp-is-target-device is also set";
return mlir::failure();
}
- auto offloadModuleOpts =
- OffloadModuleOpts(setOpenMPTargetDebug, setOpenMPTeamSubscription,
- setOpenMPThreadSubscription, setOpenMPNoThreadState,
- setOpenMPNoNestedParallelism, enableOpenMPDevice,
- enableOpenMPGPU, setOpenMPVersion, "", setNoGPULib);
+ auto offloadModuleOpts = OffloadModuleOpts(
+ setOpenMPTargetDebug, setOpenMPTeamSubscription,
+ setOpenMPThreadSubscription, setOpenMPNoThreadState,
+ setOpenMPNoNestedParallelism, enableOpenMPDevice, enableOpenMPGPU,
+ enableOpenMPForceUSM, setOpenMPVersion, "", setNoGPULib);
setOffloadModuleInterfaceAttributes(mlirModule, offloadModuleOpts);
setOpenMPVersionAttribute(mlirModule, setOpenMPVersion);
}
More information about the cfe-commits
mailing list