[clang] [flang] [Flang][OpenMP] Add -fopenmp-force-usm option to flang (PR #94359)

Sergio Afonso via cfe-commits cfe-commits at lists.llvm.org
Tue Jun 4 07:36:42 PDT 2024


https://github.com/skatrak created https://github.com/llvm/llvm-project/pull/94359

This patch enables the `-fopenmp-force-usm` option to be passed to the flang driver, which forwards it to the compiler frontend. This flag, when set, results in the introduction of the `unified_shared_memory` bit to the `omp.requires` attribute of the top-level module operation.

This is later combined with any other target device-related REQUIRES clauses that may have been explicitly set in the compilation unit.

>From 369850438197a4176c9fe2689ad9e8032ead5488 Mon Sep 17 00:00:00 2001
From: Sergio Afonso <safonsof at amd.com>
Date: Tue, 4 Jun 2024 15:26:38 +0100
Subject: [PATCH] [Flang][OpenMP] Add -fopenmp-force-usm option to flang

This patch enables the `-fopenmp-force-usm` option to be passed to the flang
driver, which forwards it to the compiler frontend. This flag, when set,
results in the introduction of the `unified_shared_memory` bit to the
`omp.requires` attribute of the top-level module operation.

This is later combined with any other target device-related REQUIRES clauses
that may have been explicitly set in the compilation unit.
---
 clang/include/clang/Driver/Options.td         |  2 +-
 clang/lib/Driver/ToolChains/Flang.cpp         |  2 ++
 flang/include/flang/Frontend/LangOptions.def  |  2 ++
 flang/include/flang/Tools/CrossToolHelpers.h  | 20 ++++++++++++-------
 flang/lib/Frontend/CompilerInvocation.cpp     |  3 +++
 flang/lib/Lower/OpenMP/OpenMP.cpp             |  4 +++-
 flang/test/Driver/omp-driver-offload.f90      | 20 +++++++++++++++++++
 flang/test/Lower/OpenMP/force-usm.f90         | 12 +++++++++++
 .../test/Lower/OpenMP/requires-force-usm.f90  | 15 ++++++++++++++
 flang/tools/bbc/bbc.cpp                       | 15 +++++++++-----
 10 files changed, 81 insertions(+), 14 deletions(-)
 create mode 100644 flang/test/Lower/OpenMP/force-usm.f90
 create mode 100644 flang/test/Lower/OpenMP/requires-force-usm.f90

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 57f37c5023110..eefec33e6d333 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3574,7 +3574,7 @@ def fopenmp_offload_mandatory : Flag<["-"], "fopenmp-offload-mandatory">, Group<
   HelpText<"Do not create a host fallback if offloading to the device fails.">,
   MarshallingInfoFlag<LangOpts<"OpenMPOffloadMandatory">>;
 def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group<f_Group>,
-  Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option]>,
+  Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
   HelpText<"Force behvaior as if the user specified pragma omp requires unified_shared_memory.">,
   MarshallingInfoFlag<LangOpts<"OpenMPForceUSM">>;
 def fopenmp_target_jit : Flag<["-"], "fopenmp-target-jit">, Group<f_Group>,
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 42ca060186fd8..b7abe7b1c19bc 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -766,6 +766,8 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA,
       Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ);
 
       // FIXME: Clang supports a whole bunch more flags here.
+      if (Args.hasArg(options::OPT_fopenmp_force_usm))
+        CmdArgs.push_back("-fopenmp-force-usm");
       break;
     default:
       // By default, if Clang doesn't know how to generate useful OpenMP code
diff --git a/flang/include/flang/Frontend/LangOptions.def b/flang/include/flang/Frontend/LangOptions.def
index 2bf10826120a8..d3e1e972d1519 100644
--- a/flang/include/flang/Frontend/LangOptions.def
+++ b/flang/include/flang/Frontend/LangOptions.def
@@ -42,6 +42,8 @@ LANGOPT(OpenMPVersion, 32, 0)
 LANGOPT(OpenMPIsTargetDevice, 1, false)
 /// Generate OpenMP target code only for GPUs
 LANGOPT(OpenMPIsGPU, 1, false)
+/// Generate OpenMP target code only for GPUs
+LANGOPT(OpenMPForceUSM, 1, false)
 /// Enable debugging in the OpenMP offloading device RTL
 LANGOPT(OpenMPTargetDebug, 32, 0)
 /// Assume work-shared loops do not have more iterations than participating
diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h
index 77b68fc6187fa..26fbe51d329c7 100644
--- a/flang/include/flang/Tools/CrossToolHelpers.h
+++ b/flang/include/flang/Tools/CrossToolHelpers.h
@@ -130,16 +130,16 @@ struct OffloadModuleOpts {
   OffloadModuleOpts(uint32_t OpenMPTargetDebug, bool OpenMPTeamSubscription,
       bool OpenMPThreadSubscription, bool OpenMPNoThreadState,
       bool OpenMPNoNestedParallelism, bool OpenMPIsTargetDevice,
-      bool OpenMPIsGPU, uint32_t OpenMPVersion, std::string OMPHostIRFile = {},
-      bool NoGPULib = false)
+      bool OpenMPIsGPU, bool OpenMPForceUSM, uint32_t OpenMPVersion,
+      std::string OMPHostIRFile = {}, bool NoGPULib = false)
       : OpenMPTargetDebug(OpenMPTargetDebug),
         OpenMPTeamSubscription(OpenMPTeamSubscription),
         OpenMPThreadSubscription(OpenMPThreadSubscription),
         OpenMPNoThreadState(OpenMPNoThreadState),
         OpenMPNoNestedParallelism(OpenMPNoNestedParallelism),
         OpenMPIsTargetDevice(OpenMPIsTargetDevice), OpenMPIsGPU(OpenMPIsGPU),
-        OpenMPVersion(OpenMPVersion), OMPHostIRFile(OMPHostIRFile),
-        NoGPULib(NoGPULib) {}
+        OpenMPForceUSM(OpenMPForceUSM), OpenMPVersion(OpenMPVersion),
+        OMPHostIRFile(OMPHostIRFile), NoGPULib(NoGPULib) {}
 
   OffloadModuleOpts(Fortran::frontend::LangOptions &Opts)
       : OpenMPTargetDebug(Opts.OpenMPTargetDebug),
@@ -148,8 +148,9 @@ struct OffloadModuleOpts {
         OpenMPNoThreadState(Opts.OpenMPNoThreadState),
         OpenMPNoNestedParallelism(Opts.OpenMPNoNestedParallelism),
         OpenMPIsTargetDevice(Opts.OpenMPIsTargetDevice),
-        OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPVersion(Opts.OpenMPVersion),
-        OMPHostIRFile(Opts.OMPHostIRFile), NoGPULib(Opts.NoGPULib) {}
+        OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPForceUSM(Opts.OpenMPForceUSM),
+        OpenMPVersion(Opts.OpenMPVersion), OMPHostIRFile(Opts.OMPHostIRFile),
+        NoGPULib(Opts.NoGPULib) {}
 
   uint32_t OpenMPTargetDebug = 0;
   bool OpenMPTeamSubscription = false;
@@ -158,6 +159,7 @@ struct OffloadModuleOpts {
   bool OpenMPNoNestedParallelism = false;
   bool OpenMPIsTargetDevice = false;
   bool OpenMPIsGPU = false;
+  bool OpenMPForceUSM = false;
   uint32_t OpenMPVersion = 11;
   std::string OMPHostIRFile = {};
   bool NoGPULib = false;
@@ -172,13 +174,17 @@ struct OffloadModuleOpts {
           module.getOperation())) {
     offloadMod.setIsTargetDevice(Opts.OpenMPIsTargetDevice);
     offloadMod.setIsGPU(Opts.OpenMPIsGPU);
+    if (Opts.OpenMPForceUSM) {
+      offloadMod.setRequires(mlir::omp::ClauseRequires::unified_shared_memory);
+    }
     if (Opts.OpenMPIsTargetDevice) {
       offloadMod.setFlags(Opts.OpenMPTargetDebug, Opts.OpenMPTeamSubscription,
           Opts.OpenMPThreadSubscription, Opts.OpenMPNoThreadState,
           Opts.OpenMPNoNestedParallelism, Opts.OpenMPVersion, Opts.NoGPULib);
 
-      if (!Opts.OMPHostIRFile.empty())
+      if (!Opts.OMPHostIRFile.empty()) {
         offloadMod.setHostIRFilePath(Opts.OMPHostIRFile);
+      }
     }
   }
 }
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 50c3e8b0113b5..f64a939b785ef 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -906,6 +906,9 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args,
             res.getLangOpts().OpenMPVersion, diags)) {
       res.getLangOpts().OpenMPVersion = Version;
     }
+    if (args.hasArg(clang::driver::options::OPT_fopenmp_force_usm)) {
+      res.getLangOpts().OpenMPForceUSM = 1;
+    }
     if (args.hasArg(clang::driver::options::OPT_fopenmp_is_target_device)) {
       res.getLangOpts().OpenMPIsTargetDevice = 1;
 
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 9598457d123cf..af9e2af24619b 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -2608,7 +2608,9 @@ void Fortran::lower::genOpenMPRequires(mlir::Operation *mod,
           symbol->details());
     }
 
-    MlirRequires mlirFlags = MlirRequires::none;
+    // Use pre-populated omp.requires module attribute if it was set, so that
+    // the "-fopenmp-force-usm" compiler option is honored.
+    MlirRequires mlirFlags = offloadMod.getRequires();
     if (semaFlags.test(SemaRequires::ReverseOffload))
       mlirFlags = mlirFlags | MlirRequires::reverse_offload;
     if (semaFlags.test(SemaRequires::UnifiedAddress))
diff --git a/flang/test/Driver/omp-driver-offload.f90 b/flang/test/Driver/omp-driver-offload.f90
index 8f48ca75114ce..6fb4f4eeeeca1 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -207,3 +207,23 @@
 ! RUN:      --rocm-path=%S/Inputs/rocm %s 2>&1 \
 ! RUN:   | FileCheck --check-prefix=ROCM-PATH %s
 ! ROCM-PATH: Found HIP installation: {{.*Inputs.*rocm}}, version 3.6.20214-a2917cd
+
+! Test -fopenmp-force-usm option without offload
+! RUN: %flang -S -### %s -o %t 2>&1 \
+! RUN: -fopenmp -fopenmp-force-usm \
+! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN:   | FileCheck %s --check-prefix=FORCE-USM-NO-OFFLOAD
+
+! FORCE-USM-NO-OFFLOAD: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu"
+! FORCE-USM-NO-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
+
+! Test -fopenmp-force-usm option with offload
+! RUN: %flang -S -### %s -o %t 2>&1 \
+! RUN: -fopenmp -fopenmp-force-usm --offload-arch=gfx90a \
+! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN:   | FileCheck %s --check-prefix=FORCE-USM-OFFLOAD
+
+! FORCE-USM-OFFLOAD: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu"
+! FORCE-USM-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
+! FORCE-USM-OFFLOAD-NEXT: "{{[^"]*}}flang-new" "-fc1" "-triple" "amdgcn-amd-amdhsa"
+! FORCE-USM-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
diff --git a/flang/test/Lower/OpenMP/force-usm.f90 b/flang/test/Lower/OpenMP/force-usm.f90
new file mode 100644
index 0000000000000..90bbf3c4d842f
--- /dev/null
+++ b/flang/test/Lower/OpenMP/force-usm.f90
@@ -0,0 +1,12 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-force-usm %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-target-device -fopenmp-force-usm %s -o - | FileCheck %s
+! RUN: bbc -fopenmp -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
+! RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
+
+! This test checks the addition of requires unified_shared_memory when
+! -fopenmp-force-usm is set
+
+!CHECK:      module attributes {
+!CHECK-SAME: omp.requires = #omp<clause_requires unified_shared_memory>
+program requires
+end program requires
diff --git a/flang/test/Lower/OpenMP/requires-force-usm.f90 b/flang/test/Lower/OpenMP/requires-force-usm.f90
new file mode 100644
index 0000000000000..5f5cf9e64cd70
--- /dev/null
+++ b/flang/test/Lower/OpenMP/requires-force-usm.f90
@@ -0,0 +1,15 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-force-usm %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-target-device -fopenmp-force-usm %s -o - | FileCheck %s
+! RUN: bbc -fopenmp -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
+! RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
+
+! This test checks the addition of requires unified_shared_memory when
+! -fopenmp-force-usm is set, even when other requires directives are present
+
+!CHECK:      module attributes {
+!CHECK-SAME: omp.requires = #omp<clause_requires reverse_offload|unified_shared_memory>
+program requires
+  !$omp requires reverse_offload
+  !$omp target
+  !$omp end target
+end program requires
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index bab21338cef26..3485c1499d3b6 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -144,6 +144,11 @@ static llvm::cl::opt<bool>
                     llvm::cl::desc("enable openmp GPU target codegen"),
                     llvm::cl::init(false));
 
+static llvm::cl::opt<bool> enableOpenMPForceUSM(
+    "fopenmp-force-usm",
+    llvm::cl::desc("force openmp unified shared memory mode"),
+    llvm::cl::init(false));
+
 // A simplified subset of the OpenMP RTL Flags from Flang, only the primary
 // positive options are available, no negative options e.g. fopen_assume* vs
 // fno_open_assume*
@@ -374,11 +379,11 @@ static mlir::LogicalResult convertFortranSourceToMLIR(
                       "-fopenmp-is-target-device is also set";
       return mlir::failure();
     }
-    auto offloadModuleOpts =
-        OffloadModuleOpts(setOpenMPTargetDebug, setOpenMPTeamSubscription,
-                          setOpenMPThreadSubscription, setOpenMPNoThreadState,
-                          setOpenMPNoNestedParallelism, enableOpenMPDevice,
-                          enableOpenMPGPU, setOpenMPVersion, "", setNoGPULib);
+    auto offloadModuleOpts = OffloadModuleOpts(
+        setOpenMPTargetDebug, setOpenMPTeamSubscription,
+        setOpenMPThreadSubscription, setOpenMPNoThreadState,
+        setOpenMPNoNestedParallelism, enableOpenMPDevice, enableOpenMPGPU,
+        enableOpenMPForceUSM, setOpenMPVersion, "", setNoGPULib);
     setOffloadModuleInterfaceAttributes(mlirModule, offloadModuleOpts);
     setOpenMPVersionAttribute(mlirModule, setOpenMPVersion);
   }



More information about the cfe-commits mailing list