[clang] [clang][OpenMP] Add OpenMP GPU optimization flag framework (PR #178914)
Nicole Aschenbrenner via cfe-commits
cfe-commits at lists.llvm.org
Wed Feb 4 06:00:08 PST 2026
https://github.com/nicebert updated https://github.com/llvm/llvm-project/pull/178914
>From bf77711c128a74e835cf96287c7fa6974bd9baed Mon Sep 17 00:00:00 2001
From: Nicole Aschenbrenner <nicole.aschenbrenner at amd.com>
Date: Tue, 27 Jan 2026 09:54:06 -0600
Subject: [PATCH] [clang][OpenMP] Add -fopenmp-target-fast convenience flag
Add a meta-flag that implies multiple GPU runtime assumptions:
- -fopenmp-assume-no-thread-state
- -fopenmp-assume-no-nested-parallelism
The flag is automatically enabled by -Ofast and can be explicitly
disabled with -fno-openmp-target-fast. Individual implied flags can
be selectively overridden while keeping others enabled.
Testing: Added Driver/openmp-target-fast-flag.c to verify flag
handling and implication logic.
---
clang/include/clang/Options/Options.td | 11 ++++++
clang/lib/Driver/ToolChains/Clang.cpp | 42 ++++++++++++++++++++-
clang/test/Driver/openmp-target-fast-flag.c | 39 +++++++++++++++++++
3 files changed, 90 insertions(+), 2 deletions(-)
create mode 100644 clang/test/Driver/openmp-target-fast-flag.c
diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td
index 421208a812bbc..8ae1e9767b910 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -3986,9 +3986,13 @@ def fno_openmp_assume_threads_oversubscription : Flag<["-"], "fno-openmp-assume-
def fopenmp_assume_no_thread_state : Flag<["-"], "fopenmp-assume-no-thread-state">,
HelpText<"Assert no thread in a parallel region modifies an ICV">,
MarshallingInfoFlag<LangOpts<"OpenMPNoThreadState">>;
+def fno_openmp_assume_no_thread_state : Flag<["-"], "fno-openmp-assume-no-thread-state">,
+ HelpText<"Assert that a thread in a parallel region may modify an ICV">;
def fopenmp_assume_no_nested_parallelism : Flag<["-"], "fopenmp-assume-no-nested-parallelism">,
HelpText<"Assert no nested parallel regions in the GPU">,
MarshallingInfoFlag<LangOpts<"OpenMPNoNestedParallelism">>;
+def fno_openmp_assume_no_nested_parallelism : Flag<["-"], "fno-openmp-assume-no-nested-parallelism">,
+ HelpText<"Assert that a nested parallel region may be used in the GPU">;
} // let Group = f_Group
} // let Visibility = [ClangOption, CC1Option, FC1Option]
@@ -4012,6 +4016,13 @@ def fopenmp_target_new_runtime : Flag<["-"], "fopenmp-target-new-runtime">,
Group<f_Group>, Flags<[HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
def fno_openmp_target_new_runtime : Flag<["-"], "fno-openmp-target-new-runtime">,
Group<f_Group>, Flags<[HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
+def fopenmp_target_fast : Flag<["-"], "fopenmp-target-fast">,
+ Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>,
+ Visibility<[ClangOption, CC1Option]>,
+ HelpText<"Assert common GPU usage patterns to enable OpenMP runtime optimizations">;
+def fno_openmp_target_fast : Flag<["-"], "fno-openmp-target-fast">,
+ Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>,
+ Visibility<[ClangOption, CC1Option]>;
defm openmp_optimistic_collapse : BoolFOption<"openmp-optimistic-collapse",
LangOpts<"OpenMPOptimisticCollapse">, DefaultFalse,
PosFlag<SetTrue, [], [ClangOption, CC1Option]>,
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 0293b04217673..33ec50b148cf9 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6675,6 +6675,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_fno_offload_via_llvm, false) &&
(JA.isDeviceOffloading(Action::OFK_None) ||
JA.isDeviceOffloading(Action::OFK_OpenMP))) {
+
+ // Determine if target-fast optimizations should be enabled
+ bool TargetFastUsed =
+ Args.hasFlag(options::OPT_fopenmp_target_fast,
+ options::OPT_fno_openmp_target_fast, OFastEnabled);
switch (D.getOpenMPRuntime(Args)) {
case Driver::OMPRT_OMP:
case Driver::OMPRT_IOMP5:
@@ -6725,10 +6730,43 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_fno_openmp_assume_threads_oversubscription,
/*Default=*/false))
CmdArgs.push_back("-fopenmp-assume-threads-oversubscription");
- if (Args.hasArg(options::OPT_fopenmp_assume_no_thread_state))
+
+ // Handle -fopenmp-target-fast
+ if (Arg *A = Args.getLastArg(options::OPT_fopenmp_target_fast,
+ options::OPT_fno_openmp_target_fast)) {
+ if (A->getOption().matches(options::OPT_fopenmp_target_fast))
+ CmdArgs.push_back("-fopenmp-target-fast");
+ else
+ CmdArgs.push_back("-fno-openmp-target-fast");
+ } else if (OFastEnabled) {
+ CmdArgs.push_back("-fopenmp-target-fast");
+ }
+
+ // Handle -fopenmp-assume-no-thread-state (implied by target-fast)
+ if (Arg *A =
+ Args.getLastArg(options::OPT_fopenmp_assume_no_thread_state,
+ options::OPT_fno_openmp_assume_no_thread_state)) {
+ if (A->getOption().matches(options::OPT_fopenmp_assume_no_thread_state))
+ CmdArgs.push_back("-fopenmp-assume-no-thread-state");
+ else
+ CmdArgs.push_back("-fno-openmp-assume-no-thread-state");
+ } else if (TargetFastUsed) {
CmdArgs.push_back("-fopenmp-assume-no-thread-state");
- if (Args.hasArg(options::OPT_fopenmp_assume_no_nested_parallelism))
+ }
+
+ // Handle -fopenmp-assume-no-nested-parallelism (implied by target-fast)
+ if (Arg *A = Args.getLastArg(
+ options::OPT_fopenmp_assume_no_nested_parallelism,
+ options::OPT_fno_openmp_assume_no_nested_parallelism)) {
+ if (A->getOption().matches(
+ options::OPT_fopenmp_assume_no_nested_parallelism))
+ CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism");
+ else
+ CmdArgs.push_back("-fno-openmp-assume-no-nested-parallelism");
+ } else if (TargetFastUsed) {
CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism");
+ }
+
if (Args.hasArg(options::OPT_fopenmp_offload_mandatory))
CmdArgs.push_back("-fopenmp-offload-mandatory");
if (Args.hasArg(options::OPT_fopenmp_force_usm))
diff --git a/clang/test/Driver/openmp-target-fast-flag.c b/clang/test/Driver/openmp-target-fast-flag.c
new file mode 100644
index 0000000000000..9ad849bf21ead
--- /dev/null
+++ b/clang/test/Driver/openmp-target-fast-flag.c
@@ -0,0 +1,39 @@
+// REQUIRES: x86-registered-target, amdgpu-registered-target
+// At -O0 with no target-fast flag, neither the meta-flag nor the implied assumptions are forwarded.
+// RUN: %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib %s -O0 2>&1 \
+// RUN: | FileCheck -check-prefixes=DefaultTFast,DefaultTState,DefaultNoNestParallel %s
+// An explicit -fopenmp-target-fast forwards the meta-flag and both implied assumptions.
+// RUN: %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -O0 -fopenmp-target-fast %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=TFast,TState,NestParallel %s
+// -O3 alone does not imply target-fast.
+// RUN: %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -O3 %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=O3,DefaultTFast,DefaultTState,DefaultNoNestParallel %s
+// An explicit -fno-openmp-target-fast is forwarded as-is and implies no assumptions.
+// RUN: %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -O3 -fno-openmp-target-fast %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=O3,NoTFast,DefaultTState,DefaultNoNestParallel %s
+// -Ofast implies target-fast and therefore both assumptions.
+// RUN: %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -Ofast %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=OFast,TFast,TState,NestParallel %s
+// An explicit -fno-openmp-target-fast overrides the -Ofast default.
+// RUN: %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -Ofast -fno-openmp-target-fast %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=OFast,NoTFast,DefaultTState,DefaultNoNestParallel %s
+// When both spellings are given, the last one on the command line wins.
+// RUN: %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -O0 -fno-openmp-target-fast -fopenmp-target-fast %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=TFast,TState,NestParallel %s
+// Expected driver output for each check prefix used above.
+// O3: -O3
+// OFast: -Ofast
+
+// TFast: "-fopenmp-target-fast"
+// TFast-NOT: "-fno-openmp-target-fast"
+// NoTFast: "-fno-openmp-target-fast"
+// NoTFast-NOT: "-fopenmp-target-fast"
+// DefaultTFast-NOT: {{"-f(no-)?openmp-target-fast"}}
+
+// TState: "-fopenmp-assume-no-thread-state"
+// TState-NOT: "-fno-openmp-assume-no-thread-state"
+// DefaultTState-NOT: {{"-f(no-)?openmp-assume-no-thread-state"}}
+
+// NestParallel: "-fopenmp-assume-no-nested-parallelism"
+// NestParallel-NOT: "-fno-openmp-assume-no-nested-parallelism"
+// DefaultNoNestParallel-NOT: {{"-f(no-)?openmp-assume-no-nested-parallelism"}}
More information about the cfe-commits
mailing list