[flang-commits] [clang] [flang] [Flang-new][OpenMP] Add bitcode files for AMD GPU OpenMP (PR #96742)

Dominik Adamski via flang-commits flang-commits at lists.llvm.org
Fri Jul 26 00:49:29 PDT 2024


https://github.com/DominikAdamski updated https://github.com/llvm/llvm-project/pull/96742

>From 5b487aac3c8414b6f37f6888f361ca7488094048 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Fri, 21 Jun 2024 18:03:53 +0200
Subject: [PATCH 1/5] [Flang-new][OpenMP] Add offload related flags for AMDGPU

Flang-new needs to add mlink-builtin-bitcode objects
to properly support offload code generation for AMD GPU.

fcuda-is-device flag is not used by Flang currently.
In the future it will be needed for Flang equivalent function:
AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace.
---
 clang/include/clang/Driver/Options.td     |  4 +-
 clang/lib/Driver/ToolChains/Flang.cpp     |  3 ++
 flang/test/Driver/omp-driver-offload.f90  | 58 +++++++++++++----------
 flang/test/Driver/target-cpu-features.f90 |  4 +-
 flang/test/Driver/target-gpu-features.f90 |  2 +-
 5 files changed, 41 insertions(+), 30 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index dd55838dcf384..612d5793232ce 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -8016,7 +8016,7 @@ def source_date_epoch : Separate<["-"], "source-date-epoch">,
 // CUDA Options
 //===----------------------------------------------------------------------===//
 
-let Visibility = [CC1Option] in {
+let Visibility = [CC1Option, FC1Option] in {
 
 def fcuda_is_device : Flag<["-"], "fcuda-is-device">,
   HelpText<"Generate code for CUDA device">,
@@ -8031,7 +8031,7 @@ def fno_cuda_host_device_constexpr : Flag<["-"], "fno-cuda-host-device-constexpr
   HelpText<"Don't treat unattributed constexpr functions as __host__ __device__.">,
   MarshallingInfoNegativeFlag<LangOpts<"CUDAHostDeviceConstexpr">>;
 
-} // let Visibility = [CC1Option]
+} // let Visibility = [CC1Option, FC1Option]
 
 //===----------------------------------------------------------------------===//
 // OpenMP Options
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 42b45dba2bd31..2679f284c5016 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -333,6 +333,9 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
     StringRef Val = A->getValue();
     CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
   }
+
+  const ToolChain &TC = getToolChain();
+  TC.addClangTargetOptions(Args, CmdArgs, Action::OffloadKind::OFK_OpenMP);
 }
 
 void Flang::addTargetOptions(const ArgList &Args,
diff --git a/flang/test/Driver/omp-driver-offload.f90 b/flang/test/Driver/omp-driver-offload.f90
index 6fb4f4eeeeca1..da81a6ee3ba8f 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -14,12 +14,12 @@
 ! Test regular -fopenmp with offload, and invocation filtering options
 ! RUN: %flang -S -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a --offload-arch=sm_70 \
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OFFLOAD-HOST-AND-DEVICE
 
 ! RUN: %flang -S -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a --offload-arch=sm_70 --offload-host-device \
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OFFLOAD-HOST-AND-DEVICE
 
 ! OFFLOAD-HOST-AND-DEVICE: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu"
@@ -29,7 +29,7 @@
 
 ! RUN: %flang -S -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a --offload-arch=sm_70 --offload-host-only \
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OFFLOAD-HOST
 
 ! OFFLOAD-HOST: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu"
@@ -39,7 +39,7 @@
 
 ! RUN: %flang -S -### %s 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a --offload-arch=sm_70 --offload-device-only \
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OFFLOAD-DEVICE
 
 ! OFFLOAD-DEVICE: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu"
@@ -48,13 +48,13 @@
 ! OFFLOAD-DEVICE-NOT: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu"
 
 ! Test regular -fopenmp with offload for basic fopenmp-is-target-device flag addition and correct fopenmp 
-! RUN: %flang -### -fopenmp --offload-arch=gfx90a -fopenmp-targets=amdgcn-amd-amdhsa %s 2>&1 | FileCheck --check-prefixes=CHECK-OPENMP-IS-TARGET-DEVICE %s
+! RUN: %flang -### -fopenmp --offload-arch=gfx90a -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib %s 2>&1 | FileCheck --check-prefixes=CHECK-OPENMP-IS-TARGET-DEVICE %s
 ! CHECK-OPENMP-IS-TARGET-DEVICE: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-target-device" {{.*}}.f90"
 
 ! Testing appropriate flags are gnerated and appropriately assigned by the driver when offloading
 ! RUN: %flang -S -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a \
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OPENMP-OFFLOAD-ARGS
 ! OPENMP-OFFLOAD-ARGS: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu" {{.*}} "-fopenmp" {{.*}}.f90"
 ! OPENMP-OFFLOAD-ARGS-NEXT: "{{[^"]*}}flang-new" "-fc1" "-triple" "amdgcn-amd-amdhsa"
@@ -70,23 +70,23 @@
 ! RUN: %flang -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a \
 ! RUN: -fopenmp-targets=amdgcn-amd-amdhsa \
-! RUN: -fopenmp-assume-threads-oversubscription \
+! RUN: -fopenmp-assume-threads-oversubscription -nogpulib \
 ! RUN: | FileCheck %s --check-prefixes=CHECK-THREADS-OVS
 ! RUN: %flang -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=sm_70 \
 ! RUN: -fopenmp-targets=nvptx64-nvidia-cuda \
-! RUN: -fopenmp-assume-threads-oversubscription \
+! RUN: -fopenmp-assume-threads-oversubscription -nogpulib \
 ! RUN: | FileCheck %s --check-prefixes=CHECK-THREADS-OVS
 ! CHECK-THREADS-OVS: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-target-device" "-fopenmp-assume-threads-oversubscription" {{.*}}.f90"
 
 ! RUN: %flang -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a \
 ! RUN: -fopenmp-targets=amdgcn-amd-amdhsa \
-! RUN: -fopenmp-assume-teams-oversubscription  \
+! RUN: -fopenmp-assume-teams-oversubscription  -nogpulib\
 ! RUN: | FileCheck %s --check-prefixes=CHECK-TEAMS-OVS
 ! RUN: %flang -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=sm_70 \
-! RUN: -fopenmp-targets=nvptx64-nvidia-cuda \
+! RUN: -fopenmp-targets=nvptx64-nvidia-cuda -nogpulib\
 ! RUN: -fopenmp-assume-teams-oversubscription  \
 ! RUN: | FileCheck %s --check-prefixes=CHECK-TEAMS-OVS
 ! CHECK-TEAMS-OVS: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-target-device" "-fopenmp-assume-teams-oversubscription" {{.*}}.f90"
@@ -94,48 +94,48 @@
 ! RUN: %flang -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a \
 ! RUN: -fopenmp-targets=amdgcn-amd-amdhsa \
-! RUN: -fopenmp-assume-no-nested-parallelism  \
+! RUN: -fopenmp-assume-no-nested-parallelism  -nogpulib\
 ! RUN: | FileCheck %s --check-prefixes=CHECK-NEST-PAR
 ! RUN: %flang -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=sm_70 \
 ! RUN: -fopenmp-targets=nvptx64-nvidia-cuda \
-! RUN: -fopenmp-assume-no-nested-parallelism  \
+! RUN: -fopenmp-assume-no-nested-parallelism  -nogpulib\
 ! RUN: | FileCheck %s --check-prefixes=CHECK-NEST-PAR
 ! CHECK-NEST-PAR: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-target-device" "-fopenmp-assume-no-nested-parallelism" {{.*}}.f90"
 
 ! RUN: %flang -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a \
 ! RUN: -fopenmp-targets=amdgcn-amd-amdhsa \
-! RUN: -fopenmp-assume-no-thread-state \
+! RUN: -fopenmp-assume-no-thread-state -nogpulib\
 ! RUN: | FileCheck %s --check-prefixes=CHECK-THREAD-STATE
 ! RUN: %flang -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=sm_70 \
 ! RUN: -fopenmp-targets=nvptx64-nvidia-cuda \
-! RUN: -fopenmp-assume-no-thread-state \
+! RUN: -fopenmp-assume-no-thread-state -nogpulib\
 ! RUN: | FileCheck %s --check-prefixes=CHECK-THREAD-STATE
 ! CHECK-THREAD-STATE: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-target-device" "-fopenmp-assume-no-thread-state" {{.*}}.f90"
 
 ! RUN: %flang -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a \
 ! RUN: -fopenmp-targets=amdgcn-amd-amdhsa \
-! RUN: -fopenmp-target-debug \
+! RUN: -fopenmp-target-debug -nogpulib\
 ! RUN: | FileCheck %s --check-prefixes=CHECK-TARGET-DEBUG
 ! RUN: %flang -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=sm_70 \
 ! RUN: -fopenmp-targets=nvptx64-nvidia-cuda \
-! RUN: -fopenmp-target-debug \
+! RUN: -fopenmp-target-debug -nogpulib\
 ! RUN: | FileCheck %s --check-prefixes=CHECK-TARGET-DEBUG
 ! CHECK-TARGET-DEBUG: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-target-device" "-fopenmp-target-debug" {{.*}}.f90"
 
 ! RUN: %flang -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a \
 ! RUN: -fopenmp-targets=amdgcn-amd-amdhsa \
-! RUN: -fopenmp-target-debug \
+! RUN: -fopenmp-target-debug -nogpulib\
 ! RUN: | FileCheck %s --check-prefixes=CHECK-TARGET-DEBUG
 ! RUN: %flang -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=sm_70 \
 ! RUN: -fopenmp-targets=nvptx64-nvidia-cuda \
-! RUN: -fopenmp-target-debug \
+! RUN: -fopenmp-target-debug -nogpulib\
 ! RUN: | FileCheck %s --check-prefixes=CHECK-TARGET-DEBUG
 ! CHECK-TARGET-DEBUG-EQ: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-target-device" "-fopenmp-target-debug=111" {{.*}}.f90"
 
@@ -144,14 +144,14 @@
 ! RUN: -fopenmp-targets=amdgcn-amd-amdhsa \
 ! RUN: -fopenmp-target-debug -fopenmp-assume-threads-oversubscription \
 ! RUN: -fopenmp-assume-teams-oversubscription -fopenmp-assume-no-nested-parallelism \
-! RUN: -fopenmp-assume-no-thread-state \
+! RUN: -fopenmp-assume-no-thread-state -nogpulib\
 ! RUN: | FileCheck %s --check-prefixes=CHECK-RTL-ALL
 ! RUN: %flang -S -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=sm_70 \
 ! RUN: -fopenmp-targets=nvptx64-nvidia-cuda \
 ! RUN: -fopenmp-target-debug -fopenmp-assume-threads-oversubscription \
 ! RUN: -fopenmp-assume-teams-oversubscription -fopenmp-assume-no-nested-parallelism \
-! RUN: -fopenmp-assume-no-thread-state \
+! RUN: -fopenmp-assume-no-thread-state -nogpulib\
 ! RUN: | FileCheck %s --check-prefixes=CHECK-RTL-ALL
 ! CHECK-RTL-ALL: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-target-device" "-fopenmp-target-debug" "-fopenmp-assume-teams-oversubscription"
 ! CHECK-RTL-ALL: "-fopenmp-assume-threads-oversubscription" "-fopenmp-assume-no-thread-state" "-fopenmp-assume-no-nested-parallelism"
@@ -160,12 +160,12 @@
 ! RUN: %flang -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a \
 ! RUN: -fopenmp-targets=amdgcn-amd-amdhsa \
-! RUN: -fopenmp-version=45 \
+! RUN: -fopenmp-version=45 -nogpulib\
 ! RUN: | FileCheck %s --check-prefixes=CHECK-OPENMP-VERSION
 ! RUN: %flang -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=sm_70 \
 ! RUN: -fopenmp-targets=nvptx64-nvidia-cuda \
-! RUN: -fopenmp-version=45 \
+! RUN: -fopenmp-version=45 -nogpulib\
 ! RUN: | FileCheck %s --check-prefixes=CHECK-OPENMP-VERSION
 ! CHECK-OPENMP-VERSION: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" "-fopenmp-version=45" {{.*}}.f90"
 
@@ -190,7 +190,7 @@
 ! NO-LIBC-GPU-NVPTX-NOT: "-lcgpu-nvptx"
 
 ! RUN:   %flang -### --target=x86_64-unknown-linux-gnu -fopenmp  \
-! RUN:      --offload-arch=gfx90a \
+! RUN:      --offload-arch=gfx900 --rocm-path=%S/Inputs/rocm  \
 ! RUN:      -gpulibc %s 2>&1 \
 ! RUN:   | FileCheck --check-prefix=LIBC-GPU-AMDGPU %s
 ! LIBC-GPU-AMDGPU-DAG: "-lcgpu-amdgpu"
@@ -198,7 +198,7 @@
 
 ! RUN:   %flang -### --target=x86_64-unknown-linux-gnu -fopenmp  \
 ! RUN:      --offload-arch=gfx90a \
-! RUN:      -nogpulibc %s 2>&1 \
+! RUN:      -nogpulibc -nogpulib %s 2>&1 \
 ! RUN:   | FileCheck --check-prefix=NO-LIBC-GPU-AMDGPU %s
 ! NO-LIBC-GPU-AMDGPU-NOT: "-lcgpu-amdgpu"
 
@@ -220,10 +220,18 @@
 ! Test -fopenmp-force-usm option with offload
 ! RUN: %flang -S -### %s -o %t 2>&1 \
 ! RUN: -fopenmp -fopenmp-force-usm --offload-arch=gfx90a \
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=FORCE-USM-OFFLOAD
 
 ! FORCE-USM-OFFLOAD: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu"
 ! FORCE-USM-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
 ! FORCE-USM-OFFLOAD-NEXT: "{{[^"]*}}flang-new" "-fc1" "-triple" "amdgcn-amd-amdhsa"
 ! FORCE-USM-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
+
+! RUN:   %flang -### -v --target=x86_64-unknown-linux-gnu -fopenmp  \
+! RUN:      --offload-arch=gfx900 \
+! RUN:      --rocm-path=%S/Inputs/rocm %s 2>&1 \
+! RUN:   | FileCheck --check-prefix=MLINK-BUILTIN-BITCODE  %s
+! MLINK-BUILTIN-BITCODE:      "{{[^"]*}}flang-new" "-fc1" "-triple" "amdgcn-amd-amdhsa"
+! MLINK-BUILTIN-BITCODE-SAME: "-fcuda-is-device"
+! MLINK-BUILTIN-BITCODE-SAME: "-mlink-builtin-bitcode" {{.*Inputs.*rocm.*amdgcn.*bitcode.*}}oclc_isa_version_900.bc
diff --git a/flang/test/Driver/target-cpu-features.f90 b/flang/test/Driver/target-cpu-features.f90
index eea7a0f665b34..215d323ba2038 100644
--- a/flang/test/Driver/target-cpu-features.f90
+++ b/flang/test/Driver/target-cpu-features.f90
@@ -23,10 +23,10 @@
 ! RUN: %flang --target=riscv64-linux-gnu -c %s -### 2>&1 \
 ! RUN: | FileCheck %s -check-prefix=CHECK-RV64
 
-! RUN: %flang --target=amdgcn-amd-amdhsa -mcpu=gfx908 -c %s -### 2>&1 \
+! RUN: %flang --target=amdgcn-amd-amdhsa -mcpu=gfx908 -nogpulib -c %s -### 2>&1 \
 ! RUN: | FileCheck %s -check-prefix=CHECK-AMDGPU
 
-! RUN: %flang --target=r600-unknown-unknown -mcpu=cayman -c %s -### 2>&1 \
+! RUN: %flang --target=r600-unknown-unknown -mcpu=cayman -nogpulib -c %s -### 2>&1 \
 ! RUN: | FileCheck %s -check-prefix=CHECK-AMDGPU-R600
 
 ! CHECK-A57: "-fc1" "-triple" "aarch64-unknown-linux-gnu"
diff --git a/flang/test/Driver/target-gpu-features.f90 b/flang/test/Driver/target-gpu-features.f90
index 9cc9ce4baaf4d..b783574370a0f 100644
--- a/flang/test/Driver/target-gpu-features.f90
+++ b/flang/test/Driver/target-gpu-features.f90
@@ -3,7 +3,7 @@
 ! Test that -mcpu are used and that the -target-cpu and -target-features
 ! are also added to the fc1 command.
 
-! RUN: %flang --target=amdgcn-amd-amdhsa -mcpu=gfx902 -c %s -### 2>&1 \
+! RUN: %flang --target=amdgcn-amd-amdhsa -mcpu=gfx902  -nogpulib -c %s -### 2>&1 \
 ! RUN: | FileCheck %s -check-prefix=CHECK-AMDGCN
 
 ! CHECK-AMDGCN: "-fc1" "-triple" "amdgcn-amd-amdhsa"

>From 8252dbee79e47a3dbf9d72c09621e73facd9e5ca Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Mon, 1 Jul 2024 05:45:46 -0500
Subject: [PATCH 2/5] Fixes after #96909

---
 clang/include/clang/Driver/Options.td    | 4 ++--
 flang/test/Driver/omp-driver-offload.f90 | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index b0538b3007944..1c2b8cfeef6ce 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -8024,7 +8024,7 @@ def source_date_epoch : Separate<["-"], "source-date-epoch">,
 // CUDA Options
 //===----------------------------------------------------------------------===//
 
-let Visibility = [CC1Option, FC1Option] in {
+let Visibility = [CC1Option] in {
 
 def fcuda_is_device : Flag<["-"], "fcuda-is-device">,
   HelpText<"Generate code for CUDA device">,
@@ -8039,7 +8039,7 @@ def fno_cuda_host_device_constexpr : Flag<["-"], "fno-cuda-host-device-constexpr
   HelpText<"Don't treat unattributed constexpr functions as __host__ __device__.">,
   MarshallingInfoNegativeFlag<LangOpts<"CUDAHostDeviceConstexpr">>;
 
-} // let Visibility = [CC1Option, FC1Option]
+} // let Visibility = [CC1Option]
 
 //===----------------------------------------------------------------------===//
 // OpenMP Options
diff --git a/flang/test/Driver/omp-driver-offload.f90 b/flang/test/Driver/omp-driver-offload.f90
index da81a6ee3ba8f..8ce60b53fbcdb 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -233,5 +233,4 @@
 ! RUN:      --rocm-path=%S/Inputs/rocm %s 2>&1 \
 ! RUN:   | FileCheck --check-prefix=MLINK-BUILTIN-BITCODE  %s
 ! MLINK-BUILTIN-BITCODE:      "{{[^"]*}}flang-new" "-fc1" "-triple" "amdgcn-amd-amdhsa"
-! MLINK-BUILTIN-BITCODE-SAME: "-fcuda-is-device"
 ! MLINK-BUILTIN-BITCODE-SAME: "-mlink-builtin-bitcode" {{.*Inputs.*rocm.*amdgcn.*bitcode.*}}oclc_isa_version_900.bc

>From efd405612a2499ede56d0466fc0cc4fe84e324a4 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Mon, 1 Jul 2024 08:01:22 -0500
Subject: [PATCH 3/5] apply nogpulib only for AMD GPU tests

---
 flang/test/Driver/omp-driver-offload.f90 | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/flang/test/Driver/omp-driver-offload.f90 b/flang/test/Driver/omp-driver-offload.f90
index 8ce60b53fbcdb..95e2a8fdc4aa5 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -75,7 +75,7 @@
 ! RUN: %flang -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=sm_70 \
 ! RUN: -fopenmp-targets=nvptx64-nvidia-cuda \
-! RUN: -fopenmp-assume-threads-oversubscription -nogpulib \
+! RUN: -fopenmp-assume-threads-oversubscription  \
 ! RUN: | FileCheck %s --check-prefixes=CHECK-THREADS-OVS
 ! CHECK-THREADS-OVS: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-target-device" "-fopenmp-assume-threads-oversubscription" {{.*}}.f90"
 
@@ -86,7 +86,7 @@
 ! RUN: | FileCheck %s --check-prefixes=CHECK-TEAMS-OVS
 ! RUN: %flang -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=sm_70 \
-! RUN: -fopenmp-targets=nvptx64-nvidia-cuda -nogpulib\
+! RUN: -fopenmp-targets=nvptx64-nvidia-cuda \
 ! RUN: -fopenmp-assume-teams-oversubscription  \
 ! RUN: | FileCheck %s --check-prefixes=CHECK-TEAMS-OVS
 ! CHECK-TEAMS-OVS: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-target-device" "-fopenmp-assume-teams-oversubscription" {{.*}}.f90"
@@ -99,7 +99,7 @@
 ! RUN: %flang -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=sm_70 \
 ! RUN: -fopenmp-targets=nvptx64-nvidia-cuda \
-! RUN: -fopenmp-assume-no-nested-parallelism  -nogpulib\
+! RUN: -fopenmp-assume-no-nested-parallelism  \
 ! RUN: | FileCheck %s --check-prefixes=CHECK-NEST-PAR
 ! CHECK-NEST-PAR: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-target-device" "-fopenmp-assume-no-nested-parallelism" {{.*}}.f90"
 
@@ -111,7 +111,7 @@
 ! RUN: %flang -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=sm_70 \
 ! RUN: -fopenmp-targets=nvptx64-nvidia-cuda \
-! RUN: -fopenmp-assume-no-thread-state -nogpulib\
+! RUN: -fopenmp-assume-no-thread-state \
 ! RUN: | FileCheck %s --check-prefixes=CHECK-THREAD-STATE
 ! CHECK-THREAD-STATE: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-target-device" "-fopenmp-assume-no-thread-state" {{.*}}.f90"
 
@@ -123,7 +123,7 @@
 ! RUN: %flang -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=sm_70 \
 ! RUN: -fopenmp-targets=nvptx64-nvidia-cuda \
-! RUN: -fopenmp-target-debug -nogpulib\
+! RUN: -fopenmp-target-debug \
 ! RUN: | FileCheck %s --check-prefixes=CHECK-TARGET-DEBUG
 ! CHECK-TARGET-DEBUG: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-target-device" "-fopenmp-target-debug" {{.*}}.f90"
 
@@ -135,7 +135,7 @@
 ! RUN: %flang -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=sm_70 \
 ! RUN: -fopenmp-targets=nvptx64-nvidia-cuda \
-! RUN: -fopenmp-target-debug -nogpulib\
+! RUN: -fopenmp-target-debug \
 ! RUN: | FileCheck %s --check-prefixes=CHECK-TARGET-DEBUG
 ! CHECK-TARGET-DEBUG-EQ: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-target-device" "-fopenmp-target-debug=111" {{.*}}.f90"
 
@@ -151,7 +151,7 @@
 ! RUN: -fopenmp-targets=nvptx64-nvidia-cuda \
 ! RUN: -fopenmp-target-debug -fopenmp-assume-threads-oversubscription \
 ! RUN: -fopenmp-assume-teams-oversubscription -fopenmp-assume-no-nested-parallelism \
-! RUN: -fopenmp-assume-no-thread-state -nogpulib\
+! RUN: -fopenmp-assume-no-thread-state \
 ! RUN: | FileCheck %s --check-prefixes=CHECK-RTL-ALL
 ! CHECK-RTL-ALL: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-target-device" "-fopenmp-target-debug" "-fopenmp-assume-teams-oversubscription"
 ! CHECK-RTL-ALL: "-fopenmp-assume-threads-oversubscription" "-fopenmp-assume-no-thread-state" "-fopenmp-assume-no-nested-parallelism"
@@ -165,7 +165,7 @@
 ! RUN: %flang -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=sm_70 \
 ! RUN: -fopenmp-targets=nvptx64-nvidia-cuda \
-! RUN: -fopenmp-version=45 -nogpulib\
+! RUN: -fopenmp-version=45 \
 ! RUN: | FileCheck %s --check-prefixes=CHECK-OPENMP-VERSION
 ! CHECK-OPENMP-VERSION: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" "-fopenmp-version=45" {{.*}}.f90"
 

>From f2e362ec99d61cb7c9dc525dcc66053e7807d232 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Wed, 3 Jul 2024 06:37:59 -0500
Subject: [PATCH 4/5] Revert "Fixes after #96909"

This reverts commit 8252dbee79e47a3dbf9d72c09621e73facd9e5ca.
---
 clang/include/clang/Driver/Options.td    | 4 ++--
 flang/test/Driver/omp-driver-offload.f90 | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 1ede75d3782cd..7a037934160fc 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -8024,7 +8024,7 @@ def source_date_epoch : Separate<["-"], "source-date-epoch">,
 // CUDA Options
 //===----------------------------------------------------------------------===//
 
-let Visibility = [CC1Option] in {
+let Visibility = [CC1Option, FC1Option] in {
 
 def fcuda_is_device : Flag<["-"], "fcuda-is-device">,
   HelpText<"Generate code for CUDA device">,
@@ -8039,7 +8039,7 @@ def fno_cuda_host_device_constexpr : Flag<["-"], "fno-cuda-host-device-constexpr
   HelpText<"Don't treat unattributed constexpr functions as __host__ __device__.">,
   MarshallingInfoNegativeFlag<LangOpts<"CUDAHostDeviceConstexpr">>;
 
-} // let Visibility = [CC1Option]
+} // let Visibility = [CC1Option, FC1Option]
 
 //===----------------------------------------------------------------------===//
 // OpenMP Options
diff --git a/flang/test/Driver/omp-driver-offload.f90 b/flang/test/Driver/omp-driver-offload.f90
index 95e2a8fdc4aa5..d9bdb4c980b81 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -233,4 +233,5 @@
 ! RUN:      --rocm-path=%S/Inputs/rocm %s 2>&1 \
 ! RUN:   | FileCheck --check-prefix=MLINK-BUILTIN-BITCODE  %s
 ! MLINK-BUILTIN-BITCODE:      "{{[^"]*}}flang-new" "-fc1" "-triple" "amdgcn-amd-amdhsa"
+! MLINK-BUILTIN-BITCODE-SAME: "-fcuda-is-device"
 ! MLINK-BUILTIN-BITCODE-SAME: "-mlink-builtin-bitcode" {{.*Inputs.*rocm.*amdgcn.*bitcode.*}}oclc_isa_version_900.bc

>From 7bddef762b83029d3f33ab988a006d25926bf4e3 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Thu, 18 Jul 2024 04:20:39 -0500
Subject: [PATCH 5/5] Revert "Revert "Fixes after #96909""

This reverts commit f2e362ec99d61cb7c9dc525dcc66053e7807d232.

PR: https://github.com/llvm/llvm-project/pull/99002 causes
that -fcuda-is-device is not added for AMD GPU OpenMP.
---
 clang/include/clang/Driver/Options.td    | 4 ++--
 flang/test/Driver/omp-driver-offload.f90 | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 7a037934160fc..1ede75d3782cd 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -8024,7 +8024,7 @@ def source_date_epoch : Separate<["-"], "source-date-epoch">,
 // CUDA Options
 //===----------------------------------------------------------------------===//
 
-let Visibility = [CC1Option, FC1Option] in {
+let Visibility = [CC1Option] in {
 
 def fcuda_is_device : Flag<["-"], "fcuda-is-device">,
   HelpText<"Generate code for CUDA device">,
@@ -8039,7 +8039,7 @@ def fno_cuda_host_device_constexpr : Flag<["-"], "fno-cuda-host-device-constexpr
   HelpText<"Don't treat unattributed constexpr functions as __host__ __device__.">,
   MarshallingInfoNegativeFlag<LangOpts<"CUDAHostDeviceConstexpr">>;
 
-} // let Visibility = [CC1Option, FC1Option]
+} // let Visibility = [CC1Option]
 
 //===----------------------------------------------------------------------===//
 // OpenMP Options
diff --git a/flang/test/Driver/omp-driver-offload.f90 b/flang/test/Driver/omp-driver-offload.f90
index d9bdb4c980b81..95e2a8fdc4aa5 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -233,5 +233,4 @@
 ! RUN:      --rocm-path=%S/Inputs/rocm %s 2>&1 \
 ! RUN:   | FileCheck --check-prefix=MLINK-BUILTIN-BITCODE  %s
 ! MLINK-BUILTIN-BITCODE:      "{{[^"]*}}flang-new" "-fc1" "-triple" "amdgcn-amd-amdhsa"
-! MLINK-BUILTIN-BITCODE-SAME: "-fcuda-is-device"
 ! MLINK-BUILTIN-BITCODE-SAME: "-mlink-builtin-bitcode" {{.*Inputs.*rocm.*amdgcn.*bitcode.*}}oclc_isa_version_900.bc



More information about the flang-commits mailing list