[clang] [llvm] [Offload] Move HIP and CUDA to new driver by default (PR #84420)

Joseph Huber via cfe-commits cfe-commits at lists.llvm.org
Mon Nov 11 09:27:45 PST 2024


https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/84420

>From 8d63e56aa5af8b86d757d1f1ff68267d3dd1ccd4 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Thu, 7 Mar 2024 15:48:00 -0600
Subject: [PATCH] [Offload] Move HIP and CUDA to new driver by default

Summary:
This patch makes the `--offload-new-driver` flag the default for all
current offloading languages. This mostly just required updating a lot
of tests to explicitly request the old format (by passing
`--no-offload-new-driver`). I tried to update them to the new format
where possible, but some were directly checking the old format.

This is not intended to land immediately; it is posted to allow for
greater testing. One potential issue I've discovered is the lack of SPIR-V
support or handling for `--offload`.
---
 clang/lib/Driver/Driver.cpp                   |  8 +++---
 clang/lib/Driver/ToolChains/Clang.cpp         | 19 ++++++--------
 clang/test/Driver/cl-offload.cu               |  5 ++--
 clang/test/Driver/cuda-arch-translation.cu    | 26 +++++++++----------
 clang/test/Driver/cuda-bindings.cu            | 24 ++++++++---------
 clang/test/Driver/cuda-options.cu             | 23 ++++++++--------
 clang/test/Driver/cuda-output-asm.cu          |  4 ---
 clang/test/Driver/hip-gz-options.hip          |  1 -
 clang/test/Driver/hip-invalid-target-id.hip   |  4 +--
 clang/test/Driver/hip-macros.hip              |  3 ---
 clang/test/Driver/hip-offload-arch.hip        |  4 +--
 clang/test/Driver/hip-options.hip             |  6 +----
 clang/test/Driver/hip-sanitize-options.hip    |  2 +-
 clang/test/Driver/hip-save-temps.hip          | 12 ++++-----
 .../test/Driver/hip-toolchain-device-only.hip |  4 ---
 clang/test/Driver/hip-toolchain-mllvm.hip     |  2 --
 clang/test/Driver/hip-toolchain-no-rdc.hip    |  2 +-
 clang/test/Driver/invalid-offload-options.cpp |  2 +-
 .../ClangLinkerWrapper.cpp                    |  9 +++++--
 clang/unittests/Tooling/ToolingTest.cpp       |  6 ++---
 llvm/lib/Object/OffloadBinary.cpp             | 13 +++++++---
 21 files changed, 83 insertions(+), 96 deletions(-)

diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 93e85f7dffe35a0..212af24f4fe8cd7 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -4183,11 +4183,9 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
   handleArguments(C, Args, Inputs, Actions);
 
   bool UseNewOffloadingDriver =
-      C.isOffloadingHostKind(Action::OFK_OpenMP) ||
-      Args.hasFlag(options::OPT_foffload_via_llvm,
-                   options::OPT_fno_offload_via_llvm, false) ||
+      C.getActiveOffloadKinds() != Action::OFK_None &&
       Args.hasFlag(options::OPT_offload_new_driver,
-                   options::OPT_no_offload_new_driver, false);
+                   options::OPT_no_offload_new_driver, true);
 
   // Builder to be used to build offloading actions.
   std::unique_ptr<OffloadingActionBuilder> OffloadBuilder =
@@ -4913,7 +4911,7 @@ Action *Driver::ConstructPhaseAction(
                    offloadDeviceOnly() ||
                    (TargetDeviceOffloadKind == Action::OFK_HIP &&
                     !Args.hasFlag(options::OPT_offload_new_driver,
-                                  options::OPT_no_offload_new_driver, false)))
+                                  options::OPT_no_offload_new_driver, true)))
               ? types::TY_LLVM_IR
               : types::TY_LLVM_BC;
       return C.MakeAction<BackendJobAction>(Input, Output);
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 0952262c360185e..6f0895a7c97ffe2 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5041,8 +5041,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   bool IsHostOffloadingAction =
       JA.isHostOffloading(Action::OFK_OpenMP) ||
       (JA.isHostOffloading(C.getActiveOffloadKinds()) &&
+       C.getActiveOffloadKinds() != Action::OFK_None &&
        Args.hasFlag(options::OPT_offload_new_driver,
-                    options::OPT_no_offload_new_driver, false));
+                    options::OPT_no_offload_new_driver, true));
 
   bool IsRDCMode =
       Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false);
@@ -5374,7 +5375,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     if (IsUsingLTO) {
       if (IsDeviceOffloadAction && !JA.isDeviceOffloading(Action::OFK_OpenMP) &&
           !Args.hasFlag(options::OPT_offload_new_driver,
-                        options::OPT_no_offload_new_driver, false) &&
+                        options::OPT_no_offload_new_driver, true) &&
           !Triple.isAMDGPU()) {
         D.Diag(diag::err_drv_unsupported_opt_for_target)
             << Args.getLastArg(options::OPT_foffload_lto,
@@ -6843,16 +6844,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     Args.addOptOutFlag(CmdArgs, options::OPT_fopenmp_extensions,
                        options::OPT_fno_openmp_extensions);
   }
-  // Forward the offload runtime change to code generation, liboffload implies
-  // new driver. Otherwise, check if we should forward the new driver to change
-  // offloading code generation.
-  if (Args.hasFlag(options::OPT_foffload_via_llvm,
-                   options::OPT_fno_offload_via_llvm, false)) {
-    CmdArgs.append({"--offload-new-driver", "-foffload-via-llvm"});
-  } else if (Args.hasFlag(options::OPT_offload_new_driver,
-                          options::OPT_no_offload_new_driver, false)) {
+
+  // Forward the new driver to change offloading code generation.
+  if (C.getActiveOffloadKinds() != Action::OFK_None &&
+      Args.hasFlag(options::OPT_offload_new_driver,
+                   options::OPT_no_offload_new_driver, true))
     CmdArgs.push_back("--offload-new-driver");
-  }
 
   const XRayArgs &XRay = TC.getXRayArgs();
   XRay.addArgs(TC, Args, CmdArgs, InputType);
diff --git a/clang/test/Driver/cl-offload.cu b/clang/test/Driver/cl-offload.cu
index b05bf3b97b7eb71..8f1200f1733597b 100644
--- a/clang/test/Driver/cl-offload.cu
+++ b/clang/test/Driver/cl-offload.cu
@@ -18,11 +18,10 @@
 // CUDA-SAME: "-Weverything"
 // CUDA: link
 
-// HIP: "-cc1" "-triple" "x86_64-pc-windows-msvc{{.*}}" "-aux-triple" "amdgcn-amd-amdhsa"
-// HIP-SAME: "-Weverything"
 // HIP: "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-pc-windows-msvc"
 // HIP-SAME: "-Weverything"
-// HIP: {{lld.* "-flavor" "gnu" "-m" "elf64_amdgpu"}}
+// HIP: "-cc1" "-triple" "x86_64-pc-windows-msvc{{.*}}" "-aux-triple" "amdgcn-amd-amdhsa"
+// HIP-SAME: "-Weverything"
 // HIP: {{link.* "amdhip64.lib"}}
 
 // CMake uses this option when finding packages for HIP, so
diff --git a/clang/test/Driver/cuda-arch-translation.cu b/clang/test/Driver/cuda-arch-translation.cu
index e96191cc9d41836..a0ae16452692bf1 100644
--- a/clang/test/Driver/cuda-arch-translation.cu
+++ b/clang/test/Driver/cuda-arch-translation.cu
@@ -68,19 +68,19 @@
 
 // HIP: clang-offload-bundler
 
-// SM20:--image=profile=sm_20{{.*}}--image=profile=compute_20
-// SM21:--image=profile=sm_21{{.*}}--image=profile=compute_20
-// SM30:--image=profile=sm_30{{.*}}--image=profile=compute_30
-// SM32:--image=profile=sm_32{{.*}}--image=profile=compute_32
-// SM35:--image=profile=sm_35{{.*}}--image=profile=compute_35
-// SM37:--image=profile=sm_37{{.*}}--image=profile=compute_37
-// SM50:--image=profile=sm_50{{.*}}--image=profile=compute_50
-// SM52:--image=profile=sm_52{{.*}}--image=profile=compute_52
-// SM53:--image=profile=sm_53{{.*}}--image=profile=compute_53
-// SM60:--image=profile=sm_60{{.*}}--image=profile=compute_60
-// SM61:--image=profile=sm_61{{.*}}--image=profile=compute_61
-// SM62:--image=profile=sm_62{{.*}}--image=profile=compute_62
-// SM70:--image=profile=sm_70{{.*}}--image=profile=compute_70
+// SM20:--image=profile=sm_20{{.*}}
+// SM21:--image=profile=sm_21{{.*}}
+// SM30:--image=profile=sm_30{{.*}}
+// SM32:--image=profile=sm_32{{.*}}
+// SM35:--image=profile=sm_35{{.*}}
+// SM37:--image=profile=sm_37{{.*}}
+// SM50:--image=profile=sm_50{{.*}}
+// SM52:--image=profile=sm_52{{.*}}
+// SM53:--image=profile=sm_53{{.*}}
+// SM60:--image=profile=sm_60{{.*}}
+// SM61:--image=profile=sm_61{{.*}}
+// SM62:--image=profile=sm_62{{.*}}
+// SM70:--image=profile=sm_70{{.*}}
 // GFX600:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx600
 // GFX601:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx601
 // GFX602:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx602
diff --git a/clang/test/Driver/cuda-bindings.cu b/clang/test/Driver/cuda-bindings.cu
index 8ee1884936c069d..e36198898e5828f 100644
--- a/clang/test/Driver/cuda-bindings.cu
+++ b/clang/test/Driver/cuda-bindings.cu
@@ -23,14 +23,14 @@
 // BIN-NOT: cuda-bindings-device-cuda-nvptx64
 // BIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}}  output:
 // BIN-NOT: cuda-bindings-device-cuda-nvptx64
-// BIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
+// BIN: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out"
 
 //
 // Test single gpu architecture up to the assemble phase.
 //
 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --cuda-gpu-arch=sm_30 %s -S 2>&1 \
 // RUN: | FileCheck -check-prefix=ASM %s
-// ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
+// ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[BINDINGS:.+.s]]"
 // ASM-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
 
 //
@@ -61,8 +61,8 @@
 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
 // BIN2: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}}  output:
 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
-// AOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
-// TOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "{{.*}}/out"
+// AOUT: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out"
+// TOUT: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "{{.*}}/out"
 
 // .. same, but with -fsyntax-only
 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
@@ -78,9 +78,9 @@
 // RUN:        --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
 // RUN: | FileCheck -check-prefix=SYN %s
 // SYN-NOT: inputs:
-// SYN: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
-// SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
+//      SYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
 // SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
+// SYN-NEXT: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
 // SYN-NOT: inputs
 
 // .. and with --offload-new-driver
@@ -97,7 +97,7 @@
 // RUN:        --offload-arch=sm_30,sm_35 %s --offload-new-driver -o %t/out 2>&1 \
 // RUN: | FileCheck -check-prefix=NDSYN %s
 // NDSYN-NOT: inputs:
-// NDSYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
+//      NDSYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
 // NDSYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
 // NDSYN-NEXT: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
 // NDSYN-NOT: inputs:
@@ -109,8 +109,8 @@
 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
 // RUN:        --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \
 // RUN: | FileCheck -check-prefix=ASM2 %s
-// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
-// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.s"
+// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[SM30:.+.s]]"
+// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[SM35:.+.s]]"
 // ASM2-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
 
 //
@@ -125,7 +125,7 @@
 // RUN: | FileCheck -check-prefix=HBIN %s
 // HBIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}}  output:
 // HBIN-NOT: cuda-bindings-device-cuda-nvptx64
-// HBIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
+// HBIN: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out"
 
 //
 // Test one or more gpu architecture up to the assemble phase in host-only
@@ -163,7 +163,7 @@
 // Test two gpu architectures with complete compilation in device-only
 // compilation mode.
 //
-// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
+// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --no-offload-new-driver \
 // RUN:        --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only 2>&1 \
 // RUN: | FileCheck -check-prefix=DBIN2 %s
 // DBIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
@@ -177,7 +177,7 @@
 // Test two gpu architectures up to the assemble phase in device-only
 // compilation mode.
 //
-// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
+// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --no-offload-new-driver \
 // RUN:        --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only -S 2>&1 \
 // RUN: | FileCheck -check-prefix=DASM2 %s
 // DASM2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu
index 67facf77f6c68a9..34d173b398eb9cb 100644
--- a/clang/test/Driver/cuda-options.cu
+++ b/clang/test/Driver/cuda-options.cu
@@ -2,13 +2,13 @@
 
 // Simple compilation case. Compile device-side to PTX assembly and make sure
 // we use it on the host side.
-// RUN: %clang -### -target x86_64-linux-gnu -c -nogpulib -nogpuinc %s 2>&1 \
+// RUN: %clang -### --cuda-include-ptx=all -target x86_64-linux-gnu -c -nogpulib -nogpuinc %s 2>&1 \
 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
 // RUN:    -check-prefix NOLINK %s
 
 // Typical compilation + link case.
-// RUN: %clang -### -target x86_64-linux-gnu -nogpulib -nogpuinc %s 2>&1 \
+// RUN: %clang -### --cuda-include-ptx=all -target x86_64-linux-gnu -nogpulib -nogpuinc %s 2>&1 \
 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
 // RUN:    -check-prefix LINK %s
@@ -33,7 +33,7 @@
 // RUN:    -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
 
 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-compile-host-device \
-// RUN:    --cuda-host-only -nogpulib -nogpuinc %s 2>&1 \
+// RUN:    --cuda-host-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
 // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
 // RUN:    -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
 
@@ -48,27 +48,27 @@
 // RUN:    -check-prefix NOHOST -check-prefix NOLINK %s
 
 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-host-only \
-// RUN:   -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
+// RUN:   --cuda-include-ptx=all -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
 // RUN:    -check-prefix LINK %s
 
 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
-// RUN:   -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
+// RUN:   --cuda-include-ptx=all -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
 // RUN:    -check-prefix LINK %s
 
 // Verify that --cuda-gpu-arch option passes the correct GPU architecture to
 // device compilation.
-// RUN: %clang -### -nogpulib -nogpuinc --target=x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \
+// RUN: %clang -### --cuda-include-ptx=all -nogpulib -nogpuinc --target=x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
 // RUN:    -check-prefix DEVICE-SM52 -check-prefix HOST \
 // RUN:    -check-prefix INCLUDES-DEVICE -check-prefix NOLINK %s
 
 // Verify that there is one device-side compilation per --cuda-gpu-arch args
 // and that all results are included on the host side.
-// RUN: %clang -### --target=x86_64-linux-gnu \
+// RUN: %clang -### --target=x86_64-linux-gnu --cuda-include-ptx=all \
 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 -c %s 2>&1 \
 // RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,DEVICE2 \
 // RUN:             -check-prefixes DEVICE-SM52,DEVICE2-SM60 \
@@ -128,9 +128,9 @@
 // f) --no-cuda-gpu-arch=all negates all preceding --cuda-gpu-arch=X
 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
-// RUN:   --no-cuda-gpu-arch=all \
+// RUN:   --no-cuda-version-check --no-cuda-gpu-arch=all \
 // RUN:   --cuda-gpu-arch=sm_70 \
-// RUN:   -c -nogpulib -nogpuinc %s 2>&1 \
+// RUN:   -c --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
 // RUN: | FileCheck -check-prefixes NOARCH-SM52,NOARCH-SM60,ARCH-SM70 %s
 
 // g) There's no --cuda-gpu-arch=all
@@ -141,7 +141,7 @@
 
 
 // Verify that --[no-]cuda-include-ptx arguments are handled correctly.
-// a) by default we're including PTX for all GPUs.
+// a) by default we're not including PTX for all GPUs.
 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
 // RUN:   --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
 // RUN:   -c %s 2>&1 \
@@ -183,7 +183,8 @@
 // Verify -flto=thin -fwhole-program-vtables handling. This should result in
 // both options being passed to the host compilation, with neither passed to
 // the device compilation.
-// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc -c -flto=thin -fwhole-program-vtables %s 2>&1 \
+// RUN: %clang -### --target=x86_64-linux-gnu --cuda-include-ptx=all \
+// RUN:   -nogpulib -nogpuinc -c -flto=thin -fwhole-program-vtables %s 2>&1 \
 // RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,HOST,INCLUDES-DEVICE,NOLINK,THINLTOWPD %s
 // THINLTOWPD-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
 
diff --git a/clang/test/Driver/cuda-output-asm.cu b/clang/test/Driver/cuda-output-asm.cu
index 6b944d189172476..9d5b86bcbc1b463 100644
--- a/clang/test/Driver/cuda-output-asm.cu
+++ b/clang/test/Driver/cuda-output-asm.cu
@@ -17,13 +17,9 @@
 // SM30-DAG: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 // SM30-same: "-target-cpu" "sm_30"
 
-// RUN: not %clang -### -S --target=x86_64-linux-gnu -o foo.s %s 2>&1 \
-// RUN:   | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s
 // RUN: not %clang -### -S --target=x86_64-linux-gnu --cuda-device-only \
 // RUN:   --cuda-gpu-arch=sm_20 --cuda-gpu-arch=sm_30 -o foo.s %s 2>&1 \
 // RUN:   | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s
-// RUN: not %clang -### -emit-llvm -c --target=x86_64-linux-gnu -o foo.s %s 2>&1 \
-// RUN:   | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s
 // MULTIPLE-OUTPUT-FILES: error: cannot specify -o when generating multiple output files
 // Make sure we do not get duplicate diagnostics.
 // MULTIPLE-OUTPUT-FILES-NOT: error: cannot specify -o when generating multiple output files
diff --git a/clang/test/Driver/hip-gz-options.hip b/clang/test/Driver/hip-gz-options.hip
index 7425d5fa847b3f0..7bce8d5f66eebb3 100644
--- a/clang/test/Driver/hip-gz-options.hip
+++ b/clang/test/Driver/hip-gz-options.hip
@@ -11,4 +11,3 @@
 // CHECK-DAG: {{".*clang.*" .* "--compress-debug-sections=zlib"}}
 // CHECK-DAG: {{".*lld" .* "--compress-debug-sections=zlib"}}
 // CHECK-DAG: {{".*clang.*" .* "--compress-debug-sections=zlib"}}
-// CHECK: "--compress-debug-sections=zlib"
diff --git a/clang/test/Driver/hip-invalid-target-id.hip b/clang/test/Driver/hip-invalid-target-id.hip
index 555043facb2a356..94e6c4b8bfe0aa9 100644
--- a/clang/test/Driver/hip-invalid-target-id.hip
+++ b/clang/test/Driver/hip-invalid-target-id.hip
@@ -4,7 +4,7 @@
 // RUN:   --rocm-path=%S/Inputs/rocm \
 // RUN:   %s 2>&1 | FileCheck -check-prefix=NOPLUS %s
 
-// NOPLUS: error: invalid target ID 'gfx908xnack'
+// NOPLUS: error: unsupported HIP gpu architecture: gfx908xnack
 
 // RUN: not %clang -### --target=x86_64-linux-gnu \
 // RUN:   -x hip --offload-arch=gfx900 \
@@ -55,7 +55,7 @@
 // RUN:   --rocm-path=%S/Inputs/rocm \
 // RUN:   %s 2>&1 | FileCheck -check-prefix=NOCOLON %s
 
-// NOCOLON: error: invalid target ID 'gfx900+xnack'
+// NOCOLON: error: unsupported HIP gpu architecture: gfx900+xnack
 
 // RUN: not %clang -### --target=x86_64-linux-gnu \
 // RUN:   -x hip --offload-arch=gfx908 \
diff --git a/clang/test/Driver/hip-macros.hip b/clang/test/Driver/hip-macros.hip
index 3b3afba0b18ca34..36e0f71bd6eff60 100644
--- a/clang/test/Driver/hip-macros.hip
+++ b/clang/test/Driver/hip-macros.hip
@@ -73,8 +73,6 @@
 // RUN: %clang -E -dM --offload-arch=gfx940 --cuda-device-only -nogpuinc -nogpulib \
 // RUN:   %s 2>&1 | FileCheck --check-prefixes=NOPTS %s
 // PTS-DAG: #define __HIP_API_PER_THREAD_DEFAULT_STREAM__ 1
-// PTS-DAG: #define __HIP_API_PER_THREAD_DEFAULT_STREAM__ 1
-// PTS-DAG: #define HIP_API_PER_THREAD_DEFAULT_STREAM 1
 // PTS-DAG: #define HIP_API_PER_THREAD_DEFAULT_STREAM 1
 // NOPTS-NOT: #define __HIP_API_PER_THREAD_DEFAULT_STREAM__
 // NOPTS-NOT: #define HIP_API_PER_THREAD_DEFAULT_STREAM
@@ -85,4 +83,3 @@
 // RUN:   %s 2>&1 | FileCheck --check-prefix=APPROX %s
 // NOAPPROX-NOT: #define __CLANG_GPU_APPROX_TRANSCENDENTALS__
 // APPROX: #define __CLANG_GPU_APPROX_TRANSCENDENTALS__ 1
-// APPROX: #define __CLANG_GPU_APPROX_TRANSCENDENTALS__ 1
diff --git a/clang/test/Driver/hip-offload-arch.hip b/clang/test/Driver/hip-offload-arch.hip
index dd65a0e103ec693..1af53baf63da74f 100644
--- a/clang/test/Driver/hip-offload-arch.hip
+++ b/clang/test/Driver/hip-offload-arch.hip
@@ -4,5 +4,5 @@
 // RUN:   -nogpuinc -nogpulib \
 // RUN:   %s 2>&1 | FileCheck %s
 
-// CHECK: {{"[^"]*clang[^"]*".* "-target-cpu" "gfx1030"}}
-// CHECK: {{"[^"]*clang[^"]*".* "-target-cpu" "gfx1031"}}
+// CHECK: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx1030"
+// CHECK: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx1031"
diff --git a/clang/test/Driver/hip-options.hip b/clang/test/Driver/hip-options.hip
index 8c13137735fb911..35db01029feca90 100644
--- a/clang/test/Driver/hip-options.hip
+++ b/clang/test/Driver/hip-options.hip
@@ -83,10 +83,6 @@
 // RUN:   --cuda-gpu-arch=gfx906 -foffload-lto=thin -fwhole-program-vtables %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=HIPTHINLTO %s
 
-// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
-// RUN:   --cuda-gpu-arch=gfx906 -fgpu-rdc -foffload-lto=thin -fwhole-program-vtables %s 2>&1 \
-// RUN:   | FileCheck -check-prefix=HIPTHINLTO %s
-
 // Ensure we don't error about -fwhole-program-vtables for the non-device offload compile.
 // HIPTHINLTO-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
 // HIPTHINLTO-NOT: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit"
@@ -122,7 +118,7 @@
 
 // Check -Xoffload-linker option is passed to lld.
 
-// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib --no-offload-new-driver \
 // RUN:   --cuda-gpu-arch=gfx906 -fgpu-rdc -Xoffload-linker --build-id=md5 %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=OFL-LINK %s
 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
diff --git a/clang/test/Driver/hip-sanitize-options.hip b/clang/test/Driver/hip-sanitize-options.hip
index d94cbdacdaeb3a8..da3766790376d54 100644
--- a/clang/test/Driver/hip-sanitize-options.hip
+++ b/clang/test/Driver/hip-sanitize-options.hip
@@ -56,8 +56,8 @@
 // NORDC-NOT: {{"[^"]*lld(\.exe){0,1}".*}} "[[OUT]]" {{".*asanrtl.bc" ".*hip.bc"}}
 // NORDC: {{"[^"]*clang[^"]*".* "-triple" "x86_64-unknown-linux-gnu".* "-fsanitize=address"}}
 
-// RDC: {{"[^"]*clang[^"]*".* "-triple" "x86_64-unknown-linux-gnu".* "-fsanitize=address"}}
 // RDC: {{"[^"]*clang[^"]*".* "-emit-llvm-bc".* "-fcuda-is-device".* "-mlink-bitcode-file" ".*asanrtl.bc".* "-mlink-builtin-bitcode" ".*hip.bc".* "-fsanitize=address".*}} "-o" "[[OUT:[^"]*.bc]]"
+// RDC: {{"[^"]*clang[^"]*".* "-triple" "x86_64-unknown-linux-gnu".* "-fsanitize=address"}}
 // RDC-NOT: {{"[^"]*lld(\.exe){0,1}".*}} "[[OUT]]" {{".*asanrtl.bc" ".*hip.bc"}}
 
 // FAIL: AMDGPU address sanitizer runtime library (asanrtl) is not found. Please install ROCm device library which supports address sanitizer
diff --git a/clang/test/Driver/hip-save-temps.hip b/clang/test/Driver/hip-save-temps.hip
index 142c3f1611a3602..2e8489f65d7e0bb 100644
--- a/clang/test/Driver/hip-save-temps.hip
+++ b/clang/test/Driver/hip-save-temps.hip
@@ -1,31 +1,31 @@
 // -fno-gpu-rdc without -o with -c
 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
-// RUN:   -nogpuinc --offload-arch=gfx900 -c %s 2>&1 | \
+// RUN:   --no-offload-new-driver -nogpuinc --offload-arch=gfx900 -c %s 2>&1 | \
 // RUN:   FileCheck -check-prefixes=CHECK,NORDC %s
 
 // -fno-gpu-rdc without -o
 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
-// RUN:   -nogpuinc --offload-arch=gfx900 %s 2>&1 | \
+// RUN:   --no-offload-new-driver -nogpuinc --offload-arch=gfx900 %s 2>&1 | \
 // RUN:   FileCheck -check-prefixes=CHECK,NORDC,NOUT %s
 
 // -fno-gpu-rdc with -o
 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
-// RUN:   -nogpuinc -o executable --offload-arch=gfx900 %s 2>&1 | \
+// RUN:   --no-offload-new-driver -nogpuinc -o executable --offload-arch=gfx900 %s 2>&1 | \
 // RUN:   FileCheck -check-prefixes=CHECK,NORDC,WOUT %s
 
 // -fgpu-rdc without -o with -c
 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
-// RUN:   -nogpuinc -fgpu-rdc --offload-arch=gfx900 -c %s 2>&1 | \
+// RUN:   --no-offload-new-driver -nogpuinc -fgpu-rdc --offload-arch=gfx900 -c %s 2>&1 | \
 // RUN:   FileCheck -check-prefixes=CHECK,RDC,RDCC %s
 
 // -fgpu-rdc without -o
 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
-// RUN:   -nogpuinc -fgpu-rdc --offload-arch=gfx900 %s 2>&1 | \
+// RUN:   --no-offload-new-driver -nogpuinc -fgpu-rdc --offload-arch=gfx900 %s 2>&1 | \
 // RUN:   FileCheck -check-prefixes=CHECK,RDC,RDCL,NOUT %s
 
 // -fgpu-rdc with -o
 // UN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
-// UN:   -nogpuinc -o executable -fgpu-rdc --offload-arch=gfx900 %s 2>&1 | \
+// UN:   --offload-new-driver -nogpuinc -o executable -fgpu-rdc --offload-arch=gfx900 %s 2>&1 | \
 // UN:   FileCheck -check-prefixes=CHECK,RDC,RDCL,WOUT %s
 
 // -fgpu-rdc host object path
diff --git a/clang/test/Driver/hip-toolchain-device-only.hip b/clang/test/Driver/hip-toolchain-device-only.hip
index 12097819f668881..c0621854f17ceae 100644
--- a/clang/test/Driver/hip-toolchain-device-only.hip
+++ b/clang/test/Driver/hip-toolchain-device-only.hip
@@ -21,7 +21,3 @@
 
 // CHECK: [[LLD]] "-flavor" "gnu" "-m" "elf64_amdgpu" "--no-undefined" "-shared"
 // CHECK-SAME: "-o" "[[IMG_DEV_A_900:.*out]]" [[OBJ_DEV_A_900]]
-
-// CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
-// CHECK-SAME: "-targets={{.*}},hip{{.*}}-amdgcn-amd-amdhsa--gfx803,hip{{.*}}-amdgcn-amd-amdhsa--gfx900"
-// CHECK-SAME: "-input={{.*}}" "-input=[[IMG_DEV_A_803]]" "-input=[[IMG_DEV_A_900]]" "-output=[[BUNDLE_A:.*hipfb]]"
diff --git a/clang/test/Driver/hip-toolchain-mllvm.hip b/clang/test/Driver/hip-toolchain-mllvm.hip
index 33018cc398915b3..bedb053b9006cc8 100644
--- a/clang/test/Driver/hip-toolchain-mllvm.hip
+++ b/clang/test/Driver/hip-toolchain-mllvm.hip
@@ -30,13 +30,11 @@
 // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
 // CHECK-SAME: {{.*}} "-target-cpu" "gfx803"
 // CHECK-SAME: {{.*}} "-mllvm" "-unroll-count=10" {{.*}}
-// CHECK: [[LLD:".*lld.*"]] {{.*}}"-m" "elf64_amdgpu"{{.*}} "-plugin-opt=-unroll-count=10"{{.*}} "-plugin-opt=-inline-threshold=100"
 
 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
 // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
 // CHECK-SAME: {{.*}} "-target-cpu" "gfx900"
 // CHECK-SAME: {{.*}} "-mllvm" "-unroll-count=10" {{.*}}
-// CHECK: [[LLD:".*lld.*"]] {{.*}} "-plugin-opt=-unroll-count=10"{{.*}} "-plugin-opt=-inline-threshold=100"
 
 // NEG-NOT: {{".*opt"}}
 // NEG-NOT: {{".*llc"}}
diff --git a/clang/test/Driver/hip-toolchain-no-rdc.hip b/clang/test/Driver/hip-toolchain-no-rdc.hip
index 4a91c9dbe7570ef..e0591c2b4d53359 100644
--- a/clang/test/Driver/hip-toolchain-no-rdc.hip
+++ b/clang/test/Driver/hip-toolchain-no-rdc.hip
@@ -36,7 +36,7 @@
 // RUN:   %t/a.o %t/b.o \
 // RUN: 2>&1 | FileCheck -check-prefixes=LKONLY %s
 
-// RUN: %clang -### --target=x86_64-linux-gnu \
+// RUN: %clang -### --target=x86_64-linux-gnu --no-offload-new-driver \
 // RUN:   --offload-arch=amdgcnspirv --offload-arch=gfx900 \
 // RUN:   %s -nogpuinc -nogpulib \
 // RUN: 2>&1 | FileCheck -check-prefixes=AMDGCNSPIRV %s
diff --git a/clang/test/Driver/invalid-offload-options.cpp b/clang/test/Driver/invalid-offload-options.cpp
index 48d5310538a3cf7..a0b7f1bdbd3981e 100644
--- a/clang/test/Driver/invalid-offload-options.cpp
+++ b/clang/test/Driver/invalid-offload-options.cpp
@@ -26,4 +26,4 @@
 // RUN:   --offload=amdgcn-amd-amdhsa --offload-arch=gfx900 %s \
 // RUN: 2>&1 | FileCheck --check-prefix=OFFLOAD-ARCH-MIX %s
 
-// OFFLOAD-ARCH-MIX: error: option '--offload-arch' cannot be specified with '--offload'
+// OFFLOAD-ARCH-MIX: error: option '--offload' cannot be specified with '--offload-arch'
diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
index ebafd7eb7774ec1..58d13e284f7cdec 100644
--- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -1220,8 +1220,7 @@ getDeviceInput(const ArgList &Args) {
             : std::string(Arg->getValue());
 
     if (!Filename && Arg->getOption().matches(OPT_library))
-      reportError(
-          createStringError("unable to find library -l%s", Arg->getValue()));
+      return createStringError("unable to find library -l%s", Arg->getValue());
 
     if (!Filename || !sys::fs::exists(*Filename) ||
         sys::fs::is_directory(*Filename))
@@ -1233,6 +1232,12 @@ getDeviceInput(const ArgList &Args) {
       return createFileError(*Filename, EC);
 
     MemoryBufferRef Buffer = **BufferOrErr;
+    if (identify_magic(Buffer.getBuffer()) == file_magic::offload_bundle ||
+        identify_magic(Buffer.getBuffer()) ==
+            file_magic::offload_bundle_compressed)
+      return createStringError(
+          "clang offload bundles are deprecated. Recompile with "
+          "'--no-offload-new-driver'");
     if (identify_magic(Buffer.getBuffer()) == file_magic::elf_shared_object)
       continue;
 
diff --git a/clang/unittests/Tooling/ToolingTest.cpp b/clang/unittests/Tooling/ToolingTest.cpp
index f41a44fa0922a1d..61c84890408f677 100644
--- a/clang/unittests/Tooling/ToolingTest.cpp
+++ b/clang/unittests/Tooling/ToolingTest.cpp
@@ -421,9 +421,9 @@ TEST_F(CommandLineExtractorTest, AcceptOffloadingCompile) {
 TEST_F(CommandLineExtractorTest, AcceptOffloadingSyntaxOnly) {
   addFile("test.c", "int main() {}\n");
   const char *Args[] = {
-      "clang",         "-target",   "arm64-apple-macosx11.0.0",
-      "-fsyntax-only", "-x",        "hip",
-      "test.c",        "-nogpulib", "-nogpuinc"};
+      "clang",     "-target",  "arm64-apple-macosx11.0.0", "-fsyntax-only",
+      "-x",        "hip",      "--no-offload-new-driver",  "test.c",
+      "-nogpulib", "-nogpuinc"};
   EXPECT_NE(extractCC1Arguments(Args), nullptr);
 }
 
diff --git a/llvm/lib/Object/OffloadBinary.cpp b/llvm/lib/Object/OffloadBinary.cpp
index 89dc12551494fd4..d7f4ee4644a2f01 100644
--- a/llvm/lib/Object/OffloadBinary.cpp
+++ b/llvm/lib/Object/OffloadBinary.cpp
@@ -72,6 +72,15 @@ Error extractFromObject(const ObjectFile &Obj,
   assert((Obj.isELF() || Obj.isCOFF()) && "Invalid file type");
 
   for (SectionRef Sec : Obj.sections()) {
+    Expected<StringRef> NameOrErr = Sec.getName();
+    if (!NameOrErr)
+      return NameOrErr.takeError();
+
+    if (NameOrErr->starts_with("__CLANG_OFFLOAD_BUNDLE__"))
+      return createStringError(
+          "clang offload bundles are deprecated. Recompile with "
+          "'--no-offload-new-driver'");
+
     // ELF files contain a section with the LLVM_OFFLOADING type.
     if (Obj.isELF() &&
         static_cast<ELFSectionRef>(Sec).getType() != ELF::SHT_LLVM_OFFLOADING)
@@ -79,10 +88,6 @@ Error extractFromObject(const ObjectFile &Obj,
 
     // COFF has no section types so we rely on the name of the section.
     if (Obj.isCOFF()) {
-      Expected<StringRef> NameOrErr = Sec.getName();
-      if (!NameOrErr)
-        return NameOrErr.takeError();
-
       if (!NameOrErr->starts_with(".llvm.offloading"))
         continue;
     }



More information about the cfe-commits mailing list