[clang] bc08022 - [Clang] Fix Offloading related tests after D156363

Wed Aug 2 12:34:58 PDT 2023

Author: Joseph Huber
Date: 2023-08-02T14:34:51-05:00
New Revision: bc080221b3a2c73739caa2bf0521dd3984d0a934

URL: https://github.com/llvm/llvm-project/commit/bc080221b3a2c73739caa2bf0521dd3984d0a934
DIFF: https://github.com/llvm/llvm-project/commit/bc080221b3a2c73739caa2bf0521dd3984d0a934.diff

LOG: [Clang] Fix Offloading related tests after D156363

This patch fixes failing tests after checking the return code from the
driver. This is mostly due to the ROCm libraries not being present
during most compilations. Passing `-nogpuinc` should allow us to compile
without it for tests that require it. Additionally, some old tests set
the architecture of Nvidia tests to `sm_35` which is officially
unsupported in CUDA 12+ so it prints an error. We just increase the
architecture to `sm_52` in this case.

Reviewed By: MaskRay, yaxunl

Differential Revision: https://reviews.llvm.org/D156930

Added: 
    clang/test/Driver/Inputs/libomptarget/libomptarget-nvptx-sm_52.bc
    clang/test/Driver/Inputs/libomptarget/subdir/libomptarget-nvptx-sm_52.bc

Modified: 
    clang/test/Driver/amdgpu-hip-system-arch.c
    clang/test/Driver/cuda-bad-arch.cu
    clang/test/Driver/hip-autolink.hip
    clang/test/Driver/hip-binding.hip
    clang/test/Driver/hip-cuid-hash.hip
    clang/test/Driver/hip-cuid.hip
    clang/test/Driver/hip-default-gpu-arch.hip
    clang/test/Driver/hip-device-compile.hip
    clang/test/Driver/hip-host-cpu-features.hip
    clang/test/Driver/hip-launch-api.hip
    clang/test/Driver/hip-link-bc-to-bc.hip
    clang/test/Driver/hip-link-bundle-archive.hip
    clang/test/Driver/hip-no-device-libs.hip
    clang/test/Driver/hip-options.hip
    clang/test/Driver/hip-output-file-name.hip
    clang/test/Driver/hip-printf.hip
    clang/test/Driver/hip-save-temps.hip
    clang/test/Driver/hip-std.hip
    clang/test/Driver/hip-syntax-only.hip
    clang/test/Driver/hip-toolchain-dwarf.hip
    clang/test/Driver/hip-toolchain-features.hip
    clang/test/Driver/hip-toolchain-mllvm.hip
    clang/test/Driver/hip-toolchain-opt.hip
    clang/test/Driver/lto.cu
    clang/test/Driver/openmp-offload-gpu.c
    clang/test/Driver/openmp-offload-infer.c

Removed: 
    clang/test/Driver/Inputs/libomptarget/libomptarget-nvptx-sm_35.bc
    clang/test/Driver/Inputs/libomptarget/subdir/libomptarget-nvptx-sm_35.bc


################################################################################
diff  --git a/clang/test/Driver/Inputs/libomptarget/libomptarget-nvptx-sm_35.bc b/clang/test/Driver/Inputs/libomptarget/libomptarget-nvptx-sm_52.bc
similarity index 100%
rename from clang/test/Driver/Inputs/libomptarget/libomptarget-nvptx-sm_35.bc
rename to clang/test/Driver/Inputs/libomptarget/libomptarget-nvptx-sm_52.bc

diff  --git a/clang/test/Driver/Inputs/libomptarget/subdir/libomptarget-nvptx-sm_35.bc b/clang/test/Driver/Inputs/libomptarget/subdir/libomptarget-nvptx-sm_52.bc
similarity index 100%
rename from clang/test/Driver/Inputs/libomptarget/subdir/libomptarget-nvptx-sm_35.bc
rename to clang/test/Driver/Inputs/libomptarget/subdir/libomptarget-nvptx-sm_52.bc

diff  --git a/clang/test/Driver/amdgpu-hip-system-arch.c b/clang/test/Driver/amdgpu-hip-system-arch.c
index 1c72c2d413032c..6b2955c274135b 100644

--- a/clang/test/Driver/amdgpu-hip-system-arch.c
+++ b/clang/test/Driver/amdgpu-hip-system-arch.c
@@ -26,8 +26,8 @@
 // EMPTY-OUTPUT: error: cannot determine amdgcn architecture: No AMD GPU detected in the system; consider passing it via '--offload-arch'
 
 // case when amdgpu-arch returns a gfx906 GPU.
-// RUN:   not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 -x hip %s 2>&1 \
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 -x hip %s 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=ARCH-GFX906
-// RUN:   not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-new-driver --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 -x hip %s 2>&1 \
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib --offload-new-driver --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 -x hip %s 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=ARCH-GFX906
 // ARCH-GFX906: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906"

diff  --git a/clang/test/Driver/cuda-bad-arch.cu b/clang/test/Driver/cuda-bad-arch.cu
index 68988712d83a5f..877b20bc9351bc 100644
--- a/clang/test/Driver/cuda-bad-arch.cu
+++ b/clang/test/Driver/cuda-bad-arch.cu
@@ -20,23 +20,19 @@
 
 // BAD_CUDA9: GPU arch sm_21 is supported by CUDA versions between 7.0 and 8.0
 
-// RUN: not %clang -### --target=x86_64-linux-gnu --cuda-gpu-arch=sm_20 -c %s 2>&1 \
+// RUN: %clang -### -target x86_64-linux-gnu -nogpulib -nogpuinc --cuda-gpu-arch=sm_52 -c --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
 // RUN: | FileCheck -check-prefix OK %s
-// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
+// RUN: %clang -### -x hip --target=x86_64-linux-gnu -nogpulib -nogpuinc --cuda-gpu-arch=gfx908 -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix OK %s
-// RUN: not %clang -### -x hip --target=x86_64-linux-gnu --cuda-gpu-arch=gfx908 -c %s 2>&1 \
+// RUN: %clang -### -x hip --target=x86_64-linux-gnu -nogpulib -nogpuinc --cuda-gpu-arch=gfx90a -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix OK %s
-// RUN: not %clang -### -x hip --target=x86_64-linux-gnu --cuda-gpu-arch=gfx90a -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix OK %s
-// RUN: not %clang -### --target=x86_64-linux-gnu --cuda-gpu-arch=gfx940 -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix OK %s
-// RUN: not %clang -### --target=x86_64-linux-gnu -c %s 2>&1 \
+// RUN: %clang -### -x hip --target=x86_64-linux-gnu -nogpulib -nogpuinc --cuda-gpu-arch=gfx940 -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix OK %s
 
 // We don't allow using NVPTX/AMDGCN for host compilation.
-// RUN: not %clang -### --cuda-host-only --target=nvptx-nvidia-cuda -c %s 2>&1 \
+// RUN: not %clang -### --cuda-host-only --target=nvptx-nvidia-cuda -nogpulib -nogpuinc -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix HOST_NVPTX %s
-// RUN: not %clang -### --cuda-host-only --target=amdgcn-amd-amdhsa -c %s 2>&1 \
+// RUN: not %clang -### --cuda-host-only --target=amdgcn-amd-amdhsa -nogpulib -nogpuinc -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix HOST_AMDGCN %s
 
 // OK-NOT: error: Unsupported CUDA gpu architecture

diff  --git a/clang/test/Driver/hip-autolink.hip b/clang/test/Driver/hip-autolink.hip
index a78ca7c00d2ee4..bb0e58a186cddc 100644
--- a/clang/test/Driver/hip-autolink.hip
+++ b/clang/test/Driver/hip-autolink.hip
@@ -1,9 +1,9 @@
 // REQUIRES: x86-registered-target
 // REQUIRES: amdgpu-registered-target
 //
-// RUN: not %clang --target=i386-pc-windows-msvc --cuda-gpu-arch=gfx906 -nogpulib \
+// RUN: %clang --target=i386-pc-windows-msvc --cuda-gpu-arch=gfx906 -nogpulib -nogpuinc \
 // RUN:   --cuda-device-only %s -### 2>&1 | FileCheck --check-prefix=DEV %s
-// RUN: not %clang --target=i386-pc-windows-msvc --cuda-gpu-arch=gfx906 -nogpulib \
+// RUN: %clang --target=i386-pc-windows-msvc --cuda-gpu-arch=gfx906 -nogpulib -nogpuinc \
 // RUN:   --cuda-host-only %s -### 2>&1 | FileCheck --check-prefix=HOST %s
 
 // DEV: "-cc1" "-triple" "amdgcn-amd-amdhsa"

diff  --git a/clang/test/Driver/hip-binding.hip b/clang/test/Driver/hip-binding.hip
index 4a8d6e9c331a01..c48397168a60f0 100644
--- a/clang/test/Driver/hip-binding.hip
+++ b/clang/test/Driver/hip-binding.hip
@@ -56,7 +56,7 @@
 // Check to make sure we can generate multiple outputs for device-only
 // compilation and fail with '-o'.
 //
-// RUN: %clang -### --target=x86_64-linux-gnu --offload-new-driver -ccc-print-bindings \
+// RUN: %clang -### --target=x86_64-linux-gnu --offload-new-driver -ccc-print-bindings -nogpulib -nogpuinc \
 // RUN:        --offload-arch=gfx90a --offload-arch=gfx908 --offload-device-only -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix=MULTI-D-ONLY %s
 //      MULTI-D-ONLY: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[GFX908:.+]]"
@@ -64,7 +64,7 @@
 // MULTI-D-ONLY-NEXT: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]"], output: "[[GFX90a:.+]]"
 // MULTI-D-ONLY-NEXT: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[GFX90a]]"], output: "[[GFX90a_OUT:.+]]"
 //
-// RUN: not %clang -### --target=x86_64-linux-gnu --offload-new-driver -ccc-print-bindings \
+// RUN: not %clang -### --target=x86_64-linux-gnu --offload-new-driver -ccc-print-bindings -nogpulib -nogpuinc \
 // RUN:        --offload-arch=gfx90a --offload-arch=gfx908 --offload-device-only -c -o %t %s 2>&1 \
 // RUN: | FileCheck -check-prefix=MULTI-D-ONLY-O %s
 // MULTI-D-ONLY-O: error: cannot specify -o when generating multiple output files
@@ -73,7 +73,7 @@
 // Check to ensure that we can use '-fsyntax-only' for HIP output with the new
 // driver.
 // 
-// RUN: not %clang -### --target=x86_64-unknown-linux-gnu --offload-new-driver \
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu --offload-new-driver -nogpulib -nogpuinc \
 // RUN:        -fsyntax-only --offload-arch=gfx90a --offload-arch=gfx908 -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SYNTAX-ONLY %s
 // SYNTAX-ONLY: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-fsyntax-only"
@@ -84,7 +84,7 @@
 // Check to ensure that we can use '-foffload' when not operating in RDC-mode.
 //
 // RUN: %clang -### --target=x86_64-linux-gnu -fno-gpu-rdc --offload-new-driver -ccc-print-bindings \
-// RUN:        -foffload-lto --offload-arch=gfx90a --offload-arch=gfx908 -c %s 2>&1 \
+// RUN:        -nogpulib -nogpuinc -foffload-lto --offload-arch=gfx90a --offload-arch=gfx908 -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix=LTO-NO-RDC %s
 //      LTO-NO-RDC: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[LTO_908:.+]]"
 // LTO-NO-RDC-NEXT: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[LTO_908]]"], output: "[[OBJ_908:.+]]"

diff  --git a/clang/test/Driver/hip-cuid-hash.hip b/clang/test/Driver/hip-cuid-hash.hip
index c3b09746ca4854..1b4d26c471c15d 100644
--- a/clang/test/Driver/hip-cuid-hash.hip
+++ b/clang/test/Driver/hip-cuid-hash.hip
@@ -4,12 +4,12 @@
 // Check CUID generated by hash.
 // The same CUID is generated for the same file with the same options.
 
-// RUN: not %clang -### -x hip --target=x86_64-unknown-linux-gnu \
-// RUN:   --offload-arch=gfx906 -c -nogpulib -fuse-cuid=hash \
+// RUN: %clang -### -x hip --target=x86_64-unknown-linux-gnu \
+// RUN:   --offload-arch=gfx906 -c -nogpuinc -nogpulib -fuse-cuid=hash \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu >%t.out 2>&1
 
-// RUN: not %clang -### -x hip --target=x86_64-unknown-linux-gnu \
-// RUN:   --offload-arch=gfx906 -c -nogpulib -fuse-cuid=hash \
+// RUN: %clang -### -x hip --target=x86_64-unknown-linux-gnu \
+// RUN:   --offload-arch=gfx906 -c -nogpuinc -nogpulib -fuse-cuid=hash \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu >>%t.out 2>&1
 
 // RUN: FileCheck %s -check-prefixes=SAME -input-file %t.out
@@ -17,12 +17,12 @@
 // Check CUID generated by hash.
 // Different CUID's are generated for the same file with different options.
 
-// RUN: not %clang -### -x hip --target=x86_64-unknown-linux-gnu -DX=1 \
-// RUN:   --offload-arch=gfx906 -c -nogpulib -fuse-cuid=hash \
+// RUN: %clang -### -x hip --target=x86_64-unknown-linux-gnu -DX=1 \
+// RUN:   --offload-arch=gfx906 -c -nogpuinc -nogpulib -fuse-cuid=hash \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu >%t.out 2>&1
 
-// RUN: not %clang -### -x hip --target=x86_64-unknown-linux-gnu -DX=2 \
-// RUN:   --offload-arch=gfx906 -c -nogpulib -fuse-cuid=hash \
+// RUN: %clang -### -x hip --target=x86_64-unknown-linux-gnu -DX=2 \
+// RUN:   --offload-arch=gfx906 -c -nogpuinc -nogpulib -fuse-cuid=hash \
 // RUN:   %S/Inputs/../Inputs/hip_multiple_inputs/a.cu >>%t.out 2>&1
 
 // RUN: FileCheck %s -check-prefixes=DIFF -input-file %t.out

diff  --git a/clang/test/Driver/hip-cuid.hip b/clang/test/Driver/hip-cuid.hip
index 10e8bef96ca610..421810b824fd61 100644
--- a/clang/test/Driver/hip-cuid.hip
+++ b/clang/test/Driver/hip-cuid.hip
@@ -7,51 +7,51 @@
 // RUN:   --target=x86_64-unknown-linux-gnu \
 // RUN:   --offload-arch=gfx900 \
 // RUN:   --offload-arch=gfx906 \
-// RUN:   -c -nogpulib -fuse-cuid=invalid \
+// RUN:   -c -nogpuinc -nogpulib -fuse-cuid=invalid \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck -check-prefixes=INVALID %s
 
 // Check random CUID generator.
 
-// RUN: not %clang -### -x hip \
+// RUN: %clang -### -x hip \
 // RUN:   --target=x86_64-unknown-linux-gnu \
 // RUN:   --offload-arch=gfx900 \
 // RUN:   --offload-arch=gfx906 \
-// RUN:   -c -nogpulib -fuse-cuid=random \
+// RUN:   -c -nogpuinc -nogpulib -fuse-cuid=random \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck -check-prefixes=COMMON,HEX %s
 
 // Check fixed CUID.
 
-// RUN: not %clang -### -x hip \
+// RUN: %clang -### -x hip \
 // RUN:   --target=x86_64-unknown-linux-gnu \
 // RUN:   --offload-arch=gfx900 \
 // RUN:   --offload-arch=gfx906 \
-// RUN:   -c -nogpulib -cuid=xyz_123 \
+// RUN:   -c -nogpuinc -nogpulib -cuid=xyz_123 \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck -check-prefixes=COMMON,FIXED %s
 
 // Check fixed CUID override -fuse-cuid.
 
-// RUN: not %clang -### -x hip \
+// RUN: %clang -### -x hip \
 // RUN:   --target=x86_64-unknown-linux-gnu \
 // RUN:   --offload-arch=gfx900 \
 // RUN:   --offload-arch=gfx906 \
-// RUN:   -c -nogpulib -fuse-cuid=random -cuid=xyz_123 \
+// RUN:   -c -nogpuinc -nogpulib -fuse-cuid=random -cuid=xyz_123 \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck -check-prefixes=COMMON,FIXED %s
 
 // Check hash CUID generator.
 
-// RUN: not %clang -### -x hip \
+// RUN: %clang -### -x hip \
 // RUN:   --target=x86_64-unknown-linux-gnu \
 // RUN:   --offload-arch=gfx900 \
 // RUN:   --offload-arch=gfx906 \
-// RUN:   -c -nogpulib -fuse-cuid=hash \
+// RUN:   -c -nogpuinc -nogpulib -fuse-cuid=hash \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck -check-prefixes=COMMON,HEX %s

diff  --git a/clang/test/Driver/hip-default-gpu-arch.hip b/clang/test/Driver/hip-default-gpu-arch.hip
index ff4de8c9a4b095..0476aa3f99bfa5 100644
--- a/clang/test/Driver/hip-default-gpu-arch.hip
+++ b/clang/test/Driver/hip-default-gpu-arch.hip
@@ -1,6 +1,6 @@
 // REQUIRES: x86-registered-target
 // REQUIRES: amdgpu-registered-target
 
-// RUN: not %clang -### -c %s 2>&1 | FileCheck %s
+// RUN: %clang -### -nogpulib -nogpuinc -c %s 2>&1 | FileCheck %s
 
 // CHECK: {{.*}}clang{{.*}}"-target-cpu" "gfx906"

diff  --git a/clang/test/Driver/hip-device-compile.hip b/clang/test/Driver/hip-device-compile.hip
index a2265d0264e780..3c3e3878562474 100644
--- a/clang/test/Driver/hip-device-compile.hip
+++ b/clang/test/Driver/hip-device-compile.hip
@@ -6,56 +6,56 @@
 // is specified.
 
 // Output unbundled bitcode.
-// RUN: not %clang -c -emit-llvm --cuda-device-only -### --target=x86_64-linux-gnu \
-// RUN:   -o a.bc -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
+// RUN: %clang -c -emit-llvm --cuda-device-only -### --target=x86_64-linux-gnu \
+// RUN:   --rocm-path=%S/Inputs/rocm -o a.bc -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
 // RUN:   --hip-device-lib=lib1.bc \
 // RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
 // RUN: 2>&1 | FileCheck -check-prefixes=CHECK,BC,NBUN %s
 
 // Output bundled bitcode.
-// RUN: not %clang -c -emit-llvm --cuda-device-only -### --target=x86_64-linux-gnu \
-// RUN:   -o a.bc -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
+// RUN: %clang -c -emit-llvm --cuda-device-only -### --target=x86_64-linux-gnu \
+// RUN:   --rocm-path=%S/Inputs/rocm -o a.bc -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
 // RUN:   --hip-device-lib=lib1.bc \
 // RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu --gpu-bundle-output \
 // RUN: 2>&1 | FileCheck -check-prefixes=CHECK,BCBUN %s
 
 // Output unbundled LLVM IR.
-// RUN: not %clang -c -S -emit-llvm --cuda-device-only -### --target=x86_64-linux-gnu \
-// RUN:   -o a.ll -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
+// RUN: %clang -c -S -emit-llvm --cuda-device-only -### --target=x86_64-linux-gnu \
+// RUN:   --rocm-path=%S/Inputs/rocm -o a.ll -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
 // RUN:   --hip-device-lib=lib1.bc \
 // RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
 // RUN: 2>&1 | FileCheck -check-prefixes=CHECK,LL,NBUN %s
 
 // Output bundled LLVM IR.
-// RUN: not %clang -c -S -emit-llvm --cuda-device-only -### --target=x86_64-linux-gnu \
-// RUN:   -o a.ll -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
+// RUN: %clang -c -S -emit-llvm --cuda-device-only -### --target=x86_64-linux-gnu \
+// RUN:   --rocm-path=%S/Inputs/rocm -o a.ll -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
 // RUN:   --hip-device-lib=lib1.bc \
 // RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu --gpu-bundle-output \
 // RUN: 2>&1 | FileCheck -check-prefixes=CHECK,LLBUN %s
 
 // Output unbundled assembly.
-// RUN: not %clang -c -S --cuda-device-only -### --target=x86_64-linux-gnu \
-// RUN:   -o a.s -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
+// RUN: %clang -c -S --cuda-device-only -### --target=x86_64-linux-gnu \
+// RUN:   --rocm-path=%S/Inputs/rocm -o a.s -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
 // RUN:   --hip-device-lib=lib1.bc \
 // RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
 // RUN: 2>&1 | FileCheck -check-prefixes=CHECK,ASM,NBUN %s
 
 // Output relocatable.
-// RUN: not %clang -c --cuda-device-only -### --target=x86_64-linux-gnu \
-// RUN:   -o a.o -x hip --cuda-gpu-arch=gfx900 -fhip-emit-relocatable \
+// RUN: %clang -c --cuda-device-only -### --target=x86_64-linux-gnu \
+// RUN:   --rocm-path=%S/Inputs/rocm -o a.o -x hip --cuda-gpu-arch=gfx900 -fhip-emit-relocatable \
 // RUN:   --hip-device-lib=lib1.bc \
 // RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
 // RUN: 2>&1 | FileCheck -check-prefixes=CHECK,NBUN,RELOC %s
 
 // Output bundled assembly.
-// RUN: not %clang -c -S --cuda-device-only -### --target=x86_64-linux-gnu \
-// RUN:   -o a.s -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
+// RUN: %clang -c -S --cuda-device-only -### --target=x86_64-linux-gnu \
+// RUN:   --rocm-path=%S/Inputs/rocm -o a.s -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
 // RUN:   --hip-device-lib=lib1.bc \
 // RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu --gpu-bundle-output \
@@ -94,32 +94,32 @@
 // specified.
 
 // Output bundled code objects.
-// RUN: not %clang -c --cuda-device-only -### --target=x86_64-linux-gnu \
-// RUN:   -o a.o -x hip --cuda-gpu-arch=gfx900 \
+// RUN: %clang -c --cuda-device-only -### --target=x86_64-linux-gnu \
+// RUN:   --rocm-path=%S/Inputs/rocm -o a.o -x hip --cuda-gpu-arch=gfx900 \
 // RUN:   --hip-device-lib=lib1.bc \
 // RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
 // RUN: 2>&1 | FileCheck -check-prefixes=OBJ,OBJ-BUN %s
 
 // Output unbundled code objects.
-// RUN: not %clang -c --cuda-device-only -### --target=x86_64-linux-gnu \
-// RUN:   -o a.o -x hip --cuda-gpu-arch=gfx900 \
+// RUN: %clang -c --cuda-device-only -### --target=x86_64-linux-gnu \
+// RUN:   --rocm-path=%S/Inputs/rocm -o a.o -x hip --cuda-gpu-arch=gfx900 \
 // RUN:   --hip-device-lib=lib1.bc \
 // RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu --no-gpu-bundle-output \
 // RUN: 2>&1 | FileCheck -check-prefixes=OBJ,OBJ-UBUN %s
 
 // Output bundled code objects.
-// RUN: not %clang --cuda-device-only -### --target=x86_64-linux-gnu \
-// RUN:   -o a.o -x hip --cuda-gpu-arch=gfx900 \
+// RUN: %clang --cuda-device-only -### --target=x86_64-linux-gnu \
+// RUN:   --rocm-path=%S/Inputs/rocm -o a.o -x hip --cuda-gpu-arch=gfx900 \
 // RUN:   --hip-device-lib=lib1.bc \
 // RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
 // RUN: 2>&1 | FileCheck -check-prefixes=OBJ,OBJ-BUN %s
 
 // Output unbundled code objects.
-// RUN: not %clang --cuda-device-only -### --target=x86_64-linux-gnu \
-// RUN:   -o a.o -x hip --cuda-gpu-arch=gfx900 \
+// RUN: %clang --cuda-device-only -### --target=x86_64-linux-gnu \
+// RUN:   --rocm-path=%S/Inputs/rocm -o a.o -x hip --cuda-gpu-arch=gfx900 \
 // RUN:   --hip-device-lib=lib1.bc \
 // RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu --no-gpu-bundle-output \

diff  --git a/clang/test/Driver/hip-host-cpu-features.hip b/clang/test/Driver/hip-host-cpu-features.hip
index 717c6e265ef995..f288d96373bfe3 100644
--- a/clang/test/Driver/hip-host-cpu-features.hip
+++ b/clang/test/Driver/hip-host-cpu-features.hip
@@ -1,9 +1,9 @@
 // REQUIRES: x86-registered-target
 // REQUIRES: amdgpu-registered-target
 
-// RUN: not %clang -### -c --target=x86_64-linux-gnu -march=znver2 --cuda-gpu-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s -check-prefix=HOSTCPU
-// RUN: not %clang -### -c --target=x86_64-linux-gnu -msse3 --cuda-gpu-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s -check-prefix=HOSTSSE3
-// RUN: not %clang -### -c --target=x86_64-linux-gnu --gpu-use-aux-triple-only -march=znver2 --cuda-gpu-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s -check-prefix=NOHOSTCPU
+// RUN: %clang -### -c --target=x86_64-linux-gnu -march=znver2 --cuda-gpu-arch=gfx803 -nogpuinc -nogpulib %s 2>&1 | FileCheck %s -check-prefix=HOSTCPU
+// RUN: %clang -### -c --target=x86_64-linux-gnu -msse3 --cuda-gpu-arch=gfx803 -nogpuinc -nogpulib %s 2>&1 | FileCheck %s -check-prefix=HOSTSSE3
+// RUN: %clang -### -c --target=x86_64-linux-gnu --gpu-use-aux-triple-only -march=znver2 --cuda-gpu-arch=gfx803 -nogpuinc -nogpulib %s 2>&1 | FileCheck %s -check-prefix=NOHOSTCPU
 
 // HOSTCPU: "-cc1" "-triple" "amdgcn-amd-amdhsa"
 // HOSTCPU-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"

diff  --git a/clang/test/Driver/hip-launch-api.hip b/clang/test/Driver/hip-launch-api.hip
index 9b1fd7cf2c36e2..8b240240bb5747 100644
--- a/clang/test/Driver/hip-launch-api.hip
+++ b/clang/test/Driver/hip-launch-api.hip
@@ -3,14 +3,14 @@
 
 // By default FE assumes -fhip-new-launch-api.
 
-// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -offload-arch=gfx906 %s \
-// RUN:   2>&1 | FileCheck -check-prefixes=NEW %s
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -offload-arch=gfx906 \ 
+// RUN:   -nogpulib -nogpuinc %s 2>&1 | FileCheck -check-prefixes=NEW %s
 // NEW: "-fhip-new-launch-api"
 
-// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -offload-arch=gfx906 %s \
-// RUN:   -fhip-new-launch-api 2>&1 | FileCheck -check-prefixes=NEW %s
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -offload-arch=gfx906 %s \
+// RUN:   -nogpulib -nogpuinc -fhip-new-launch-api 2>&1 | FileCheck -check-prefixes=NEW %s
 // NEW: "-fhip-new-launch-api"
 
-// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -offload-arch=gfx906 %s \
-// RUN:   -fno-hip-new-launch-api 2>&1 | FileCheck -check-prefixes=OLD %s
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -offload-arch=gfx906 %s \
+// RUN:   -nogpulib -nogpuinc -fno-hip-new-launch-api 2>&1 | FileCheck -check-prefixes=OLD %s
 // OLD-NOT: "-fhip-new-launch-api"

diff  --git a/clang/test/Driver/hip-link-bc-to-bc.hip b/clang/test/Driver/hip-link-bc-to-bc.hip
index 0608bed5b1ebbe..52eab97bcebb0e 100644
--- a/clang/test/Driver/hip-link-bc-to-bc.hip
+++ b/clang/test/Driver/hip-link-bc-to-bc.hip
@@ -5,8 +5,8 @@
 // RUN: touch %t/bundle1.bc
 // RUN: touch %t/bundle2.bc
 
-// RUN: not %clang -### --target=x86_64-unknown-linux-gnu --offload-arch=gfx906 --hip-link \
-// RUN:   -emit-llvm -fgpu-rdc --cuda-device-only \
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu --offload-arch=gfx906 --hip-link \
+// RUN:   -nogpulib -nogpuinc -emit-llvm -fgpu-rdc --cuda-device-only \
 // RUN:   %t/bundle1.bc %t/bundle2.bc \
 // RUN:   2>&1 | FileCheck -check-prefix=BITCODE %s
 
@@ -22,8 +22,8 @@
 // RUN: llvm-ar rc %t/libhipbundle.a
 // RUN: touch %t/bundle.bc
 
-// RUN: not %clang -### --target=x86_64-unknown-linux-gnu --offload-arch=gfx906 --hip-link \
-// RUN:   -emit-llvm -fgpu-rdc --cuda-device-only \
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu --offload-arch=gfx906 --hip-link \
+// RUN:   -nogpulib -nogpuinc -emit-llvm -fgpu-rdc --cuda-device-only \
 // RUN:   %t/bundle.bc -L%t -lhipbundle \
 // RUN:   2>&1 | FileCheck -check-prefix=ARCHIVE %s
 

diff  --git a/clang/test/Driver/hip-link-bundle-archive.hip b/clang/test/Driver/hip-link-bundle-archive.hip
index a1234270110ac3..7da7b91d1e6251 100644
--- a/clang/test/Driver/hip-link-bundle-archive.hip
+++ b/clang/test/Driver/hip-link-bundle-archive.hip
@@ -8,66 +8,66 @@
 // RUN: rm -rf %t hipBundled && mkdir %t hipBundled
 // RUN: touch %t/dummy.bc
 // RUN: llvm-ar cr %t/libhipBundled.a %t/dummy.bc
-// RUN: not %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
+// RUN: %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
 // RUN:   --target=x86_64-unknown-linux-gnu \
-// RUN:   -nogpulib %s -fgpu-rdc -L%t -lhipBundled \
+// RUN:   -nogpuinc -nogpulib %s -fgpu-rdc -L%t -lhipBundled \
 // RUN:   2>&1 | FileCheck -check-prefixes=GNU,GNU1,GNU-L %s
 
 // RUN: %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 -nogpuinc \
 // RUN:   --target=x86_64-unknown-linux-gnu \
-// RUN:   -nogpulib %s -fgpu-rdc -L%t -l:libhipBundled.a \
+// RUN:   -nogpuinc -nogpulib %s -fgpu-rdc -L%t -l:libhipBundled.a \
 // RUN:   2>&1 | FileCheck -check-prefixes=GNU,GNU1,GNU-LA %s
 
-// RUN: not %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
+// RUN: %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
 // RUN:   --target=x86_64-unknown-linux-gnu \
-// RUN:   -nogpulib %s -fgpu-rdc %t/libhipBundled.a \
+// RUN:   -nogpuinc -nogpulib %s -fgpu-rdc %t/libhipBundled.a \
 // RUN:   2>&1 | FileCheck -check-prefixes=GNU,GNU1,GNU-A %s
 
 // RUN: llvm-ar cr %t/libhipBundled.a.5.2 %t/dummy.bc
-// RUN: not %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
+// RUN: %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
 // RUN:   --target=x86_64-unknown-linux-gnu \
-// RUN:   -nogpulib %s -fgpu-rdc %t/libhipBundled.a.5.2 \
+// RUN:   -nogpuinc -nogpulib %s -fgpu-rdc %t/libhipBundled.a.5.2 \
 // RUN:   2>&1 | FileCheck -check-prefixes=GNU,GNU2,GNU-A %s
 
 // Check if a file is not an archive, it is not unbundled.
 
 // RUN: touch %t/libNonArchive.a
-// RUN: not %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
+// RUN: %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
 // RUN:   --target=x86_64-unknown-linux-gnu \
-// RUN:   -nogpulib %s -fgpu-rdc -L%t -lNonArchive \
+// RUN:   -nogpuinc -nogpulib %s -fgpu-rdc -L%t -lNonArchive \
 // RUN:   2>&1 | FileCheck -check-prefixes=NONARCHIVE %s
-// RUN: not %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
+// RUN: %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
 // RUN:   --target=x86_64-unknown-linux-gnu \
-// RUN:   -nogpulib %s -fgpu-rdc -L%t -l:libNonArchive.a \
+// RUN:   -nogpuinc -nogpulib %s -fgpu-rdc -L%t -l:libNonArchive.a \
 // RUN:   2>&1 | FileCheck -check-prefixes=NONARCHIVE %s
 // RUN: not %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
 // RUN:   --target=x86_64-unknown-linux-gnu \
-// RUN:   -nogpulib %s -fgpu-rdc -L%t libNonArchive.a \
+// RUN:   -nogpuinc -nogpulib %s -fgpu-rdc -L%t libNonArchive.a \
 // RUN:   2>&1 | FileCheck -check-prefixes=NONARCHIVE %s
 
 // Check if a file does not exist, it is not unbundled.
 
 // RUN: not %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
 // RUN:   --target=x86_64-unknown-linux-gnu \
-// RUN:   -nogpulib %s -fgpu-rdc %t/NoneExist.a \
+// RUN:   -nogpuinc -nogpulib %s -fgpu-rdc %t/NoneExist.a \
 // RUN:   2>&1 | FileCheck -check-prefixes=NONE %s
 
 // Check unbundling archive for MSVC.
 
 // RUN: llvm-ar cr %t/hipBundled2.lib %t/dummy.bc
-// RUN: not %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
+// RUN: %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
 // RUN:   --target=x86_64-pc-windows-msvc \
-// RUN:   -nogpulib %s -fgpu-rdc -L%t -lhipBundled2 \
+// RUN:   -nogpuinc -nogpulib %s -fgpu-rdc -L%t -lhipBundled2 \
 // RUN:   2>&1 | FileCheck -check-prefix=MSVC %s
 
-// RUN: not %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
+// RUN: %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
 // RUN:   --target=x86_64-pc-windows-msvc \
-// RUN:   -nogpulib %s -fgpu-rdc -L%t -l:hipBundled2.lib \
+// RUN:   -nogpuinc -nogpulib %s -fgpu-rdc -L%t -l:hipBundled2.lib \
 // RUN:   2>&1 | FileCheck -check-prefix=MSVC %s
 
-// RUN: not %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
+// RUN: %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
 // RUN:   --target=x86_64-pc-windows-msvc \
-// RUN:   -nogpulib %s -fgpu-rdc %t/hipBundled2.lib \
+// RUN:   -nogpuinc -nogpulib %s -fgpu-rdc %t/hipBundled2.lib \
 // RUN:   2>&1 | FileCheck -check-prefix=MSVC %s
 
 // GNU1: "{{.*}}clang-offload-bundler" "-unbundle" "-type=a" "-input={{.*}}[[LIB:libhipBundled\.a]]" "-targets=hip-amdgcn-amd-amdhsa-gfx1030" "-output=[[A1030:.*\.a]]" "-allow-missing-bundles"

diff  --git a/clang/test/Driver/hip-no-device-libs.hip b/clang/test/Driver/hip-no-device-libs.hip
index 46f2cbb7f73c12..0ae8b8bcc1913a 100644
--- a/clang/test/Driver/hip-no-device-libs.hip
+++ b/clang/test/Driver/hip-no-device-libs.hip
@@ -1,10 +1,9 @@
 // REQUIRES: x86-registered-target
 // REQUIRES: amdgpu-registered-target
 
-// RUN: not %clang -### -nogpulib --target=x86_64-linux-gnu \
+// RUN: %clang -### -nogpuinc -nogpulib --target=x86_64-linux-gnu \
 // RUN:   --cuda-gpu-arch=gfx900 \
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck %s
 
 // CHECK-NOT: "-mlink-builtin-bitcode"
-

diff  --git a/clang/test/Driver/hip-options.hip b/clang/test/Driver/hip-options.hip
index 7a52829b567859..86985acc82c7a5 100644
--- a/clang/test/Driver/hip-options.hip
+++ b/clang/test/Driver/hip-options.hip
@@ -1,7 +1,7 @@
 // REQUIRES: x86-registered-target
 // REQUIRES: amdgpu-registered-target
 
-// RUN: not %clang -### -x hip --gpu-max-threads-per-block=1024 %s 2>&1 | FileCheck %s
+// RUN: %clang -### -x hip -nogpulib -nogpuinc --gpu-max-threads-per-block=1024 %s 2>&1 | FileCheck %s
 
 // Check that there are commands for both host- and device-side compilations.
 //
@@ -22,7 +22,7 @@
 // PTH: "-cc1"{{.*}} "-fgpu-default-stream=per-thread" {{.*}}"-x" "hip-cpp-output"
 
 // Check -mprintf-kind=hostcall
-// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -mprintf-kind=hostcall  %s -save-temps 2>&1 | FileCheck -check-prefix=HOSTC %s
+// RUN: %clang -### -nogpulib -nogpuinc --target=x86_64-unknown-linux-gnu -mprintf-kind=hostcall  %s -save-temps 2>&1 | FileCheck -check-prefix=HOSTC %s
 // HOSTC: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-mprintf-kind=hostcall" "-Werror=format-invalid-specifier"{{.*}}"-E" {{.*}}
 // HOSTC: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}} "-mprintf-kind=hostcall" "-Werror=format-invalid-specifier" {{.*}}"-x" "hip-cpp-output"
 // HOSTC: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}} "-mprintf-kind=hostcall" "-Werror=format-invalid-specifier" {{.*}}"-x" "ir"
@@ -30,26 +30,26 @@
 // HOSTC: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "ir"
 
 // Check -mprintf-kind=buffered
-// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -mprintf-kind=buffered  %s -save-temps 2>&1 | FileCheck -check-prefix=BUFF %s
+// RUN: %clang -### -nogpulib -nogpuinc --target=x86_64-unknown-linux-gnu -mprintf-kind=buffered  %s -save-temps 2>&1 | FileCheck -check-prefix=BUFF %s
 // BUFF: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-mprintf-kind=buffered" "-Werror=format-invalid-specifier"{{.*}}"-E" {{.*}}
 // BUFF: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}} "-mprintf-kind=buffered" "-Werror=format-invalid-specifier" {{.*}}"-x" "hip-cpp-output"
 // BUFF: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}} "-mprintf-kind=buffered" "-Werror=format-invalid-specifier" {{.*}}"-x" "ir"
 // BUFF: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}} "-E" {{.*}}
 // BUFF: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "ir"
 
-// RUN: not %clang -### -x hip --target=x86_64-pc-windows-msvc -fms-extensions \
+// RUN: %clang -### -x hip -nogpulib -nogpuinc --target=x86_64-pc-windows-msvc -fms-extensions \
 // RUN:   -mllvm -amdgpu-early-inline-all=true  %s 2>&1 | \
 // RUN:   FileCheck -check-prefix=MLLVM %s
 // MLLVM-NOT: "-mllvm"{{.*}}"-amdgpu-early-inline-all=true"{{.*}}"-mllvm"{{.*}}"-amdgpu-early-inline-all=true"
 
-// RUN: not %clang -### -Xarch_device -g -nogpulib --cuda-gpu-arch=gfx900 \
+// RUN: %clang -### -Xarch_device -g -nogpulib -nogpuinc --cuda-gpu-arch=gfx900 \
 // RUN:   -Xarch_device -fcf-protection=branch -Xarch_device -mllvm=--inline-threshold=100 \
 // RUN:   --cuda-gpu-arch=gfx906  %s 2>&1 | FileCheck -check-prefix=DEV %s
 // DEV: "-cc1"{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}" {{.*}} "-fcf-protection=branch" {{.*}}"-mllvm" "--inline-threshold=100"
 // DEV: "-cc1"{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}" {{.*}} "-fcf-protection=branch" {{.*}}"-mllvm" "--inline-threshold=100"
 // DEV-NOT: clang{{.*}} {{.*}} "-debug-info-kind={{.*}}"
 
-// RUN: not %clang -### -Xarch_host -g -nogpulib --cuda-gpu-arch=gfx900 \
+// RUN: %clang -### -Xarch_host -g -nogpulib -nogpuinc --cuda-gpu-arch=gfx900 \
 // RUN:   --cuda-gpu-arch=gfx906  %s 2>&1 | FileCheck -check-prefix=HOST %s
 // HOST-NOT: "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}"
 // HOST-NOT: "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}"

diff  --git a/clang/test/Driver/hip-output-file-name.hip b/clang/test/Driver/hip-output-file-name.hip
index 1689a4aabdc4d3..746678b81e251e 100644
--- a/clang/test/Driver/hip-output-file-name.hip
+++ b/clang/test/Driver/hip-output-file-name.hip
@@ -2,8 +2,8 @@
 // REQUIRES: amdgpu-registered-target
 
 // Output bundled code objects for combined compilation.
-// RUN: not %clang -### -c --target=x86_64-linux-gnu -fgpu-rdc \
-// RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
+// RUN: %clang -### -c --target=x86_64-linux-gnu -fgpu-rdc \
+// RUN:   -nogpulib -nogpuinc --offload-arch=gfx803 --offload-arch=gfx900 %s \
 // RUN: 2>&1 | FileCheck %s
 
 // CHECK: {{.*}}clang-offload-bundler{{.*}}"-output=hip-output-file-name.o"
@@ -13,43 +13,43 @@
 // is used to bundle the final output.
 
 // Output bundled PPE for one GPU for mixed compilation.
-// RUN: not %clang -### -E --target=x86_64-linux-gnu \
-// RUN:   --cuda-gpu-arch=gfx803 %s \
+// RUN: %clang -### -E --target=x86_64-linux-gnu \
+// RUN:   -nogpulib -nogpuinc --offload-arch=gfx803 %s \
 // RUN: 2>&1 | FileCheck -check-prefixes=DASH %s
 
 // Output unbundled PPE for one GPU for device only compilation.
-// RUN: not %clang -### -E --cuda-device-only --target=x86_64-linux-gnu \
-// RUN:   --cuda-gpu-arch=gfx803 %s \
+// RUN: %clang -### -E --offload-device-only --target=x86_64-linux-gnu \
+// RUN:   -nogpulib -nogpuinc --offload-arch=gfx803 %s \
 // RUN: 2>&1 | FileCheck -check-prefixes=CLANG-DASH %s
 
 // Output bundled PPE for two GPUs for mixed compilation.
-// RUN: not %clang -### -E --target=x86_64-linux-gnu \
-// RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
+// RUN: %clang -### -E --target=x86_64-linux-gnu \
+// RUN:   -nogpulib -nogpuinc --offload-arch=gfx803 --offload-arch=gfx900 %s \
 // RUN: 2>&1 | FileCheck -check-prefixes=DASH %s
 
 // Output bundled PPE for two GPUs for mixed compilation with -save-temps.
-// RUN: not %clang -### -E -save-temps --target=x86_64-linux-gnu \
-// RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
+// RUN: %clang -### -E -save-temps --target=x86_64-linux-gnu \
+// RUN:   -nogpulib -nogpuinc --offload-arch=gfx803 --offload-arch=gfx900 %s \
 // RUN: 2>&1 | FileCheck -check-prefixes=DASH %s
 
 // Output unbundled PPE for two GPUs for device only compilation.
-// RUN: not %clang -### -E --cuda-device-only --target=x86_64-linux-gnu \
-// RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
+// RUN: %clang -### -E --offload-device-only --target=x86_64-linux-gnu \
+// RUN:   -nogpulib -nogpuinc --offload-arch=gfx803 --offload-arch=gfx900 %s \
 // RUN: 2>&1 | FileCheck -check-prefixes=CLANG-DASH %s
 
 // Output bundled PPE for two GPUs for device only compilation with --gpu-bundle-output.
-// RUN: not %clang -### -E --cuda-device-only --target=x86_64-linux-gnu \
-// RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --gpu-bundle-output \
+// RUN: %clang -### -E --offload-device-only --target=x86_64-linux-gnu \
+// RUN:   -nogpulib -nogpuinc --offload-arch=gfx803 --offload-arch=gfx900 %s --gpu-bundle-output \
 // RUN: 2>&1 | FileCheck -check-prefixes=DASH %s
 
 // Output unbundled PPE for two GPUs for device only compilation with --no-gpu-bundle-output.
-// RUN: not %clang -### -E --cuda-device-only --target=x86_64-linux-gnu \
-// RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --no-gpu-bundle-output \
+// RUN: %clang -### -E --offload-device-only --target=x86_64-linux-gnu \
+// RUN:   -nogpulib -nogpuinc --offload-arch=gfx803 --offload-arch=gfx900 %s --no-gpu-bundle-output \
 // RUN: 2>&1 | FileCheck -check-prefixes=CLANG-DASH %s
 
 // Output unbundled PPE for host only compilation.
-// RUN: not %clang -### -E --cuda-host-only --target=x86_64-linux-gnu \
-// RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
+// RUN: %clang -### -E --offload-host-only --target=x86_64-linux-gnu \
+// RUN:   -nogpulib -nogpuinc --offload-arch=gfx803 --offload-arch=gfx900 %s \
 // RUN: 2>&1 | FileCheck -check-prefixes=CLANG-DASH %s
 
 // DASH-NOT: {{.*}}clang{{.*}}"-o" "-"
@@ -60,23 +60,23 @@
 // Check -E with -o.
 
 // Output bundled PPE for two GPUs for mixed compilation.
-// RUN: not %clang -### -E -o test.cui --target=x86_64-linux-gnu \
-// RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
+// RUN: %clang -### -E -o test.cui --target=x86_64-linux-gnu \
+// RUN:   -nogpulib -nogpuinc --offload-arch=gfx803 --offload-arch=gfx900 %s \
 // RUN: 2>&1 | FileCheck -check-prefixes=OUT %s
 
 // Output bundled PPE for two GPUs for mixed compilation.
-// RUN: not %clang -### -E -o test.cui -save-temps --target=x86_64-linux-gnu \
-// RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
+// RUN: %clang -### -E -o test.cui -save-temps --target=x86_64-linux-gnu \
+// RUN:   -nogpulib -nogpuinc --offload-arch=gfx803 --offload-arch=gfx900 %s \
 // RUN: 2>&1 | FileCheck -check-prefixes=OUT %s
 
 // Output bundled PPE for two GPUs for device only compilation with --gpu-bundle-output.
-// RUN: not %clang -### -E -o test.cui --cuda-device-only --target=x86_64-linux-gnu \
-// RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 --gpu-bundle-output %s \
+// RUN: %clang -### -E -o test.cui --offload-device-only --target=x86_64-linux-gnu \
+// RUN:   -nogpulib -nogpuinc --offload-arch=gfx803 --offload-arch=gfx900 --gpu-bundle-output %s \
 // RUN: 2>&1 | FileCheck -check-prefixes=OUT %s
 
 // Output unbundled PPE for two GPUs for host only compilation.
-// RUN: not %clang -### -E -o test.cui --cuda-host-only --target=x86_64-linux-gnu \
-// RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
+// RUN: %clang -### -E -o test.cui --offload-host-only --target=x86_64-linux-gnu \
+// RUN:   -nogpulib -nogpuinc --offload-arch=gfx803 --offload-arch=gfx900 %s \
 // RUN: 2>&1 | FileCheck -check-prefixes=CLANG-OUT %s
 
 // OUT-NOT: {{.*}}clang{{.*}}"-o" "test.cui"

diff  --git a/clang/test/Driver/hip-printf.hip b/clang/test/Driver/hip-printf.hip
index 719023595e97a1..eb626eda221f55 100644
--- a/clang/test/Driver/hip-printf.hip
+++ b/clang/test/Driver/hip-printf.hip
@@ -1,7 +1,7 @@
 // REQUIRES: x86-registered-target
 // REQUIRES: amdgpu-registered-target
 
-// RUN: not %clang -### --target=x86_64-linux-gnu --cuda-gpu-arch=gfx900 \
+// RUN: %clang -### -nogpulib -nogpuinc --target=x86_64-linux-gnu --cuda-gpu-arch=gfx900 \
 // RUN:   %s 2>&1 | FileCheck %s
 
 // CHECK: [[CLANG:".*clang.*"]] "-cc1"

diff  --git a/clang/test/Driver/hip-save-temps.hip b/clang/test/Driver/hip-save-temps.hip
index b120edf66b9802..d891b0d9bf627f 100644
--- a/clang/test/Driver/hip-save-temps.hip
+++ b/clang/test/Driver/hip-save-temps.hip
@@ -2,33 +2,33 @@
 // REQUIRES: amdgpu-registered-target
 
 // -fno-gpu-rdc without -o with -c
-// RUN: not %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
-// RUN:   --cuda-gpu-arch=gfx900 -c %s 2>&1 | \
+// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
+// RUN:   -nogpuinc --offload-arch=gfx900 -c %s 2>&1 | \
 // RUN:   FileCheck -check-prefixes=CHECK,NORDC %s
 
 // -fno-gpu-rdc without -o
-// RUN: not %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
-// RUN:   --cuda-gpu-arch=gfx900 %s 2>&1 | \
+// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
+// RUN:   -nogpuinc --offload-arch=gfx900 %s 2>&1 | \
 // RUN:   FileCheck -check-prefixes=CHECK,NORDC,NOUT %s
 
 // -fno-gpu-rdc with -o
-// RUN: not %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
-// RUN:   -o executable --cuda-gpu-arch=gfx900 %s 2>&1 | \
+// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
+// RUN:   -nogpuinc -o executable --offload-arch=gfx900 %s 2>&1 | \
 // RUN:   FileCheck -check-prefixes=CHECK,NORDC,WOUT %s
 
 // -fgpu-rdc without -o with -c
-// RUN: not %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
-// RUN:   -fgpu-rdc --cuda-gpu-arch=gfx900 -c %s 2>&1 | \
+// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
+// RUN:   -nogpuinc -fgpu-rdc --offload-arch=gfx900 -c %s 2>&1 | \
 // RUN:   FileCheck -check-prefixes=CHECK,RDC,RDCC %s
 
 // -fgpu-rdc without -o
-// RUN: not %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
-// RUN:   -fgpu-rdc --cuda-gpu-arch=gfx900 %s 2>&1 | \
+// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
+// RUN:   -nogpuinc -fgpu-rdc --offload-arch=gfx900 %s 2>&1 | \
 // RUN:   FileCheck -check-prefixes=CHECK,RDC,RDCL,NOUT %s
 
 // -fgpu-rdc with -o
 // UN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
-// UN:   -o executable -fgpu-rdc --cuda-gpu-arch=gfx900 %s 2>&1 | \
+// UN:   -nogpuinc -o executable -fgpu-rdc --offload-arch=gfx900 %s 2>&1 | \
 // UN:   FileCheck -check-prefixes=CHECK,RDC,RDCL,WOUT %s
 
 // -fgpu-rdc host object path

diff  --git a/clang/test/Driver/hip-std.hip b/clang/test/Driver/hip-std.hip
index 1aa90c44aba478..ad29f04dbbe7ff 100644
--- a/clang/test/Driver/hip-std.hip
+++ b/clang/test/Driver/hip-std.hip
@@ -1,26 +1,26 @@
 // REQUIRES: x86-registered-target
 // REQUIRES: amdgpu-registered-target
 
-// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -offload-arch=gfx906 %s \
+// RUN: %clang -### -nogpulib -nogpuinc --target=x86_64-unknown-linux-gnu -offload-arch=gfx906 %s \
 // RUN:   2>&1 | FileCheck -check-prefixes=DEFAULT %s
 // DEFAULT: "-cc1"{{.*}}"-fcuda-is-device"{{.*}}
 // DEFAULT-NOT: "-std="{{.*}}
 // DEFAULT: "-cc1"{{.*}}
 // DEFAULT-NOT: "-std="{{.*}}
 
-// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -offload-arch=gfx906 %s \
+// RUN: %clang -### -nogpulib -nogpuinc --target=x86_64-unknown-linux-gnu -offload-arch=gfx906 %s \
 // RUN:   -std=c++17 %s 2>&1 | FileCheck -check-prefixes=SPECIFIED %s
 // SPECIFIED: "-cc1"{{.*}}"-fcuda-is-device"{{.*}}"-std=c++17"
 // SPECIFIED: "-cc1"{{.*}}"-std=c++17"
 
-// RUN: not %clang -### --target=x86_64-pc-windows-msvc -offload-arch=gfx906 %s \
+// RUN: %clang -### -nogpulib -nogpuinc --target=x86_64-pc-windows-msvc -offload-arch=gfx906 %s \
 // RUN:   2>&1 | FileCheck -check-prefixes=MSVC-DEF %s
 // MSVC-DEF: "-cc1"{{.*}}"-fcuda-is-device"{{.*}}
 // MSVC-DEF-NOT: "-std="{{.*}}
 // MSVC-DEF: "-cc1"{{.*}}
 // MSVC-DEF-NOT: "-std="{{.*}}
 
-// RUN: not %clang -### --target=x86_64-pc-windows-msvc -offload-arch=gfx906 %s \
+// RUN: %clang -### -nogpulib -nogpuinc --target=x86_64-pc-windows-msvc -offload-arch=gfx906 %s \
 // RUN:   -std=c++17 %s 2>&1 | FileCheck -check-prefixes=MSVC-SPEC %s
 // MSVC-SPEC: "-cc1"{{.*}}"-fcuda-is-device"{{.*}}"-std=c++17"
 // MSVC-SPEC: "-cc1"{{.*}}"-std=c++17"

diff  --git a/clang/test/Driver/hip-syntax-only.hip b/clang/test/Driver/hip-syntax-only.hip
index eb208281eae030..0fc53cf755ed2a 100644
--- a/clang/test/Driver/hip-syntax-only.hip
+++ b/clang/test/Driver/hip-syntax-only.hip
@@ -1,7 +1,7 @@
 // REQUIRES: x86-registered-target
 // REQUIRES: amdgpu-registered-target
 
-// RUN: not %clang -### -nogpulib --target=x86_64 -fsyntax-only %s 2>&1 | FileCheck %s
+// RUN: %clang -### -nogpulib -nogpuinc --target=x86_64 -fsyntax-only %s 2>&1 | FileCheck %s
 
 // Check that there are commands for both host- and device-side compilations.
 //

diff  --git a/clang/test/Driver/hip-toolchain-dwarf.hip b/clang/test/Driver/hip-toolchain-dwarf.hip
index cfe4164c680532..2fb4602b50673f 100644
--- a/clang/test/Driver/hip-toolchain-dwarf.hip
+++ b/clang/test/Driver/hip-toolchain-dwarf.hip
@@ -1,8 +1,8 @@
 // REQUIRES: x86-registered-target
 // REQUIRES: amdgpu-registered-target
 
-// RUN: not %clang -### -c --target=x86_64-linux-gnu -fgpu-rdc \
-// RUN:   -x hip --cuda-gpu-arch=gfx803 %s \
+// RUN: %clang -### -c --target=x86_64-linux-gnu -fgpu-rdc \
+// RUN:   -nogpuinc -nogpulib -x hip --offload-arch=gfx803 %s \
 // RUN:   -Xarch_gfx803 -g 2>&1 | FileCheck %s -check-prefix=DWARF_VER
 
 // DWARF_VER: "-dwarf-version=5"

diff  --git a/clang/test/Driver/hip-toolchain-features.hip b/clang/test/Driver/hip-toolchain-features.hip
index 6807aee137c2e3..2e11ce38403ef4 100644
--- a/clang/test/Driver/hip-toolchain-features.hip
+++ b/clang/test/Driver/hip-toolchain-features.hip
@@ -1,11 +1,11 @@
 // REQUIRES: x86-registered-target
 // REQUIRES: amdgpu-registered-target
 
-// RUN: not %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
-// RUN:   --cuda-gpu-arch=gfx906:xnack+ --cuda-gpu-arch=gfx900:xnack+ %s \
+// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
+// RUN:   -nogpuinc --offload-arch=gfx906:xnack+ --offload-arch=gfx900:xnack+ %s \
 // RUN:   2>&1 | FileCheck %s -check-prefix=XNACK
-// RUN: not %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
-// RUN:   --cuda-gpu-arch=gfx906:xnack- --cuda-gpu-arch=gfx900:xnack- %s \
+// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
+// RUN:   -nogpuinc --offload-arch=gfx906:xnack- --offload-arch=gfx900:xnack- %s \
 // RUN:   2>&1 | FileCheck %s -check-prefix=NOXNACK
 
 // XNACK: {{.*}}clang{{.*}}"-target-feature" "+xnack"
@@ -13,11 +13,11 @@
 // XNACK: {{.*}}lld{{.*}} "-plugin-opt=-mattr=+xnack"
 // NOXNACK: {{.*}}lld{{.*}} "-plugin-opt=-mattr=-xnack"
 
-// RUN: not %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
-// RUN:   --cuda-gpu-arch=gfx908:sramecc+ %s \
+// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
+// RUN:   -nogpuinc --offload-arch=gfx908:sramecc+ %s \
 // RUN:   2>&1 | FileCheck %s -check-prefix=SRAM
-// RUN: not %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
-// RUN:   --cuda-gpu-arch=gfx908:sramecc- %s \
+// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
+// RUN:   -nogpuinc --offload-arch=gfx908:sramecc- %s \
 // RUN:   2>&1 | FileCheck %s -check-prefix=NOSRAM
 
 // SRAM: {{.*}}clang{{.*}}"-target-feature" "+sramecc"
@@ -25,11 +25,11 @@
 // SRAM: {{.*}}lld{{.*}} "-plugin-opt=-mattr=+sramecc"
 // NOTSRAM: {{.*}}lld{{.*}} "-plugin-opt=-mattr=-sramecc"
 
-// RUN: not %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
-// RUN:   --cuda-gpu-arch=gfx1010 %s \
+// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
+// RUN:   -nogpuinc --offload-arch=gfx1010 %s \
 // RUN:   -mcumode  2>&1 | FileCheck %s -check-prefix=CUMODE
-// RUN: not %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
-// RUN:   --cuda-gpu-arch=gfx1010 %s \
+// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
+// RUN:   -nogpuinc --offload-arch=gfx1010 %s \
 // RUN:   -mno-cumode  2>&1 | FileCheck %s -check-prefix=NOTCUMODE
 
 // CUMODE: {{.*}}clang{{.*}}"-target-feature" "+cumode"
@@ -37,21 +37,21 @@
 // CUMODE: {{.*}}lld{{.*}} "-plugin-opt=-mattr=+cumode"
 // NOTCUMODE: {{.*}}lld{{.*}} "-plugin-opt=-mattr=-cumode"
 
-// RUN: not %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
-// RUN:   --cuda-gpu-arch=gfx908:xnack+:sramecc+ %s \
+// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
+// RUN:   -nogpuinc --offload-arch=gfx908:xnack+:sramecc+ %s \
 // RUN:   2>&1 | FileCheck %s -check-prefix=ALL3
-// RUN: not %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
-// RUN:   --cuda-gpu-arch=gfx908:xnack-:sramecc- %s \
+// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
+// RUN:   -nogpuinc --offload-arch=gfx908:xnack-:sramecc- %s \
 // RUN:   2>&1 | FileCheck %s -check-prefix=NOALL3
 
 // ALL3: {{.*}}clang{{.*}}"-target-feature" "+sramecc" "-target-feature" "+xnack"
 // NOALL3: {{.*}}clang{{.*}}"-target-feature" "-sramecc" "-target-feature" "-xnack"
 
-// RUN: not %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
-// RUN:   --cuda-gpu-arch=gfx1010 %s \
+// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
+// RUN:   -nogpuinc --offload-arch=gfx1010 %s \
 // RUN:   -mtgsplit  2>&1 | FileCheck %s -check-prefix=TGSPLIT
-// RUN: not %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
-// RUN:   --cuda-gpu-arch=gfx1010 %s \
+// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
+// RUN:   -nogpuinc --offload-arch=gfx1010 %s \
 // RUN:   -mno-tgsplit  2>&1 | FileCheck %s -check-prefix=NOTTGSPLIT
 
 // TGSPLIT: {{.*}}clang{{.*}}"-target-feature" "+tgsplit"
@@ -59,8 +59,8 @@
 // TGSPLIT: {{.*}}lld{{.*}} "-plugin-opt=-mattr=+tgsplit"
 // NOTTGSPLIT: {{.*}}lld{{.*}} "-plugin-opt=-mattr=-tgsplit"
 
-// RUN: not %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
-// RUN:   --cuda-gpu-arch=gfx1010 %s \
+// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
+// RUN:   -nogpuinc --offload-arch=gfx1010 %s \
 // RUN:   -mcumode -mcumode -mno-cumode -mwavefrontsize64 -mcumode \
 // RUN:   -mwavefrontsize64 -mno-wavefrontsize64 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=DUP
@@ -70,8 +70,8 @@
 // DUP-NOT: "-target-feature" "{{.*}}wavefrontsize64"
 // DUP: {{.*}}lld{{.*}} "-plugin-opt=-mattr=+cumode"
 
-// RUN: not %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
-// RUN:   --cuda-gpu-arch=gfx1010 %s \
+// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
+// RUN:   -nogpuinc --offload-arch=gfx1010 %s \
 // RUN:   -mno-wavefrontsize64 -mwavefrontsize64 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=WAVE64
 // WAVE64: {{.*}}clang{{.*}} "-target-feature" "+wavefrontsize64"

diff  --git a/clang/test/Driver/hip-toolchain-mllvm.hip b/clang/test/Driver/hip-toolchain-mllvm.hip
index dbfdcc304d265a..194cecc40db583 100644
--- a/clang/test/Driver/hip-toolchain-mllvm.hip
+++ b/clang/test/Driver/hip-toolchain-mllvm.hip
@@ -5,27 +5,27 @@
 // to device lld linker.
 // -mllvm options are passed to clang only.
 
-// RUN: not %clang -### --target=x86_64-linux-gnu \
-// RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
-// RUN:   -mllvm -unroll-count=10 \
+// RUN: %clang -### --target=x86_64-linux-gnu \
+// RUN:   --offload-arch=gfx803 --offload-arch=gfx900 \
+// RUN:   -nogpuinc -nogpulib -mllvm -unroll-count=10 \
 // RUN:   -Xoffload-linker -mllvm=-inline-threshold=100 \
 // RUN:   %s 2>&1 | FileCheck %s
 
-// RUN: not %clang -### --target=x86_64-linux-gnu \
-// RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
-// RUN:   -mllvm -unroll-count=10 \
+// RUN: %clang -### --target=x86_64-linux-gnu \
+// RUN:   --offload-arch=gfx803 --offload-arch=gfx900 \
+// RUN:   -nogpuinc -nogpulib -mllvm -unroll-count=10 \
 // RUN:   -Xoffload-linker -mllvm=-inline-threshold=100 \
 // RUN:   %s 2>&1 | FileCheck -check-prefix=NEG %s
 
-// RUN: not %clang -### --target=x86_64-linux-gnu \
-// RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
-// RUN:   -fgpu-rdc -mllvm -unroll-count=10 \
+// RUN: %clang -### --target=x86_64-linux-gnu \
+// RUN:   --offload-arch=gfx803 --offload-arch=gfx900 \
+// RUN:   -nogpuinc -nogpulib -fgpu-rdc -mllvm -unroll-count=10 \
 // RUN:   -Xoffload-linker -mllvm=-inline-threshold=100 \
 // RUN:   %s 2>&1 | FileCheck %s
 
-// RUN: not %clang -### --target=x86_64-linux-gnu \
-// RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
-// RUN:   -fgpu-rdc -mllvm -unroll-count=10 \
+// RUN: %clang -### --target=x86_64-linux-gnu \
+// RUN:   --offload-arch=gfx803 --offload-arch=gfx900 \
+// RUN:   -nogpuinc -nogpulib -fgpu-rdc -mllvm -unroll-count=10 \
 // RUN:   -Xoffload-linker -mllvm=-inline-threshold=100 \
 // RUN:   %s 2>&1 | FileCheck -check-prefix=NEG %s
 

diff  --git a/clang/test/Driver/hip-toolchain-opt.hip b/clang/test/Driver/hip-toolchain-opt.hip
index 52c45c9a7f0b7a..4de33cf41cbcab 100644
--- a/clang/test/Driver/hip-toolchain-opt.hip
+++ b/clang/test/Driver/hip-toolchain-opt.hip
@@ -1,67 +1,67 @@
 // REQUIRES: x86-registered-target
 // REQUIRES: amdgpu-registered-target
 
-// RUN: not %clang -### \
+// RUN: %clang -### \
 // RUN:   --target=x86_64-unknown-linux-gnu \
 // RUN:   --cuda-gpu-arch=gfx900 \
-// RUN:   -c -nogpulib \
+// RUN:   -c -nogpuinc -nogpulib \
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck --check-prefixes=ALL,DEFAULT %s
 
-// RUN: not %clang -### -O0 \
+// RUN: %clang -### -O0 \
 // RUN:   --target=x86_64-unknown-linux-gnu \
 // RUN:   --cuda-gpu-arch=gfx900 \
-// RUN:   -c -nogpulib \
+// RUN:   -c -nogpuinc -nogpulib \
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck --check-prefixes=ALL,O0 %s
 
-// RUN: not %clang -### -O1 \
+// RUN: %clang -### -O1 \
 // RUN:   --target=x86_64-unknown-linux-gnu \
 // RUN:   --cuda-gpu-arch=gfx900 \
-// RUN:   -c -nogpulib \
+// RUN:   -c -nogpuinc -nogpulib \
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck --check-prefixes=ALL,O1 %s
 
-// RUN: not %clang -### -O2 \
+// RUN: %clang -### -O2 \
 // RUN:   --target=x86_64-unknown-linux-gnu \
 // RUN:   --cuda-gpu-arch=gfx900 \
-// RUN:   -c -nogpulib \
+// RUN:   -c -nogpuinc -nogpulib \
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck --check-prefixes=ALL,O2 %s
 
-// RUN: not %clang -### -O3 \
+// RUN: %clang -### -O3 \
 // RUN:   --target=x86_64-unknown-linux-gnu \
 // RUN:   --cuda-gpu-arch=gfx900 \
-// RUN:   -c -nogpulib \
+// RUN:   -c -nogpuinc -nogpulib \
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck --check-prefixes=ALL,O3 %s
 
-// RUN: not %clang -### -Os \
+// RUN: %clang -### -Os \
 // RUN:   --target=x86_64-unknown-linux-gnu \
 // RUN:   --cuda-gpu-arch=gfx900 \
-// RUN:   -c -nogpulib \
+// RUN:   -c -nogpuinc -nogpulib \
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck --check-prefixes=ALL,Os %s
 
-// RUN: not %clang -### -Oz \
+// RUN: %clang -### -Oz \
 // RUN:   --target=x86_64-unknown-linux-gnu \
 // RUN:   --cuda-gpu-arch=gfx900 \
-// RUN:   -c -nogpulib \
+// RUN:   -c -nogpuinc -nogpulib \
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck --check-prefixes=ALL,Oz %s
 
-// RUN: not %clang -### -Og \
+// RUN: %clang -### -Og \
 // RUN:   --target=x86_64-unknown-linux-gnu \
 // RUN:   --cuda-gpu-arch=gfx900 \
-// RUN:   -c -nogpulib \
+// RUN:   -c -nogpuinc -nogpulib \
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck --check-prefixes=ALL,Og %s
 
-// RUN: not %clang -### -O0 \
+// RUN: %clang -### -O0 \
 // RUN:   -Xoffload-linker --lto-CGO2 \
 // RUN:   --target=x86_64-unknown-linux-gnu \
 // RUN:   --cuda-gpu-arch=gfx900 \
-// RUN:   -c -nogpulib \
+// RUN:   -c -nogpuinc -nogpulib \
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck --check-prefixes=ALL,O0-CGO2 %s
 

diff  --git a/clang/test/Driver/lto.cu b/clang/test/Driver/lto.cu
index 802c039eaa704c..dadf35d8e6c9a9 100644
--- a/clang/test/Driver/lto.cu
+++ b/clang/test/Driver/lto.cu
@@ -2,14 +2,14 @@
 // REQUIRES: nvptx-registered-target
 
 // -flto causes a switch to llvm-bc object files.
-// RUN: %clangxx -nocudainc -nocudalib -ccc-print-phases -c %s -flto 2> %t
+// RUN: %clangxx --target=x86_64-unknown-linux-gnu -nocudainc -nocudalib -ccc-print-phases -c %s -flto 2> %t
 // RUN: FileCheck -check-prefix=CHECK-COMPILE-ACTIONS < %t %s
 //
 // CHECK-COMPILE-ACTIONS: 2: compiler, {1}, ir, (host-cuda)
 // CHECK-COMPILE-ACTIONS-NOT: lto-bc
 // CHECK-COMPILE-ACTIONS: 12: backend, {11}, lto-bc, (host-cuda)
 
-// RUN: %clangxx -nocudainc -nocudalib -ccc-print-phases %s -flto 2> %t
+// RUN: %clangxx --target=x86_64-unknown-linux-gnu -nocudainc -nocudalib -ccc-print-phases %s -flto 2> %t
 // RUN: FileCheck -check-prefix=CHECK-COMPILELINK-ACTIONS < %t %s
 //
 // CHECK-COMPILELINK-ACTIONS: 0: input, "{{.*}}lto.cu", cuda, (host-cuda)
@@ -29,7 +29,7 @@
 
 // llvm-bc and llvm-ll outputs need to match regular suffixes
 // (unfortunately).
-// RUN: %clangxx %s -nocudainc -nocudalib -flto -save-temps --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -### 2> %t
+// RUN: %clangxx %s --target=x86_64-unknown-linux-gnu -nocudainc -nocudalib -flto -save-temps --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -### 2> %t
 // RUN: FileCheck -check-prefix=CHECK-COMPILELINK-SUFFIXES < %t %s
 //
 // CHECK-COMPILELINK-SUFFIXES: "-o" "[[CPP:.*lto-host.*\.cui]]" "-x" "cuda" "{{.*}}lto.cu"
@@ -37,37 +37,37 @@
 // CHECK-COMPILELINK-SUFFIXES: "-o" "[[OBJ:.*lto-host.*\.o]]" {{.*}}[[BC]]"
 // CHECK-COMPILELINK-SUFFIXES: "{{.*}}a.{{(out|exe)}}" {{.*}}[[OBJ]]"
 
-// RUN: %clangxx %s -nocudainc -nocudalib -flto -S -### 2> %t
+// RUN: %clangxx --target=x86_64-unknown-linux-gnu %s -nocudainc -nocudalib -flto -S -### 2> %t
 // RUN: FileCheck -check-prefix=CHECK-COMPILE-SUFFIXES < %t %s
 //
 // CHECK-COMPILE-SUFFIXES: "-o" "{{.*}}lto.s" "-x" "cuda" "{{.*}}lto.cu"
 
-// RUN: not %clangxx -nocudainc -nocudalib %s -emit-llvm 2>&1 \
+// RUN: not %clangxx --target=x86_64-unknown-linux-gnu -nocudainc -nocudalib %s -emit-llvm 2>&1 \
 // RUN:    | FileCheck --check-prefix=LLVM-LINK %s
 // LLVM-LINK: -emit-llvm cannot be used when linking
 
 /// With ld.bfd or gold, link against LLVMgold.
-// RUN: %clangxx -nocudainc -nocudalib --target=x86_64-unknown-linux-gnu --sysroot=%S/Inputs/basic_cross_linux_tree %s \
+// RUN: %clangxx -nocudainc -nocudalib --target=x86_64-unknown-linux-gnu --offload-arch=sm_52 --sysroot=%S/Inputs/basic_cross_linux_tree %s \
 // RUN:   -fuse-ld=bfd -flto=thin -### 2>&1 | FileCheck --check-prefix=LLVMGOLD %s
-// RUN: %clangxx -nocudainc -nocudalib --target=x86_64-unknown-linux-gnu --sysroot=%S/Inputs/basic_cross_linux_tree %s \
+// RUN: %clangxx -nocudainc -nocudalib --target=x86_64-unknown-linux-gnu --offload-arch=sm_52 --sysroot=%S/Inputs/basic_cross_linux_tree %s \
 // RUN:   -fuse-ld=gold -flto=full -### 2>&1 | FileCheck --check-prefix=LLVMGOLD %s
 //
 // LLVMGOLD: "-plugin" "{{.*}}{{[/\\]}}LLVMgold.{{dll|dylib|so}}"
 
 /// lld does not need LLVMgold.
-// RUN: %clangxx -nocudainc -nocudalib --target=x86_64-unknown-linux-gnu --sysroot=%S/Inputs/basic_cross_linux_tree %s \
+// RUN: %clangxx -nocudainc -nocudalib --target=x86_64-unknown-linux-gnu --offload-arch=sm_52 --sysroot=%S/Inputs/basic_cross_linux_tree %s \
 // RUN:   -fuse-ld=lld -flto=full -### 2>&1 | FileCheck --check-prefix=NO-LLVMGOLD %s
-// RUN: %clangxx -nocudainc -nocudalib --target=x86_64-unknown-linux-gnu --sysroot=%S/Inputs/basic_cross_linux_tree %s \
+// RUN: %clangxx -nocudainc -nocudalib --target=x86_64-unknown-linux-gnu --offload-arch=sm_52 --sysroot=%S/Inputs/basic_cross_linux_tree %s \
 // RUN:   -fuse-ld=gold -flto=full -fno-lto -### 2>&1 | FileCheck --check-prefix=NO-LLVMGOLD %s
 //
 // NO-LLVMGOLD-NOT: "-plugin" "{{.*}}{{[/\\]}}LLVMgold.{{dll|dylib|so}}"
 
 // -flto passes along an explicit debugger tuning argument.
 // RUN: %clangxx -nocudainc -nocudalib \
-// RUN:          --target=x86_64-unknown-linux -### %s -flto -glldb --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda 2> %t
+// RUN:          --target=x86_64-unknown-linux -### %s -flto -glldb --offload-arch=sm_52 --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda 2> %t
 // RUN: FileCheck -check-prefix=CHECK-TUNING-LLDB < %t %s
 // RUN: %clangxx -nocudainc -nocudalib \
-// RUN:          --target=x86_64-unknown-linux -### %s -flto -g --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda 2> %t
+// RUN:          --target=x86_64-unknown-linux -### %s -flto -g --offload-arch=sm_52 --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda 2> %t
 // RUN: FileCheck -check-prefix=CHECK-NO-TUNING < %t %s
 //
 // CHECK-TUNING-LLDB:   "-plugin-opt=-debugger-tune=lldb"

diff  --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c
index 11fa080f6d8fda..98202d7f8e8637 100644
--- a/clang/test/Driver/openmp-offload-gpu.c
+++ b/clang/test/Driver/openmp-offload-gpu.c
@@ -10,33 +10,33 @@
 /// ###########################################################################
 
 /// Check -Xopenmp-target uses one of the archs provided when several archs are used.
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
-// RUN:          -Xopenmp-target -march=sm_35 -Xopenmp-target -march=sm_60 %s 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -nogpulib -nogpuinc \
+// RUN:          -Xopenmp-target -march=sm_52 -Xopenmp-target -march=sm_60 %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-FOPENMP-TARGET-ARCHS %s
 
 // CHK-FOPENMP-TARGET-ARCHS: ptxas{{.*}}" "--gpu-name" "sm_60"
 
 /// ###########################################################################
 
-/// Check -Xopenmp-target -march=sm_35 works as expected when two triples are present.
-// RUN:   not %clang -### -fopenmp=libomp \
+/// Check -Xopenmp-target -march=sm_52 works as expected when two triples are present.
+// RUN:   %clang -### -fopenmp=libomp \
 // RUN:          -fopenmp-targets=powerpc64le-ibm-linux-gnu,nvptx64-nvidia-cuda \
-// RUN:          -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_35 %s 2>&1 \
+// RUN:          -nogpulib -nogpuinc -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-FOPENMP-TARGET-COMPILATION %s
 
-// CHK-FOPENMP-TARGET-COMPILATION: ptxas{{.*}}" "--gpu-name" "sm_35"
+// CHK-FOPENMP-TARGET-COMPILATION: ptxas{{.*}}" "--gpu-name" "sm_52"
 
 /// Check PTXAS is passed -c flag when offloading to an NVIDIA device using OpenMP.
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \
-// RUN:   | FileCheck -check-prefix=CHK-PTXAS-DEFAULT %s
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 \
+// RUN:          -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 | FileCheck -check-prefix=CHK-PTXAS-DEFAULT %s
 
 // CHK-PTXAS-DEFAULT: ptxas{{.*}}" "-c"
 
 /// ###########################################################################
 
 /// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP - disable it.
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fnoopenmp-relocatable-target \
-// RUN:          -save-temps %s 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fnoopenmp-relocatable-target \
+// RUN:         -nogpulib -nogpuinc --offload-arch=sm_52  -save-temps %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-PTXAS-NORELO %s
 
 // CHK-PTXAS-NORELO-NOT: ptxas{{.*}}" "-c"
@@ -45,8 +45,8 @@
 
 /// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP
 /// Check that the flag is passed when -fopenmp-relocatable-target is used.
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-relocatable-target \
-// RUN:          -save-temps %s 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-relocatable-target \
+// RUN:          -nogpulib -nogpuinc --offload-arch=sm_52 -save-temps %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-PTXAS-RELO %s
 
 // CHK-PTXAS-RELO: ptxas{{.*}}" "-c"
@@ -56,7 +56,7 @@
 /// Check that error is not thrown by toolchain when no cuda lib flag is used.
 /// Check that the flag is passed when -fopenmp-relocatable-target is used.
 // RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 \
-// RUN:   -nocudalib -fopenmp-relocatable-target -save-temps %s 2>&1 \
+// RUN:   -nogpulib -nogpuinc -fopenmp-relocatable-target -save-temps %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-FLAG-NOLIBDEVICE %s
 
 // CHK-FLAG-NOLIBDEVICE-NOT: error:{{.*}}sm_60
@@ -75,46 +75,46 @@
 
 /// Check that the runtime bitcode library is part of the compile line.
 /// Create a bogus bitcode library and specify it with libomptarget-nvptx-bc-path
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
 // RUN:   --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc \
-// RUN:   -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
+// RUN:   -Xopenmp-target -march=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
 // RUN:   -fopenmp-relocatable-target -save-temps %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-BCLIB %s
 
 /// Specify the directory containing the bitcode lib, check clang picks the right one
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
 // RUN:   --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget \
-// RUN:   -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
+// RUN:   -Xopenmp-target -march=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
 // RUN:   -fopenmp-relocatable-target -save-temps \
 // RUN:   %s 2>&1 | FileCheck -check-prefix=CHK-BCLIB-DIR %s
 
 /// Create a bogus bitcode library and find it with LIBRARY_PATH
-// RUN:   env LIBRARY_PATH=%S/Inputs/libomptarget/subdir not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
-// RUN:   -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
+// RUN:   env LIBRARY_PATH=%S/Inputs/libomptarget/subdir %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:   -Xopenmp-target -march=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
 // RUN:   -fopenmp-relocatable-target -save-temps \
 // RUN:   %s 2>&1 | FileCheck -check-prefix=CHK-ENV-BCLIB %s
 
 // CHK-BCLIB: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget-nvptx-test.bc
-// CHK-BCLIB-DIR: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget{{/|\\\\}}libomptarget-nvptx-sm_35.bc
-// CHK-ENV-BCLIB: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}subdir{{/|\\\\}}libomptarget-nvptx-sm_35.bc
+// CHK-BCLIB-DIR: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget{{/|\\\\}}libomptarget-nvptx-sm_52.bc
+// CHK-ENV-BCLIB: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}subdir{{/|\\\\}}libomptarget-nvptx-sm_52.bc
 // CHK-BCLIB-NOT: {{error:|warning:}}
 
 /// ###########################################################################
 
 /// Check that the warning is thrown when the libomptarget bitcode library is not found.
-/// Libomptarget requires sm_35 or newer so an sm_35 bitcode library should never exist.
+/// No sm_52 bitcode library is supplied here (no --libomptarget-nvptx-bc-path and no LIBRARY_PATH), so the lookup is expected to fail.
 // RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
-// RUN:   -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
+// RUN:   -Xopenmp-target -march=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
 // RUN:   -fopenmp-relocatable-target -save-temps %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-BCLIB-WARN %s
 
-// CHK-BCLIB-WARN: no library 'libomptarget-nvptx-sm_35.bc' found in the default clang lib directory or in LIBRARY_PATH; use '--libomptarget-nvptx-bc-path' to specify nvptx bitcode library
+// CHK-BCLIB-WARN: no library 'libomptarget-nvptx-sm_52.bc' found in the default clang lib directory or in LIBRARY_PATH; use '--libomptarget-nvptx-bc-path' to specify nvptx bitcode library
 
 /// ###########################################################################
 
 /// Check that the error is thrown when the libomptarget bitcode library does not exist.
 // RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
-// RUN:   -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
+// RUN:   -Xopenmp-target -march=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
 // RUN:   --libomptarget-nvptx-bc-path=not-exist.bc \
 // RUN:   -fopenmp-relocatable-target -save-temps %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-BCLIB-ERROR %s
@@ -125,24 +125,24 @@
 
 /// Check that the error is thrown when CUDA 9.1 or lower version is used.
 // RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
-// RUN:   -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_90/usr/local/cuda \
+// RUN:   -Xopenmp-target -march=sm_52 --cuda-path=%S/Inputs/CUDA_90/usr/local/cuda \
 // RUN:   -fopenmp-relocatable-target -save-temps %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-CUDA-VERSION-ERROR %s
 
 // CHK-CUDA-VERSION-ERROR: NVPTX target requires CUDA 9.2 or above; CUDA 9.0 detected
 
 /// Check that debug info is emitted in dwarf-2
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O1 --no-cuda-noopt-device-debug 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O1 --no-cuda-noopt-device-debug 2>&1 \
 // RUN:   | FileCheck -check-prefix=DEBUG_DIRECTIVES %s
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 2>&1 \
 // RUN:   | FileCheck -check-prefix=DEBUG_DIRECTIVES %s
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 --no-cuda-noopt-device-debug 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 --no-cuda-noopt-device-debug 2>&1 \
 // RUN:   | FileCheck -check-prefix=DEBUG_DIRECTIVES %s
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g0 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g0 2>&1 \
 // RUN:   | FileCheck -check-prefix=NO_DEBUG %s
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb0 -O3 --cuda-noopt-device-debug 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb0 -O3 --cuda-noopt-device-debug 2>&1 \
 // RUN:   | FileCheck -check-prefix=NO_DEBUG %s
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -gline-directives-only 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -gline-directives-only 2>&1 \
 // RUN:   | FileCheck -check-prefix=DEBUG_DIRECTIVES %s
 
 // DEBUG_DIRECTIVES-NOT: warning: debug
@@ -156,25 +156,25 @@
 // DEBUG_DIRECTIVES: ptxas
 // DEBUG_DIRECTIVES: "-lineinfo"
 
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O0 --no-cuda-noopt-device-debug 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O0 --no-cuda-noopt-device-debug 2>&1 \
 // RUN:   | FileCheck -check-prefix=HAS_DEBUG %s
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g 2>&1 \
 // RUN:   | FileCheck -check-prefix=HAS_DEBUG %s
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O0 --cuda-noopt-device-debug 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O0 --cuda-noopt-device-debug 2>&1 \
 // RUN:   | FileCheck -check-prefix=HAS_DEBUG %s
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 --cuda-noopt-device-debug 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 --cuda-noopt-device-debug 2>&1 \
 // RUN:   | FileCheck -check-prefix=HAS_DEBUG %s
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g2 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g2 2>&1 \
 // RUN:   | FileCheck -check-prefix=HAS_DEBUG %s
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb2 -O0 --cuda-noopt-device-debug 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb2 -O0 --cuda-noopt-device-debug 2>&1 \
 // RUN:   | FileCheck -check-prefix=HAS_DEBUG %s
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g3 -O3 --cuda-noopt-device-debug 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g3 -O3 --cuda-noopt-device-debug 2>&1 \
 // RUN:   | FileCheck -check-prefix=HAS_DEBUG %s
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb3 -O2 --cuda-noopt-device-debug 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb3 -O2 --cuda-noopt-device-debug 2>&1 \
 // RUN:   | FileCheck -check-prefix=HAS_DEBUG %s
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -gline-tables-only 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -gline-tables-only 2>&1 \
 // RUN:   | FileCheck -check-prefix=HAS_DEBUG %s
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb1 -O2 --cuda-noopt-device-debug 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb1 -O2 --cuda-noopt-device-debug 2>&1 \
 // RUN:   | FileCheck -check-prefix=HAS_DEBUG %s
 
 // HAS_DEBUG-NOT: warning: debug
@@ -187,48 +187,49 @@
 // HAS_DEBUG-SAME: "--dont-merge-basicblocks"
 // HAS_DEBUG-SAME: "--return-at-end"
 
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode 2>&1 \
 // RUN:   | FileCheck -check-prefix=CUDA_MODE %s
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \
 // RUN:   | FileCheck -check-prefix=CUDA_MODE %s
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-mode 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-mode 2>&1 \
 // RUN:   | FileCheck -check-prefix=CUDA_MODE %s
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \
 // RUN:   | FileCheck -check-prefix=CUDA_MODE %s
 // CUDA_MODE: "-cc1"{{.*}}"-triple" "{{nvptx64-nvidia-cuda|amdgcn-amd-amdhsa}}"
 // CUDA_MODE-SAME: "-fopenmp-cuda-mode"
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode 2>&1 \
 // RUN:   | FileCheck -check-prefix=NO_CUDA_MODE %s
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \
 // RUN:   | FileCheck -check-prefix=NO_CUDA_MODE %s
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-mode 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-mode 2>&1 \
 // RUN:   | FileCheck -check-prefix=NO_CUDA_MODE %s
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \
 // RUN:   | FileCheck -check-prefix=NO_CUDA_MODE %s
 // NO_CUDA_MODE-NOT: "-{{fno-|f}}openmp-cuda-mode"
 
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-teams-reduction-recs-num=2048 2>&1 \
+// RUN:   %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-teams-reduction-recs-num=2048 2>&1 \
 // RUN:   | FileCheck -check-prefix=CUDA_RED_RECS %s
 // CUDA_RED_RECS: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"
 // CUDA_RED_RECS-SAME: "-fopenmp-cuda-teams-reduction-recs-num=2048"
 
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
+// RUN:          --offload-arch=sm_52 --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=OPENMP_NVPTX_WRAPPERS %s
 // OPENMP_NVPTX_WRAPPERS: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"
 // OPENMP_NVPTX_WRAPPERS-SAME: "-internal-isystem" "{{.*}}openmp_wrappers"
 
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
-// RUN:          -save-temps -ccc-print-bindings %s -o openmp-offload-gpu 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -nogpulib -nogpuinc \
+// RUN:          --offload-arch=sm_52 -save-temps -ccc-print-bindings %s -o openmp-offload-gpu 2>&1 \
 // RUN:   | FileCheck -check-prefix=SAVE_TEMPS_NAMES %s
 
 // SAVE_TEMPS_NAMES-NOT: "GNU::Linker"{{.*}}["[[SAVE_TEMPS_INPUT1:.*\.o]]", "[[SAVE_TEMPS_INPUT1]]"]
 
-// RUN:   not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64 -Xopenmp-target=nvptx64 -march=sm_35 \
-// RUN:          -save-temps %s -o openmp-offload-gpu 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64 -Xopenmp-target=nvptx64 -march=sm_52 \
+// RUN:          -nogpulib -nogpuinc -save-temps %s -o openmp-offload-gpu 2>&1 \
 // RUN:   | FileCheck -check-prefix=TRIPLE %s
 
 // TRIPLE: "-triple" "nvptx64-nvidia-cuda"
-// TRIPLE: "-target-cpu" "sm_35"
+// TRIPLE: "-target-cpu" "sm_52"
 
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
 // RUN:          -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
@@ -274,9 +275,9 @@
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib -save-temps %s 2>&1 | FileCheck %s --check-prefix=CHECK-TEMP-BINDINGS
 // CHECK-TEMP-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_OBJ:.+]]"], output: "[[BINARY:.+.out]]"
 
-// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52 --offload-arch=sm_70 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS
-// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52,sm_70 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS
-// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52,sm_70,sm_35,sm_80 --no-offload-arch=sm_35,sm_80 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52 --offload-arch=sm_70 -nogpuinc -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52,sm_70 -nogpuinc -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52,sm_60,sm_70,sm_80 --no-offload-arch=sm_60,sm_80 -nogpuinc -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS
 // CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]"
 // CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_52:.*]]"
 // CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_52]]"], output: "[[DEVICE_OBJ_SM_52:.*]]"
@@ -326,14 +327,14 @@
 // CHECK-HOST-ONLY: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[OUTPUT:.*]]"
 // CHECK-HOST-ONLY: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[OUTPUT]]"], output: "a.out"
 
-// RUN:   not %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
-// RUN:     --offload-device-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-DEVICE-ONLY
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:     -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 --offload-device-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-DEVICE-ONLY
 // CHECK-DEVICE-ONLY: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]"
 // CHECK-DEVICE-ONLY: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_ASM:.*]]"
 // CHECK-DEVICE-ONLY: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_ASM]]"], output: "{{.*}}-openmp-nvptx64-nvidia-cuda.o"
 
-// RUN:   not %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
-// RUN:     --offload-device-only -E -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-DEVICE-ONLY-PP
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:     -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 --offload-device-only -E -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-DEVICE-ONLY-PP
 // CHECK-DEVICE-ONLY-PP: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.*]]"], output: "-"
 
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp --offload-arch=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
@@ -341,8 +342,9 @@
 
 // CHECK-LTO-LIBRARY: {{.*}}-lomptarget{{.*}}-lomptarget.devicertl
 
-// RUN:   not %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp --offload-arch=sm_52 \
-// RUN:     %s 2>&1 | FileCheck --check-prefix=CHECK-NO-LTO-LIBRARY %s
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp --offload-arch=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
+// RUN:     --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-LTO-LIBRARY %s
 
 // CHECK-NO-LTO-LIBRARY: {{.*}}-lomptarget{{.*}}-lomptarget.devicertl
 
@@ -371,8 +373,8 @@
 //
 // Check that `-Xarch_host` works for OpenMP offloading.
 //
-// RUN:   not %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
-// RUN:     -fopenmp-targets=nvptx64-nvidia-cuda -Xarch_host -O3 %s 2>&1 \
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
+// RUN:     --offload-arch=sm_52 -nogpulib -nogpuinc -Xarch_host -O3 %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=XARCH-HOST %s
 // XARCH-HOST: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-O3"
 // XARCH-HOST-NOT: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-O3"
@@ -380,8 +382,8 @@
 //
 // Check that `-Xarch_device` works for OpenMP offloading.
 //
-// RUN:   not %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
-// RUN:     -fopenmp-targets=nvptx64-nvidia-cuda -Xarch_device -O3 %s 2>&1 \
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
+// RUN:     --offload-arch=sm_52 -nogpulib -nogpuinc -Xarch_device -O3 %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=XARCH-DEVICE %s
 // XARCH-DEVICE: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-O3"
 // XARCH-DEVICE-NOT: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-O3"

diff  --git a/clang/test/Driver/openmp-offload-infer.c b/clang/test/Driver/openmp-offload-infer.c
index d8c8946f96209f..d7c37716cd3029 100644
--- a/clang/test/Driver/openmp-offload-infer.c
+++ b/clang/test/Driver/openmp-offload-infer.c
@@ -2,8 +2,8 @@
 // REQUIRES: nvptx-registered-target
 // REQUIRES: amdgpu-registered-target
 
-// RUN:   not %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
-// RUN:          --offload-arch=sm_52 --offload-arch=gfx803 \
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
+// RUN:          -nogpulib --offload-arch=sm_52 --offload-arch=gfx803 \
 // RUN:          --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib/libomptarget-amdgpu-gfx803.bc \
 // RUN:          --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \
 // RUN:   | FileCheck %s
@@ -39,9 +39,7 @@
 // CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.*]]"
 // CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
 
-// RUN:   not %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp \
-// RUN:     --offload-arch=sm_70 --offload-arch=gfx908 --offload-arch=native \
-// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp \
+// RUN:   not %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp \
 // RUN:     --offload-arch=sm_70 --offload-arch=gfx908 --offload-arch=skylake \
 // RUN:     -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-FAILED