r289287 - [CUDA,Driver] Added --no-cuda-gpu-arch= option.
Artem Belevich via cfe-commits
cfe-commits at lists.llvm.org
Fri Dec 9 14:59:18 PST 2016
Author: tra
Date: Fri Dec 9 16:59:17 2016
New Revision: 289287
URL: http://llvm.org/viewvc/llvm-project?rev=289287&view=rev
Log:
[CUDA,Driver] Added --no-cuda-gpu-arch= option.
This allows us to negate preceding --cuda-gpu-arch=X.
This comes handy when user needs to override default
flags set for them by the build system.
Differential Revision: https://reviews.llvm.org/D27631
Modified:
cfe/trunk/include/clang/Driver/Options.td
cfe/trunk/lib/Driver/Driver.cpp
cfe/trunk/test/Driver/cuda-options.cu
Modified: cfe/trunk/include/clang/Driver/Options.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=289287&r1=289286&r2=289287&view=diff
==============================================================================
--- cfe/trunk/include/clang/Driver/Options.td (original)
+++ cfe/trunk/include/clang/Driver/Options.td Fri Dec 9 16:59:17 2016
@@ -414,6 +414,9 @@ def cuda_compile_host_device : Flag<["--
"effect on non-CUDA compilations.">;
def cuda_gpu_arch_EQ : Joined<["--"], "cuda-gpu-arch=">, Flags<[DriverOption]>,
HelpText<"CUDA GPU architecture (e.g. sm_35). May be specified more than once.">;
+def no_cuda_gpu_arch_EQ : Joined<["--"], "no-cuda-gpu-arch=">, Flags<[DriverOption]>,
+ HelpText<"Remove GPU architecture (e.g. sm_35) from the list of GPUs to compile for. "
+ "'all' resets the list to its default value.">;
def cuda_noopt_device_debug : Flag<["--"], "cuda-noopt-device-debug">,
HelpText<"Enable device-side debug info generation. Disables ptxas optimizations.">;
def no_cuda_version_check : Flag<["--"], "no-cuda-version-check">,
Modified: cfe/trunk/lib/Driver/Driver.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=289287&r1=289286&r2=289287&view=diff
==============================================================================
--- cfe/trunk/lib/Driver/Driver.cpp (original)
+++ cfe/trunk/lib/Driver/Driver.cpp Fri Dec 9 16:59:17 2016
@@ -1919,25 +1919,39 @@ class OffloadingActionBuilder final {
options::OPT_cuda_device_only);
// Collect all cuda_gpu_arch parameters, removing duplicates.
- llvm::SmallSet<CudaArch, 4> GpuArchs;
+ std::set<CudaArch> GpuArchs;
bool Error = false;
for (Arg *A : Args) {
- if (!A->getOption().matches(options::OPT_cuda_gpu_arch_EQ))
+ if (!(A->getOption().matches(options::OPT_cuda_gpu_arch_EQ) ||
+ A->getOption().matches(options::OPT_no_cuda_gpu_arch_EQ)))
continue;
A->claim();
- const auto &ArchStr = A->getValue();
+ const StringRef ArchStr = A->getValue();
+ if (A->getOption().matches(options::OPT_no_cuda_gpu_arch_EQ) &&
+ ArchStr == "all") {
+ GpuArchs.clear();
+ continue;
+ }
CudaArch Arch = StringToCudaArch(ArchStr);
if (Arch == CudaArch::UNKNOWN) {
C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
Error = true;
- } else if (GpuArchs.insert(Arch).second)
- GpuArchList.push_back(Arch);
+ } else if (A->getOption().matches(options::OPT_cuda_gpu_arch_EQ))
+ GpuArchs.insert(Arch);
+ else if (A->getOption().matches(options::OPT_no_cuda_gpu_arch_EQ))
+ GpuArchs.erase(Arch);
+ else
+ llvm_unreachable("Unexpected option.");
}
- // Default to sm_20 which is the lowest common denominator for supported
- // GPUs.
- // sm_20 code should work correctly, if suboptimally, on all newer GPUs.
+ // Collect list of GPUs remaining in the set.
+ for (CudaArch Arch : GpuArchs)
+ GpuArchList.push_back(Arch);
+
+ // Default to sm_20 which is the lowest common denominator for
+ // supported GPUs. sm_20 code should work correctly, if
+ // suboptimally, on all newer GPUs.
if (GpuArchList.empty())
GpuArchList.push_back(CudaArch::SM_20);
Modified: cfe/trunk/test/Driver/cuda-options.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/cuda-options.cu?rev=289287&r1=289286&r2=289287&view=diff
==============================================================================
--- cfe/trunk/test/Driver/cuda-options.cu (original)
+++ cfe/trunk/test/Driver/cuda-options.cu Fri Dec 9 16:59:17 2016
@@ -64,9 +64,9 @@
// Verify that --cuda-gpu-arch option passes the correct GPU archtecture to
// device compilation.
-// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -c %s 2>&1 \
+// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_30 -c %s 2>&1 \
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
-// RUN: -check-prefix DEVICE-SM35 -check-prefix HOST \
+// RUN: -check-prefix DEVICE-SM30 -check-prefix HOST \
// RUN: -check-prefix INCLUDES-DEVICE -check-prefix NOLINK %s
// Verify that there is one device-side compilation per --cuda-gpu-arch args
@@ -74,8 +74,8 @@
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 -c %s 2>&1 \
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
-// RUN: -check-prefix DEVICE2 -check-prefix DEVICE-SM35 \
-// RUN: -check-prefix DEVICE2-SM30 -check-prefix HOST \
+// RUN: -check-prefix DEVICE2 -check-prefix DEVICE-SM30 \
+// RUN: -check-prefix DEVICE2-SM35 -check-prefix HOST \
// RUN: -check-prefix HOST-NOSAVE -check-prefix INCLUDES-DEVICE \
// RUN: -check-prefix NOLINK %s
@@ -92,6 +92,65 @@
// RUN: -check-prefix HOST -check-prefix HOST-NOSAVE \
// RUN: -check-prefix HOST-AS -check-prefix NOLINK %s
+// Verify that --[no-]cuda-gpu-arch arguments are handled correctly.
+// a) --no-cuda-gpu-arch=X negates preceeding --cuda-gpu-arch=X
+// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
+// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
+// RUN: --no-cuda-gpu-arch=sm_35 \
+// RUN: -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,NOARCH-SM35 %s
+
+// b) --no-cuda-gpu-arch=X negates more than one preceeding --cuda-gpu-arch=X
+// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
+// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
+// RUN: --no-cuda-gpu-arch=sm_35 \
+// RUN: -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,NOARCH-SM35 %s
+
+// c) if --no-cuda-gpu-arch=X negates all preceeding --cuda-gpu-arch=X
+// we default to sm_20 -- same as if no --cuda-gpu-arch were passed.
+// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
+// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
+// RUN: --no-cuda-gpu-arch=sm_35 --no-cuda-gpu-arch=sm_30 \
+// RUN: -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes ARCH-SM20,NOARCH-SM30,NOARCH-SM35 %s
+
+// d) --no-cuda-gpu-arch=X is a no-op if there's no preceding --cuda-gpu-arch=X
+// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
+// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30\
+// RUN: --no-cuda-gpu-arch=sm_50 \
+// RUN: -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,ARCH-SM35 %s
+
+// e) --no-cuda-gpu-arch=X does not affect following --cuda-gpu-arch=X
+// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
+// RUN: --no-cuda-gpu-arch=sm_35 --no-cuda-gpu-arch=sm_30 \
+// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
+// RUN: -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,ARCH-SM35 %s
+
+// f) --no-cuda-gpu-arch=all negates all preceding --cuda-gpu-arch=X
+// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
+// RUN: --cuda-gpu-arch=sm_20 --cuda-gpu-arch=sm_30 \
+// RUN: --no-cuda-gpu-arch=all \
+// RUN: --cuda-gpu-arch=sm_35 \
+// RUN: -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes NOARCH-SM20,NOARCH-SM30,ARCH-SM35 %s
+
+// g) There's no --cuda-gpu-arch=all
+// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
+// RUN: --cuda-gpu-arch=all \
+// RUN: -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix ARCHALLERROR %s
+
+// ARCH-SM20: "-cc1"{{.*}}"-target-cpu" "sm_20"
+// NOARCH-SM20-NOT: "-cc1"{{.*}}"-target-cpu" "sm_20"
+// ARCH-SM30: "-cc1"{{.*}}"-target-cpu" "sm_30"
+// NOARCH-SM30-NOT: "-cc1"{{.*}}"-target-cpu" "sm_30"
+// ARCH-SM35: "-cc1"{{.*}}"-target-cpu" "sm_35"
+// NOARCH-SM35-NOT: "-cc1"{{.*}}"-target-cpu" "sm_35"
+// ARCHALLERROR: error: Unsupported CUDA gpu architecture: all
+
// Match device-side preprocessor and compiler phases with -save-temps.
// DEVICE-SAVE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
// DEVICE-SAVE-SAME: "-aux-triple" "x86_64--linux-gnu"
@@ -107,14 +166,14 @@
// DEVICE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
// DEVICE-NOSAVE-SAME: "-aux-triple" "x86_64--linux-gnu"
// DEVICE-SAME: "-fcuda-is-device"
-// DEVICE-SM35-SAME: "-target-cpu" "sm_35"
+// DEVICE-SM30-SAME: "-target-cpu" "sm_30"
// DEVICE-SAME: "-o" "[[PTXFILE:[^"]*]]"
// DEVICE-NOSAVE-SAME: "-x" "cuda"
// DEVICE-SAVE-SAME: "-x" "ir"
// Match the call to ptxas (which assembles PTX to SASS).
// DEVICE:ptxas
-// DEVICE-SM35-DAG: "--gpu-name" "sm_35"
+// DEVICE-SM30-DAG: "--gpu-name" "sm_30"
// DEVICE-DAG: "--output-file" "[[CUBINFILE:[^"]*]]"
// DEVICE-DAG: "[[PTXFILE]]"
@@ -122,7 +181,7 @@
// DEVICE2: "-cc1" "-triple" "nvptx64-nvidia-cuda"
// DEVICE2-SAME: "-aux-triple" "x86_64--linux-gnu"
// DEVICE2-SAME: "-fcuda-is-device"
-// DEVICE2-SM30-SAME: "-target-cpu" "sm_30"
+// DEVICE2-SM35-SAME: "-target-cpu" "sm_35"
// DEVICE2-SAME: "-o" "[[GPUBINARY2:[^"]*]]"
// DEVICE2-SAME: "-x" "cuda"
More information about the cfe-commits
mailing list