r289287 - [CUDA,Driver] Added --no-cuda-gpu-arch= option.

Artem Belevich via cfe-commits cfe-commits at lists.llvm.org
Fri Dec 9 14:59:18 PST 2016


Author: tra
Date: Fri Dec  9 16:59:17 2016
New Revision: 289287

URL: http://llvm.org/viewvc/llvm-project?rev=289287&view=rev
Log:
[CUDA,Driver] Added --no-cuda-gpu-arch= option.

This allows us to negate preceding --cuda-gpu-arch=X.
This comes handy when user needs to override default
flags set for them by the build system.

Differential Revision: https://reviews.llvm.org/D27631

Modified:
    cfe/trunk/include/clang/Driver/Options.td
    cfe/trunk/lib/Driver/Driver.cpp
    cfe/trunk/test/Driver/cuda-options.cu

Modified: cfe/trunk/include/clang/Driver/Options.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=289287&r1=289286&r2=289287&view=diff
==============================================================================
--- cfe/trunk/include/clang/Driver/Options.td (original)
+++ cfe/trunk/include/clang/Driver/Options.td Fri Dec  9 16:59:17 2016
@@ -414,6 +414,9 @@ def cuda_compile_host_device : Flag<["--
            "effect on non-CUDA compilations.">;
 def cuda_gpu_arch_EQ : Joined<["--"], "cuda-gpu-arch=">, Flags<[DriverOption]>,
   HelpText<"CUDA GPU architecture (e.g. sm_35).  May be specified more than once.">;
+def no_cuda_gpu_arch_EQ : Joined<["--"], "no-cuda-gpu-arch=">, Flags<[DriverOption]>,
+  HelpText<"Remove GPU architecture (e.g. sm_35) from the list of GPUs to compile for. "
+           "'all' resets the list to its default value.">;
 def cuda_noopt_device_debug : Flag<["--"], "cuda-noopt-device-debug">,
   HelpText<"Enable device-side debug info generation. Disables ptxas optimizations.">;
 def no_cuda_version_check : Flag<["--"], "no-cuda-version-check">,

Modified: cfe/trunk/lib/Driver/Driver.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=289287&r1=289286&r2=289287&view=diff
==============================================================================
--- cfe/trunk/lib/Driver/Driver.cpp (original)
+++ cfe/trunk/lib/Driver/Driver.cpp Fri Dec  9 16:59:17 2016
@@ -1919,25 +1919,39 @@ class OffloadingActionBuilder final {
                               options::OPT_cuda_device_only);
 
       // Collect all cuda_gpu_arch parameters, removing duplicates.
-      llvm::SmallSet<CudaArch, 4> GpuArchs;
+      std::set<CudaArch> GpuArchs;
       bool Error = false;
       for (Arg *A : Args) {
-        if (!A->getOption().matches(options::OPT_cuda_gpu_arch_EQ))
+        if (!(A->getOption().matches(options::OPT_cuda_gpu_arch_EQ) ||
+              A->getOption().matches(options::OPT_no_cuda_gpu_arch_EQ)))
           continue;
         A->claim();
 
-        const auto &ArchStr = A->getValue();
+        const StringRef ArchStr = A->getValue();
+        if (A->getOption().matches(options::OPT_no_cuda_gpu_arch_EQ) &&
+            ArchStr == "all") {
+          GpuArchs.clear();
+          continue;
+        }
         CudaArch Arch = StringToCudaArch(ArchStr);
         if (Arch == CudaArch::UNKNOWN) {
           C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
           Error = true;
-        } else if (GpuArchs.insert(Arch).second)
-          GpuArchList.push_back(Arch);
+        } else if (A->getOption().matches(options::OPT_cuda_gpu_arch_EQ))
+          GpuArchs.insert(Arch);
+        else if (A->getOption().matches(options::OPT_no_cuda_gpu_arch_EQ))
+          GpuArchs.erase(Arch);
+        else
+          llvm_unreachable("Unexpected option.");
       }
 
-      // Default to sm_20 which is the lowest common denominator for supported
-      // GPUs.
-      // sm_20 code should work correctly, if suboptimally, on all newer GPUs.
+      // Collect list of GPUs remaining in the set.
+      for (CudaArch Arch : GpuArchs)
+        GpuArchList.push_back(Arch);
+
+      // Default to sm_20 which is the lowest common denominator for
+      // supported GPUs.  sm_20 code should work correctly, if
+      // suboptimally, on all newer GPUs.
       if (GpuArchList.empty())
         GpuArchList.push_back(CudaArch::SM_20);
 

Modified: cfe/trunk/test/Driver/cuda-options.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/cuda-options.cu?rev=289287&r1=289286&r2=289287&view=diff
==============================================================================
--- cfe/trunk/test/Driver/cuda-options.cu (original)
+++ cfe/trunk/test/Driver/cuda-options.cu Fri Dec  9 16:59:17 2016
@@ -64,9 +64,9 @@
 
 // Verify that --cuda-gpu-arch option passes the correct GPU archtecture to
 // device compilation.
-// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -c %s 2>&1 \
+// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_30 -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
-// RUN:    -check-prefix DEVICE-SM35 -check-prefix HOST \
+// RUN:    -check-prefix DEVICE-SM30 -check-prefix HOST \
 // RUN:    -check-prefix INCLUDES-DEVICE -check-prefix NOLINK %s
 
 // Verify that there is one device-side compilation per --cuda-gpu-arch args
@@ -74,8 +74,8 @@
 // RUN: %clang -### -target x86_64-linux-gnu \
 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
-// RUN:    -check-prefix DEVICE2 -check-prefix DEVICE-SM35 \
-// RUN:    -check-prefix DEVICE2-SM30 -check-prefix HOST \
+// RUN:    -check-prefix DEVICE2 -check-prefix DEVICE-SM30 \
+// RUN:    -check-prefix DEVICE2-SM35 -check-prefix HOST \
 // RUN:    -check-prefix HOST-NOSAVE -check-prefix INCLUDES-DEVICE \
 // RUN:    -check-prefix NOLINK %s
 
@@ -92,6 +92,65 @@
 // RUN:    -check-prefix HOST -check-prefix HOST-NOSAVE \
 // RUN:    -check-prefix HOST-AS -check-prefix NOLINK %s
 
+// Verify that --[no-]cuda-gpu-arch arguments are handled correctly.
+// a) --no-cuda-gpu-arch=X negates preceeding --cuda-gpu-arch=X
+// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
+// RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
+// RUN:   --no-cuda-gpu-arch=sm_35 \
+// RUN:   -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,NOARCH-SM35 %s
+
+// b) --no-cuda-gpu-arch=X negates more than one preceeding --cuda-gpu-arch=X
+// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
+// RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
+// RUN:   --no-cuda-gpu-arch=sm_35 \
+// RUN:   -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,NOARCH-SM35 %s
+
+// c) if --no-cuda-gpu-arch=X negates all preceeding --cuda-gpu-arch=X
+//    we default to sm_20 -- same as if no --cuda-gpu-arch were passed.
+// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
+// RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
+// RUN:   --no-cuda-gpu-arch=sm_35 --no-cuda-gpu-arch=sm_30 \
+// RUN:   -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes ARCH-SM20,NOARCH-SM30,NOARCH-SM35 %s
+
+// d) --no-cuda-gpu-arch=X is a no-op if there's no preceding --cuda-gpu-arch=X
+// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
+// RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30\
+// RUN:   --no-cuda-gpu-arch=sm_50 \
+// RUN:   -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,ARCH-SM35 %s
+
+// e) --no-cuda-gpu-arch=X does not affect following --cuda-gpu-arch=X
+// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
+// RUN:   --no-cuda-gpu-arch=sm_35 --no-cuda-gpu-arch=sm_30 \
+// RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
+// RUN:   -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,ARCH-SM35 %s
+
+// f) --no-cuda-gpu-arch=all negates all preceding --cuda-gpu-arch=X
+// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
+// RUN:   --cuda-gpu-arch=sm_20 --cuda-gpu-arch=sm_30 \
+// RUN:   --no-cuda-gpu-arch=all \
+// RUN:   --cuda-gpu-arch=sm_35 \
+// RUN:   -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes NOARCH-SM20,NOARCH-SM30,ARCH-SM35 %s
+
+// g) There's no --cuda-gpu-arch=all
+// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
+// RUN:   --cuda-gpu-arch=all \
+// RUN:   -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix ARCHALLERROR %s
+
+// ARCH-SM20: "-cc1"{{.*}}"-target-cpu" "sm_20"
+// NOARCH-SM20-NOT: "-cc1"{{.*}}"-target-cpu" "sm_20"
+// ARCH-SM30: "-cc1"{{.*}}"-target-cpu" "sm_30"
+// NOARCH-SM30-NOT: "-cc1"{{.*}}"-target-cpu" "sm_30"
+// ARCH-SM35: "-cc1"{{.*}}"-target-cpu" "sm_35"
+// NOARCH-SM35-NOT: "-cc1"{{.*}}"-target-cpu" "sm_35"
+// ARCHALLERROR: error: Unsupported CUDA gpu architecture: all
+
 // Match device-side preprocessor and compiler phases with -save-temps.
 // DEVICE-SAVE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 // DEVICE-SAVE-SAME: "-aux-triple" "x86_64--linux-gnu"
@@ -107,14 +166,14 @@
 // DEVICE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 // DEVICE-NOSAVE-SAME: "-aux-triple" "x86_64--linux-gnu"
 // DEVICE-SAME: "-fcuda-is-device"
-// DEVICE-SM35-SAME: "-target-cpu" "sm_35"
+// DEVICE-SM30-SAME: "-target-cpu" "sm_30"
 // DEVICE-SAME: "-o" "[[PTXFILE:[^"]*]]"
 // DEVICE-NOSAVE-SAME: "-x" "cuda"
 // DEVICE-SAVE-SAME: "-x" "ir"
 
 // Match the call to ptxas (which assembles PTX to SASS).
 // DEVICE:ptxas
-// DEVICE-SM35-DAG: "--gpu-name" "sm_35"
+// DEVICE-SM30-DAG: "--gpu-name" "sm_30"
 // DEVICE-DAG: "--output-file" "[[CUBINFILE:[^"]*]]"
 // DEVICE-DAG: "[[PTXFILE]]"
 
@@ -122,7 +181,7 @@
 // DEVICE2: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 // DEVICE2-SAME: "-aux-triple" "x86_64--linux-gnu"
 // DEVICE2-SAME: "-fcuda-is-device"
-// DEVICE2-SM30-SAME: "-target-cpu" "sm_30"
+// DEVICE2-SM35-SAME: "-target-cpu" "sm_35"
 // DEVICE2-SAME: "-o" "[[GPUBINARY2:[^"]*]]"
 // DEVICE2-SAME: "-x" "cuda"
 




More information about the cfe-commits mailing list