r329737 - [CUDA] Added --[no-]cuda-include-ptx=sm_XX|all option.

Artem Belevich via cfe-commits cfe-commits at lists.llvm.org
Tue Apr 10 11:38:22 PDT 2018


Author: tra
Date: Tue Apr 10 11:38:22 2018
New Revision: 329737

URL: http://llvm.org/viewvc/llvm-project?rev=329737&view=rev
Log:
[CUDA] Added --[no-]cuda-include-ptx=sm_XX|all option.

Currently we always include PTX into the fatbin along
with the GPU code.It about doubles the size of the GPU binary
we need to carry in the executable. These options allow control
inclusion of PTX into GPU binary.

This patch does not change the defaults, though we may consider
making no-PTX the default in the future.

Differential Revision: https://reviews.llvm.org/D45495

Modified:
    cfe/trunk/docs/ClangCommandLineReference.rst
    cfe/trunk/include/clang/Driver/Options.td
    cfe/trunk/lib/Driver/ToolChains/Cuda.cpp
    cfe/trunk/test/Driver/cuda-options.cu

Modified: cfe/trunk/docs/ClangCommandLineReference.rst
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/docs/ClangCommandLineReference.rst?rev=329737&r1=329736&r2=329737&view=diff
==============================================================================
--- cfe/trunk/docs/ClangCommandLineReference.rst (original)
+++ cfe/trunk/docs/ClangCommandLineReference.rst Tue Apr 10 11:38:22 2018
@@ -144,6 +144,10 @@ Compile CUDA code for device only
 
 CUDA GPU architecture (e.g. sm\_35).  May be specified more than once.
 
+.. option:: --cuda-include-ptx=<arg>, --no-cuda-include-ptx=<arg>
+
+Include (or not) PTX along with CUDA GPU binary for the given architecture (e.g. sm\_35). Argument may be 'all'. The option may be specified more than once. Default: --cuda-include-ptx=all
+
 .. option:: --cuda-host-only
 
 Compile CUDA code for host only.  Has no effect on non-CUDA compilations.

Modified: cfe/trunk/include/clang/Driver/Options.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=329737&r1=329736&r2=329737&view=diff
==============================================================================
--- cfe/trunk/include/clang/Driver/Options.td (original)
+++ cfe/trunk/include/clang/Driver/Options.td Tue Apr 10 11:38:22 2018
@@ -546,6 +546,10 @@ def cuda_host_only : Flag<["--"], "cuda-
 def cuda_compile_host_device : Flag<["--"], "cuda-compile-host-device">,
   HelpText<"Compile CUDA code for both host and device (default).  Has no "
            "effect on non-CUDA compilations.">;
+def cuda_include_ptx_EQ : Joined<["--"], "cuda-include-ptx=">, Flags<[DriverOption]>,
+  HelpText<"Include PTX for the follwing GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">;
+def no_cuda_include_ptx_EQ : Joined<["--"], "no-cuda-include-ptx=">, Flags<[DriverOption]>,
+  HelpText<"Do not include PTX for the follwing GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">;
 def cuda_gpu_arch_EQ : Joined<["--"], "cuda-gpu-arch=">, Flags<[DriverOption]>,
   HelpText<"CUDA GPU architecture (e.g. sm_35).  May be specified more than once.">;
 def no_cuda_gpu_arch_EQ : Joined<["--"], "no-cuda-gpu-arch=">, Flags<[DriverOption]>,

Modified: cfe/trunk/lib/Driver/ToolChains/Cuda.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/Cuda.cpp?rev=329737&r1=329736&r2=329737&view=diff
==============================================================================
--- cfe/trunk/lib/Driver/ToolChains/Cuda.cpp (original)
+++ cfe/trunk/lib/Driver/ToolChains/Cuda.cpp Tue Apr 10 11:38:22 2018
@@ -377,6 +377,22 @@ void NVPTX::Assembler::ConstructJob(Comp
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
 }
 
+static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) {
+  bool includePTX = true;
+  for (Arg *A : Args) {
+    if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) ||
+          A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ)))
+      continue;
+    A->claim();
+    const StringRef ArchStr = A->getValue();
+    if (ArchStr == "all" || ArchStr == gpu_arch) {
+      includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ);
+      continue;
+    }
+  }
+  return includePTX;
+}
+
 // All inputs to this linker must be from CudaDeviceActions, as we need to look
 // at the Inputs' Actions in order to figure out which GPU architecture they
 // correspond to.
@@ -404,6 +420,9 @@ void NVPTX::Linker::ConstructJob(Compila
            "Device action expected to have associated a GPU architecture!");
     CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
 
+    if (II.getType() == types::TY_PP_Asm &&
+        !shouldIncludePTX(Args, gpu_arch_str))
+      continue;
     // We need to pass an Arch of the form "sm_XX" for cubin files and
     // "compute_XX" for ptx.
     const char *Arch =

Modified: cfe/trunk/test/Driver/cuda-options.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/cuda-options.cu?rev=329737&r1=329736&r2=329737&view=diff
==============================================================================
--- cfe/trunk/test/Driver/cuda-options.cu (original)
+++ cfe/trunk/test/Driver/cuda-options.cu Tue Apr 10 11:38:22 2018
@@ -142,6 +142,48 @@
 // RUN:   -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix ARCHALLERROR %s
 
+
+// Verify that --[no-]cuda-include-ptx arguments are handled correctly.
+// a) by default we're including PTX for all GPUs.
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
+// RUN:   -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s
+
+// b) --no-cuda-include-ptx=all disables PTX inclusion for all GPUs
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
+// RUN:   --no-cuda-include-ptx=all \
+// RUN:   -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM35,NOPTX-SM30 %s
+
+// c) --no-cuda-include-ptx=sm_XX disables PTX inclusion for that GPU only.
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
+// RUN:   --no-cuda-include-ptx=sm_35 \
+// RUN:   -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM35,PTX-SM30 %s
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
+// RUN:   --no-cuda-include-ptx=sm_30 \
+// RUN:   -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,NOPTX-SM30 %s
+
+// d) --cuda-include-ptx=all overrides preceding --no-cuda-include-ptx=all
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
+// RUN:   --no-cuda-include-ptx=all --cuda-include-ptx=all \
+// RUN:   -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s
+
+// e) --cuda-include-ptx=all overrides preceding --no-cuda-include-ptx=sm_XX
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
+// RUN:   --no-cuda-include-ptx=sm_30 --cuda-include-ptx=all \
+// RUN:   -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s
+
+
 // ARCH-SM20: "-cc1"{{.*}}"-target-cpu" "sm_20"
 // NOARCH-SM20-NOT: "-cc1"{{.*}}"-target-cpu" "sm_20"
 // ARCH-SM30: "-cc1"{{.*}}"-target-cpu" "sm_30"
@@ -236,3 +278,12 @@
 
 // Match no linker.
 // NOLINK-NOT: "{{.*}}{{ld|link}}{{(.exe)?}}"
+
+// FATBIN-COMMON:fatbinary
+// FATBIN-COMMON: "--create" "[[FATBINARY:[^"]*]]"
+// FATBIN-COMMON: "--image=profile=sm_30,file=
+// PTX-SM30: "--image=profile=compute_30,file=
+// NOPTX-SM30-NOT: "--image=profile=compute_30,file=
+// FATBIN-COMMON: "--image=profile=sm_35,file=
+// PTX-SM35: "--image=profile=compute_35,file=
+// NOPTX-SM35-NOT: "--image=profile=compute_35,file=




More information about the cfe-commits mailing list