r258174 - [CUDA] Handle -O options (more) correctly.

Justin Lebar via cfe-commits cfe-commits at lists.llvm.org
Tue Jan 19 11:52:24 PST 2016


Author: jlebar
Date: Tue Jan 19 13:52:21 2016
New Revision: 258174

URL: http://llvm.org/viewvc/llvm-project?rev=258174&view=rev
Log:
[CUDA] Handle -O options (more) correctly.

Summary:
Previously we'd crash the driver if you passed -O0.  Now we try to
handle all of clang's various optimization flags in a sane way.

Reviewers: tra

Subscribers: cfe-commits, echristo, jhen

Differential Revision: http://reviews.llvm.org/D16307

Modified:
    cfe/trunk/lib/Driver/Tools.cpp
    cfe/trunk/test/Driver/cuda-external-tools.cu

Modified: cfe/trunk/lib/Driver/Tools.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Tools.cpp?rev=258174&r1=258173&r2=258174&view=diff
==============================================================================
--- cfe/trunk/lib/Driver/Tools.cpp (original)
+++ cfe/trunk/lib/Driver/Tools.cpp Tue Jan 19 13:52:21 2016
@@ -10663,10 +10663,35 @@ void NVPTX::Assembler::ConstructJob(Comp
   ArgStringList CmdArgs;
   CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
 
-  // Clang's default optimization level is -O0, but ptxas's default is -O3.
-  CmdArgs.push_back(Args.MakeArgString(
-      llvm::Twine("-O") +
-      Args.getLastArgValue(options::OPT_O_Group, "0").data()));
+  // Map the -O we received to -O{0,1,2,3}.
+  //
+  // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's default,
+  // so it may correspond more closely to the spirit of clang -O2.
+  if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
+    // -O3 seems like the least-bad option when -Osomething is specified to
+    // clang but it isn't handled below.
+    StringRef OOpt = "3";
+    if (A->getOption().matches(options::OPT_O4) ||
+        A->getOption().matches(options::OPT_Ofast))
+      OOpt = "3";
+    else if (A->getOption().matches(options::OPT_O0))
+      OOpt = "0";
+    else if (A->getOption().matches(options::OPT_O)) {
+      // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
+      OOpt = llvm::StringSwitch<const char *>(A->getValue())
+                 .Case("1", "1")
+                 .Case("2", "2")
+                 .Case("3", "3")
+                 .Case("s", "2")
+                 .Case("z", "2")
+                 .Default("2");
+    }
+    CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
+  } else {
+    // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
+    // to no optimizations, but ptxas's default is -O3.
+    CmdArgs.push_back("-O0");
+  }
 
   // Don't bother passing -g to ptxas: It's enabled by default at -O0, and
   // not supported at other optimization levels.

Modified: cfe/trunk/test/Driver/cuda-external-tools.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/cuda-external-tools.cu?rev=258174&r1=258173&r2=258174&view=diff
==============================================================================
--- cfe/trunk/test/Driver/cuda-external-tools.cu (original)
+++ cfe/trunk/test/Driver/cuda-external-tools.cu Tue Jan 19 13:52:21 2016
@@ -4,14 +4,31 @@
 // REQUIRES: x86-registered-target
 // REQUIRES: nvptx-registered-target
 
-// Regular compile with -O2.
+// Regular compiles with -O{0,1,2,3,4,fast}.  -O4 and -Ofast map to ptxas O3.
+// RUN: %clang -### -target x86_64-linux-gnu -O0 -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
+// RUN: %clang -### -target x86_64-linux-gnu -O1 -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT1 %s
 // RUN: %clang -### -target x86_64-linux-gnu -O2 -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
+// RUN: %clang -### -target x86_64-linux-gnu -O3 -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
+// RUN: %clang -### -target x86_64-linux-gnu -O4 -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
+// RUN: %clang -### -target x86_64-linux-gnu -Ofast -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
 
 // Regular compile without -O.  This should result in us passing -O0 to ptxas.
 // RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
 
+// Regular compiles with -Os and -Oz.  For lack of a better option, we map
+// these to ptxas -O3.
+// RUN: %clang -### -target x86_64-linux-gnu -Os -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
+// RUN: %clang -### -target x86_64-linux-gnu -Oz -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
+
 // Regular compile targeting sm_35.
 // RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM35 %s
@@ -42,7 +59,9 @@
 // ARCH64: "-m64"
 // ARCH32: "-m32"
 // OPT0: "-O0"
+// OPT1: "-O1"
 // OPT2: "-O2"
+// OPT3: "-O3"
 // SM20: "--gpu-name" "sm_20"
 // SM35: "--gpu-name" "sm_35"
 // SM20: "--output-file" "[[CUBINFILE:[^"]*]]"




More information about the cfe-commits mailing list