[PATCH] D16307: [CUDA] Handle -O options (more) correctly.
Justin Lebar via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 18 16:19:14 PST 2016
jlebar created this revision.
jlebar added a reviewer: tra.
jlebar added subscribers: jhen, echristo, cfe-commits.
Previously we'd crash the driver if you passed -O0. Now we try to
handle all of clang's various optimization flags in a sane way.
http://reviews.llvm.org/D16307
Files:
lib/Driver/Tools.cpp
test/Driver/cuda-external-tools.cu
Index: test/Driver/cuda-external-tools.cu
===================================================================
--- test/Driver/cuda-external-tools.cu
+++ test/Driver/cuda-external-tools.cu
@@ -4,14 +4,31 @@
// REQUIRES: x86-registered-target
// REQUIRES: nvptx-registered-target
-// Regular compile with -O2.
+// Regular compiles with -O{0,1,2,3,4,fast}. -O4 and -Ofast map to ptxas O3.
+// RUN: %clang -### -target x86_64-linux-gnu -O0 -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
+// RUN: %clang -### -target x86_64-linux-gnu -O1 -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT1 %s
// RUN: %clang -### -target x86_64-linux-gnu -O2 -c %s 2>&1 \
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
+// RUN: %clang -### -target x86_64-linux-gnu -O3 -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
+// RUN: %clang -### -target x86_64-linux-gnu -O4 -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
+// RUN: %clang -### -target x86_64-linux-gnu -Ofast -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
// Regular compile without -O. This should result in us passing -O0 to ptxas.
// RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
+// Regular compiles with -Os and -Oz. For lack of a better option, we map
+// these to ptxas -O3.
+// RUN: %clang -### -target x86_64-linux-gnu -Os -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
+// RUN: %clang -### -target x86_64-linux-gnu -Oz -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
+
// Regular compile targeting sm_35.
// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -c %s 2>&1 \
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM35 %s
@@ -42,7 +59,9 @@
// ARCH64: "-m64"
// ARCH32: "-m32"
// OPT0: "-O0"
+// OPT1: "-O1"
// OPT2: "-O2"
+// OPT3: "-O3"
// SM20: "--gpu-name" "sm_20"
// SM35: "--gpu-name" "sm_35"
// SM20: "--output-file" "[[CUBINFILE:[^"]*]]"
Index: lib/Driver/Tools.cpp
===================================================================
--- lib/Driver/Tools.cpp
+++ lib/Driver/Tools.cpp
@@ -10645,10 +10645,34 @@
ArgStringList CmdArgs;
CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
- // Clang's default optimization level is -O0, but ptxas's default is -O3.
- CmdArgs.push_back(Args.MakeArgString(
- llvm::Twine("-O") +
- Args.getLastArgValue(options::OPT_O_Group, "0").data()));
+ // Map the -O we received to -O{0,1,2,3}.
+ //
+ // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's default,
+ // so it may correspond more closely to the spirit of clang -O2.
+ if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
+ // -O3 seems like the least-bad option when -Osomething is specified to
+ // clang but it isn't one of the cases handled below.
+ StringRef OOpt = "3";
+ if (A->getOption().matches(options::OPT_O4) ||
+ A->getOption().matches(options::OPT_Ofast))
+ OOpt = "3";
+ else if (A->getOption().matches(options::OPT_O0))
+ OOpt = "0";
+ else if (A->getOption().matches(options::OPT_O)) {
+ // -Os, -Oz, and -O(anything else) map to -O3, for lack of better options.
+ OOpt = llvm::StringSwitch<const char *>(A->getValue())
+ .Case("1", "1")
+ .Case("2", "2")
+ .Case("s", "3")
+ .Case("z", "3")
+ .Default("3");
+ }
+ CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
+ } else {
+ // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
+ // to no optimizations, but ptxas's default is -O3.
+ CmdArgs.push_back("-O0");
+ }
// Don't bother passing -g to ptxas: It's enabled by default at -O0, and
// not supported at other optimization levels.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D16307.45219.patch
Type: text/x-patch
Size: 4142 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20160119/7613d3a8/attachment.bin>
More information about the cfe-commits mailing list