r253385 - [CUDA] use -aux-triple to pass target triple of opposite side of compilation

Tue Nov 17 14:28:41 PST 2015

Author: tra
Date: Tue Nov 17 16:28:40 2015
New Revision: 253385

URL: http://llvm.org/viewvc/llvm-project?rev=253385&view=rev
Log:
[CUDA] use -aux-triple to pass target triple of opposite side of compilation

Clang needs to know the target triple for both sides of the compilation so that
preprocessor macros and target builtins from both sides are available.

This change augments the Compilation class to carry information about the
toolchains used during the different CUDA compilation passes, and refactors
BuildActions to use that information when it constructs CUDA jobs.

Removed DeviceTriple from CudaHostAction/CudaDeviceAction as it's no
longer needed.

Differential Revision: http://reviews.llvm.org/D13144

Modified:
    cfe/trunk/include/clang/Driver/Action.h
    cfe/trunk/include/clang/Driver/Compilation.h
    cfe/trunk/include/clang/Driver/Driver.h
    cfe/trunk/lib/Driver/Action.cpp
    cfe/trunk/lib/Driver/Compilation.cpp
    cfe/trunk/lib/Driver/Driver.cpp
    cfe/trunk/lib/Driver/Tools.cpp
    cfe/trunk/test/Driver/cuda-options.cu
    cfe/trunk/test/SemaCUDA/function-target-hd.cu

Modified: cfe/trunk/include/clang/Driver/Action.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Action.h?rev=253385&r1=253384&r2=253385&view=diff
==============================================================================

--- cfe/trunk/include/clang/Driver/Action.h (original)
+++ cfe/trunk/include/clang/Driver/Action.h Tue Nov 17 16:28:40 2015
@@ -139,17 +139,15 @@ class CudaDeviceAction : public Action {
   virtual void anchor();
   /// GPU architecture to bind -- e.g 'sm_35'.
   const char *GpuArchName;
-  const char *DeviceTriple;
   /// True when action results are not consumed by the host action (e.g when
   /// -fsyntax-only or --cuda-device-only options are used).
   bool AtTopLevel;
 
 public:
   CudaDeviceAction(std::unique_ptr<Action> Input, const char *ArchName,
-                   const char *DeviceTriple, bool AtTopLevel);
+                   bool AtTopLevel);
 
   const char *getGpuArchName() const { return GpuArchName; }
-  const char *getDeviceTriple() const { return DeviceTriple; }
   bool isAtTopLevel() const { return AtTopLevel; }
 
   static bool classof(const Action *A) {
@@ -160,16 +158,13 @@ public:
 class CudaHostAction : public Action {
   virtual void anchor();
   ActionList DeviceActions;
-  const char *DeviceTriple;
 
 public:
-  CudaHostAction(std::unique_ptr<Action> Input, const ActionList &DeviceActions,
-                 const char *DeviceTriple);
+  CudaHostAction(std::unique_ptr<Action> Input,
+                 const ActionList &DeviceActions);
   ~CudaHostAction() override;
 
-  ActionList &getDeviceActions() { return DeviceActions; }
   const ActionList &getDeviceActions() const { return DeviceActions; }
-  const char *getDeviceTriple() const { return DeviceTriple; }
 
   static bool classof(const Action *A) { return A->getKind() == CudaHostClass; }
 };

Modified: cfe/trunk/include/clang/Driver/Compilation.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Compilation.h?rev=253385&r1=253384&r2=253385&view=diff
==============================================================================
--- cfe/trunk/include/clang/Driver/Compilation.h (original)
+++ cfe/trunk/include/clang/Driver/Compilation.h Tue Nov 17 16:28:40 2015
@@ -38,6 +38,9 @@ class Compilation {
   /// The default tool chain.
   const ToolChain &DefaultToolChain;
 
+  const ToolChain *CudaHostToolChain;
+  const ToolChain *CudaDeviceToolChain;
+
   /// The original (untranslated) input argument list.
   llvm::opt::InputArgList *Args;
 
@@ -81,6 +84,17 @@ public:
   const Driver &getDriver() const { return TheDriver; }
 
   const ToolChain &getDefaultToolChain() const { return DefaultToolChain; }
+  const ToolChain *getCudaHostToolChain() const { return CudaHostToolChain; }
+  const ToolChain *getCudaDeviceToolChain() const {
+    return CudaDeviceToolChain;
+  }
+
+  void setCudaHostToolChain(const ToolChain *HostToolChain) {
+    CudaHostToolChain = HostToolChain;
+  }
+  void setCudaDeviceToolChain(const ToolChain *DeviceToolChain) {
+    CudaDeviceToolChain = DeviceToolChain;
+  }
 
   const llvm::opt::InputArgList &getInputArgs() const { return *Args; }
 

Modified: cfe/trunk/include/clang/Driver/Driver.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Driver.h?rev=253385&r1=253384&r2=253385&view=diff
==============================================================================
--- cfe/trunk/include/clang/Driver/Driver.h (original)
+++ cfe/trunk/include/clang/Driver/Driver.h Tue Nov 17 16:28:40 2015
@@ -297,22 +297,23 @@ public:
   /// BuildActions - Construct the list of actions to perform for the
   /// given arguments, which are only done for a single architecture.
   ///
+  /// \param C - The compilation that is being built.
   /// \param TC - The default host tool chain.
   /// \param Args - The input arguments.
   /// \param Actions - The list to store the resulting actions onto.
-  void BuildActions(const ToolChain &TC, llvm::opt::DerivedArgList &Args,
-                    const InputList &Inputs, ActionList &Actions) const;
+  void BuildActions(Compilation &C, const ToolChain &TC,
+                    llvm::opt::DerivedArgList &Args, const InputList &Inputs,
+                    ActionList &Actions) const;
 
   /// BuildUniversalActions - Construct the list of actions to perform
   /// for the given arguments, which may require a universal build.
   ///
+  /// \param C - The compilation that is being built.
   /// \param TC - The default host tool chain.
   /// \param Args - The input arguments.
   /// \param Actions - The list to store the resulting actions onto.
-  void BuildUniversalActions(const ToolChain &TC,
-                             llvm::opt::DerivedArgList &Args,
-                             const InputList &BAInputs,
-                             ActionList &Actions) const;
+  void BuildUniversalActions(Compilation &C, const ToolChain &TC,
+                             const InputList &BAInputs) const;
 
   /// BuildJobs - Bind actions to concrete tools and translate
   /// arguments to form the list of jobs to run.

Modified: cfe/trunk/lib/Driver/Action.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Action.cpp?rev=253385&r1=253384&r2=253385&view=diff
==============================================================================
--- cfe/trunk/lib/Driver/Action.cpp (original)
+++ cfe/trunk/lib/Driver/Action.cpp Tue Nov 17 16:28:40 2015
@@ -58,18 +58,15 @@ BindArchAction::BindArchAction(std::uniq
 void CudaDeviceAction::anchor() {}
 
 CudaDeviceAction::CudaDeviceAction(std::unique_ptr<Action> Input,
-                                   const char *ArchName,
-                                   const char *DeviceTriple, bool AtTopLevel)
+                                   const char *ArchName, bool AtTopLevel)
     : Action(CudaDeviceClass, std::move(Input)), GpuArchName(ArchName),
-      DeviceTriple(DeviceTriple), AtTopLevel(AtTopLevel) {}
+      AtTopLevel(AtTopLevel) {}
 
 void CudaHostAction::anchor() {}
 
 CudaHostAction::CudaHostAction(std::unique_ptr<Action> Input,
-                               const ActionList &DeviceActions,
-                               const char *DeviceTriple)
-    : Action(CudaHostClass, std::move(Input)), DeviceActions(DeviceActions),
-      DeviceTriple(DeviceTriple) {}
+                               const ActionList &DeviceActions)
+    : Action(CudaHostClass, std::move(Input)), DeviceActions(DeviceActions) {}
 
 CudaHostAction::~CudaHostAction() {
   for (auto &DA : DeviceActions)

Modified: cfe/trunk/lib/Driver/Compilation.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Compilation.cpp?rev=253385&r1=253384&r2=253385&view=diff
==============================================================================
--- cfe/trunk/lib/Driver/Compilation.cpp (original)
+++ cfe/trunk/lib/Driver/Compilation.cpp Tue Nov 17 16:28:40 2015
@@ -24,8 +24,9 @@ using namespace llvm::opt;
 
 Compilation::Compilation(const Driver &D, const ToolChain &_DefaultToolChain,
                          InputArgList *_Args, DerivedArgList *_TranslatedArgs)
-    : TheDriver(D), DefaultToolChain(_DefaultToolChain), Args(_Args),
-      TranslatedArgs(_TranslatedArgs), Redirects(nullptr),
+    : TheDriver(D), DefaultToolChain(_DefaultToolChain),
+      CudaHostToolChain(&DefaultToolChain), CudaDeviceToolChain(nullptr),
+      Args(_Args), TranslatedArgs(_TranslatedArgs), Redirects(nullptr),
       ForDiagnostics(false) {}
 
 Compilation::~Compilation() {

Modified: cfe/trunk/lib/Driver/Driver.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=253385&r1=253384&r2=253385&view=diff
==============================================================================
--- cfe/trunk/lib/Driver/Driver.cpp (original)
+++ cfe/trunk/lib/Driver/Driver.cpp Tue Nov 17 16:28:40 2015
@@ -491,6 +491,10 @@ Compilation *Driver::BuildCompilation(Ar
   // The compilation takes ownership of Args.
   Compilation *C = new Compilation(*this, TC, UArgs.release(), TranslatedArgs);
 
+  C->setCudaDeviceToolChain(
+      &getToolChain(C->getArgs(), llvm::Triple(TC.getTriple().isArch64Bit()
+                                                   ? "nvptx64-nvidia-cuda"
+                                                   : "nvptx-nvidia-cuda")));
   if (!HandleImmediateArgs(*C))
     return C;
 
@@ -501,10 +505,9 @@ Compilation *Driver::BuildCompilation(Ar
   // Construct the list of abstract actions to perform for this compilation. On
   // MachO targets this uses the driver-driver and universal actions.
   if (TC.getTriple().isOSBinFormatMachO())
-    BuildUniversalActions(C->getDefaultToolChain(), C->getArgs(), Inputs,
-                          C->getActions());
+    BuildUniversalActions(*C, C->getDefaultToolChain(), Inputs);
   else
-    BuildActions(C->getDefaultToolChain(), C->getArgs(), Inputs,
+    BuildActions(*C, C->getDefaultToolChain(), C->getArgs(), Inputs,
                  C->getActions());
 
   if (CCCPrintPhases) {
@@ -617,9 +620,9 @@ void Driver::generateCompilationDiagnost
   // Darwin OSes this uses the driver-driver and builds universal actions.
   const ToolChain &TC = C.getDefaultToolChain();
   if (TC.getTriple().isOSBinFormatMachO())
-    BuildUniversalActions(TC, C.getArgs(), Inputs, C.getActions());
+    BuildUniversalActions(C, TC, Inputs);
   else
-    BuildActions(TC, C.getArgs(), Inputs, C.getActions());
+    BuildActions(C, TC, C.getArgs(), Inputs, C.getActions());
 
   BuildJobs(C);
 
@@ -948,7 +951,7 @@ static unsigned PrintActions1(const Comp
     os << '"' << CDA->getGpuArchName() << '"' << ", {"
        << PrintActions1(C, *CDA->begin(), Ids) << "}";
   } else {
-    ActionList *AL;
+    const ActionList *AL;
     if (CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) {
       os << "{" << PrintActions1(C, *CHA->begin(), Ids) << "}"
          << ", gpu binaries ";
@@ -997,9 +1000,10 @@ static bool ContainsCompileOrAssembleAct
   return false;
 }
 
-void Driver::BuildUniversalActions(const ToolChain &TC, DerivedArgList &Args,
-                                   const InputList &BAInputs,
-                                   ActionList &Actions) const {
+void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC,
+                                   const InputList &BAInputs) const {
+  DerivedArgList &Args = C.getArgs();
+  ActionList &Actions = C.getActions();
   llvm::PrettyStackTraceString CrashInfo("Building universal build actions");
   // Collect the list of architectures. Duplicates are allowed, but should only
   // be handled once (in the order seen).
@@ -1028,7 +1032,7 @@ void Driver::BuildUniversalActions(const
     Archs.push_back(Args.MakeArgString(TC.getDefaultUniversalArchName()));
 
   ActionList SingleActions;
-  BuildActions(TC, Args, BAInputs, SingleActions);
+  BuildActions(C, TC, Args, BAInputs, SingleActions);
 
   // Add in arch bindings for every top level action, as well as lipo and
   // dsymutil steps if needed.
@@ -1279,21 +1283,15 @@ void Driver::BuildInputs(const ToolChain
 // and returns a new CudaHostAction which wraps /p Current and device
 // side actions.
 static std::unique_ptr<Action>
-buildCudaActions(const Driver &D, const ToolChain &TC, DerivedArgList &Args,
-                 const Arg *InputArg, std::unique_ptr<Action> HostAction,
-                 ActionList &Actions) {
-  // Figure out which NVPTX triple to use for device-side compilation based on
-  // whether host is 64-bit.
-  const char *DeviceTriple = TC.getTriple().isArch64Bit()
-                                 ? "nvptx64-nvidia-cuda"
-                                 : "nvptx-nvidia-cuda";
+buildCudaActions(Compilation &C, DerivedArgList &Args, const Arg *InputArg,
+                 std::unique_ptr<Action> HostAction, ActionList &Actions) {
   Arg *PartialCompilationArg = Args.getLastArg(options::OPT_cuda_host_only,
                                                options::OPT_cuda_device_only);
   // Host-only compilation case.
   if (PartialCompilationArg &&
       PartialCompilationArg->getOption().matches(options::OPT_cuda_host_only))
     return std::unique_ptr<Action>(
-        new CudaHostAction(std::move(HostAction), {}, DeviceTriple));
+        new CudaHostAction(std::move(HostAction), {}));
 
   // Collect all cuda_gpu_arch parameters, removing duplicates.
   SmallVector<const char *, 4> GpuArchList;
@@ -1317,8 +1315,11 @@ buildCudaActions(const Driver &D, const
     CudaDeviceInputs.push_back(std::make_pair(types::TY_CUDA_DEVICE, InputArg));
 
   // Build actions for all device inputs.
+  assert(C.getCudaDeviceToolChain() &&
+         "Missing toolchain for device-side compilation.");
   ActionList CudaDeviceActions;
-  D.BuildActions(TC, Args, CudaDeviceInputs, CudaDeviceActions);
+  C.getDriver().BuildActions(C, *C.getCudaDeviceToolChain(), Args,
+                             CudaDeviceInputs, CudaDeviceActions);
   assert(GpuArchList.size() == CudaDeviceActions.size() &&
          "Failed to create actions for all devices");
 
@@ -1342,14 +1343,15 @@ buildCudaActions(const Driver &D, const
     // -o is ambiguous if we have more than one top-level action.
     if (Args.hasArg(options::OPT_o) &&
         (!DeviceOnlyCompilation || GpuArchList.size() > 1)) {
-      D.Diag(clang::diag::err_drv_output_argument_with_multiple_files);
+      C.getDriver().Diag(
+          clang::diag::err_drv_output_argument_with_multiple_files);
       return nullptr;
     }
 
     for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
       Actions.push_back(new CudaDeviceAction(
           std::unique_ptr<Action>(CudaDeviceActions[I]), GpuArchList[I],
-          DeviceTriple, /* AtTopLevel */ true));
+          /* AtTopLevel */ true));
     // Kill host action in case of device-only compilation.
     if (DeviceOnlyCompilation)
       HostAction.reset(nullptr);
@@ -1362,15 +1364,16 @@ buildCudaActions(const Driver &D, const
   for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
     DeviceActions.push_back(new CudaDeviceAction(
         std::unique_ptr<Action>(CudaDeviceActions[I]), GpuArchList[I],
-        DeviceTriple, /* AtTopLevel */ false));
+        /* AtTopLevel */ false));
   // Return a new host action that incorporates original host action and all
   // device actions.
   return std::unique_ptr<Action>(
-      new CudaHostAction(std::move(HostAction), DeviceActions, DeviceTriple));
+      new CudaHostAction(std::move(HostAction), DeviceActions));
 }
 
-void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args,
-                          const InputList &Inputs, ActionList &Actions) const {
+void Driver::BuildActions(Compilation &C, const ToolChain &TC,
+                          DerivedArgList &Args, const InputList &Inputs,
+                          ActionList &Actions) const {
   llvm::PrettyStackTraceString CrashInfo("Building compilation actions");
 
   if (!SuppressMissingInputWarning && Inputs.empty()) {
@@ -1500,8 +1503,8 @@ void Driver::BuildActions(const ToolChai
       Current = ConstructPhaseAction(TC, Args, Phase, std::move(Current));
 
       if (InputType == types::TY_CUDA && Phase == CudaInjectionPhase) {
-        Current = buildCudaActions(*this, TC, Args, InputArg,
-                                   std::move(Current), Actions);
+        Current =
+            buildCudaActions(C, Args, InputArg, std::move(Current), Actions);
         if (!Current)
           break;
       }
@@ -1803,7 +1806,7 @@ void Driver::BuildJobsForAction(Compilat
     InputInfo II;
     // Append outputs of device jobs to the input list.
     for (const Action *DA : CHA->getDeviceActions()) {
-      BuildJobsForAction(C, DA, TC, "", AtTopLevel,
+      BuildJobsForAction(C, DA, TC, nullptr, AtTopLevel,
                          /*MultipleArchs*/ false, LinkingOutput, II);
       CudaDeviceInputInfos.push_back(II);
     }
@@ -1843,11 +1846,12 @@ void Driver::BuildJobsForAction(Compilat
   }
 
   if (const CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) {
-    BuildJobsForAction(
-        C, *CDA->begin(),
-        &getToolChain(C.getArgs(), llvm::Triple(CDA->getDeviceTriple())),
-        CDA->getGpuArchName(), CDA->isAtTopLevel(),
-        /*MultipleArchs*/ true, LinkingOutput, Result);
+    // Initial processing of CudaDeviceAction carries host params.
+    // Call BuildJobsForAction() again, now with correct device parameters.
+    assert(CDA->getGpuArchName() && "No GPU name in device action.");
+    BuildJobsForAction(C, *CDA->begin(), C.getCudaDeviceToolChain(),
+                       CDA->getGpuArchName(), CDA->isAtTopLevel(),
+                       /*MultipleArchs*/ true, LinkingOutput, Result);
     return;
   }
 

Modified: cfe/trunk/lib/Driver/Tools.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Tools.cpp?rev=253385&r1=253384&r2=253385&view=diff
==============================================================================
--- cfe/trunk/lib/Driver/Tools.cpp (original)
+++ cfe/trunk/lib/Driver/Tools.cpp Tue Nov 17 16:28:40 2015
@@ -3262,6 +3262,23 @@ void Clang::ConstructJob(Compilation &C,
   CmdArgs.push_back("-triple");
   CmdArgs.push_back(Args.MakeArgString(TripleStr));
 
+  if (IsCuda) {
+    // FIXME: We need a (better) way to pass information about
+    // particular compilation pass we're constructing here. For now we
+    // can check which toolchain we're using and pick the other one to
+    // extract the triple.
+    const ToolChain *AuxToolChain;
+    if (&getToolChain() == C.getCudaDeviceToolChain())
+      AuxToolChain = C.getCudaHostToolChain();
+    else if (&getToolChain() == C.getCudaHostToolChain())
+      AuxToolChain = C.getCudaDeviceToolChain();
+    else
+      llvm_unreachable("Can't figure out CUDA compilation mode.");
+    assert(AuxToolChain != nullptr && "No aux toolchain.");
+    CmdArgs.push_back("-aux-triple");
+    CmdArgs.push_back(Args.MakeArgString(AuxToolChain->getTriple().str()));
+  }
+
   if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm ||
                                Triple.getArch() == llvm::Triple::thumb)) {
     unsigned Offset = Triple.getArch() == llvm::Triple::arm ? 4 : 6;

Modified: cfe/trunk/test/Driver/cuda-options.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/cuda-options.cu?rev=253385&r1=253384&r2=253385&view=diff
==============================================================================
--- cfe/trunk/test/Driver/cuda-options.cu (original)
+++ cfe/trunk/test/Driver/cuda-options.cu Tue Nov 17 16:28:40 2015
@@ -111,14 +111,6 @@
 // Make sure we don't link anything.
 // RUN:   -check-prefix CUDA-NL %s
 
-// Match device-side preprocessor, and compiler phases with -save-temps
-// CUDA-D1S: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
-// CUDA-D1S-SAME: "-fcuda-is-device"
-// CUDA-D1S-SAME: "-x" "cuda"
-// CUDA-D1S: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
-// CUDA-D1S-SAME: "-fcuda-is-device"
-// CUDA-D1S-SAME: "-x" "cuda-cpp-output"
-
 // --cuda-host-only should never trigger unused arg warning.
 // RUN: %clang -### -target x86_64-linux-gnu --cuda-host-only -c %s 2>&1 | \
 // RUN:    FileCheck -check-prefix CUDA-NO-UNUSED-CHO %s
@@ -133,34 +125,47 @@
 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only -x c -c %s 2>&1 | \
 // RUN:    FileCheck -check-prefix CUDA-UNUSED-CDO %s
 
+// Match device-side preprocessor, and compiler phases with -save-temps
+// CUDA-D1S: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA-D1S-SAME: "-aux-triple" "x86_64--linux-gnu"
+// CUDA-D1S-SAME: "-fcuda-is-device"
+// CUDA-D1S-SAME: "-x" "cuda"
+
+// CUDA-D1S: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA-D1S-SAME: "-aux-triple" "x86_64--linux-gnu"
+// CUDA-D1S-SAME: "-fcuda-is-device"
+// CUDA-D1S-SAME: "-x" "cuda-cpp-output"
+
 // Match the job that produces PTX assembly
-// CUDA-D1: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
+// CUDA-D1: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA-D1NS-SAME: "-aux-triple" "x86_64--linux-gnu"
 // CUDA-D1-SAME: "-fcuda-is-device"
 // CUDA-D1-SM35-SAME: "-target-cpu" "sm_35"
 // CUDA-D1-SAME: "-o" "[[GPUBINARY1:[^"]*]]"
 // CUDA-D1NS-SAME: "-x" "cuda"
 // CUDA-D1S-SAME: "-x" "ir"
 
-// Match anothe device-side compilation
-// CUDA-D2: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
+// Match another device-side compilation
+// CUDA-D2: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA-D2-SAME: "-aux-triple" "x86_64--linux-gnu"
 // CUDA-D2-SAME: "-fcuda-is-device"
 // CUDA-D2-SM30-SAME: "-target-cpu" "sm_30"
 // CUDA-D2-SAME: "-o" "[[GPUBINARY2:[^"]*]]"
 // CUDA-D2-SAME: "-x" "cuda"
 
 // Match no device-side compilation
-// CUDA-ND-NOT: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
+// CUDA-ND-NOT: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 // CUDA-ND-SAME-NOT: "-fcuda-is-device"
 
 // Match host-side preprocessor job with -save-temps
-// CUDA-HS: "-cc1" "-triple"
-// CUDA-HS-SAME-NOT: "nvptx{{(64)?}}-nvidia-cuda"
+// CUDA-HS: "-cc1" "-triple" "x86_64--linux-gnu"
+// CUDA-HS-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
 // CUDA-HS-SAME-NOT: "-fcuda-is-device"
 // CUDA-HS-SAME: "-x" "cuda"
 
 // Match host-side compilation
-// CUDA-H: "-cc1" "-triple"
-// CUDA-H-SAME-NOT: "nvptx{{(64)?}}-nvidia-cuda"
+// CUDA-H: "-cc1" "-triple" "x86_64--linux-gnu"
+// CUDA-H-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
 // CUDA-H-SAME-NOT: "-fcuda-is-device"
 // CUDA-H-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]"
 // CUDA-HNS-SAME: "-x" "cuda"

Modified: cfe/trunk/test/SemaCUDA/function-target-hd.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCUDA/function-target-hd.cu?rev=253385&r1=253384&r2=253385&view=diff
==============================================================================
--- cfe/trunk/test/SemaCUDA/function-target-hd.cu (original)
+++ cfe/trunk/test/SemaCUDA/function-target-hd.cu Tue Nov 17 16:28:40 2015
@@ -8,9 +8,9 @@
 // host device functions are not allowed to call device functions.
 
 // RUN: %clang_cc1 -fsyntax-only -verify %s
-// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s
+// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -triple nvptx-unknown-cuda -verify %s
 // RUN: %clang_cc1 -fsyntax-only -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD
-// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD
+// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -triple nvptx-unknown-cuda -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD
 
 #include "Inputs/cuda.h"