[PATCH] D11310: [CUDA] Moved device-side triple calculation to buildCudaActions

Justin Bogner mail at justinbogner.com
Fri Jul 17 13:51:21 PDT 2015


Artem Belevich <tra at google.com> writes:
> tra created this revision.
> tra added reviewers: bogner, echristo.
> tra added a subscriber: cfe-commits.
>
> This attempts to keep this bit of CUDA-specific knowledge out of the
> relatively generic BuildJobsForAction.

LGTM.
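
The shape of the change, as a minimal standalone sketch (illustrative names
only, not the actual clang::driver classes): the device-side triple is chosen
once, where the CUDA actions are built, and the generic job-building code just
reads it back off the action.

  #include <cstdio>
  #include <memory>
  #include <string>
  #include <utility>

  // Illustrative stand-in for CudaDeviceAction after this patch: the action
  // now carries the device-side triple alongside the GPU arch name.
  struct DeviceActionSketch {
    std::string GpuArchName;
    std::string DeviceTriple;
    bool AtTopLevel;
  };

  // Illustrative stand-in for buildCudaActions: the CUDA-specific knowledge
  // of which NVPTX triple matches the host lives here, next to the rest of
  // the CUDA action-building logic.
  static std::unique_ptr<DeviceActionSketch>
  buildDeviceActionSketch(bool HostIs64Bit, std::string GpuArch) {
    const char *DeviceTriple =
        HostIs64Bit ? "nvptx64-nvidia-cuda" : "nvptx-nvidia-cuda";
    return std::unique_ptr<DeviceActionSketch>(new DeviceActionSketch{
        std::move(GpuArch), DeviceTriple, /*AtTopLevel=*/false});
  }

  // Illustrative stand-in for BuildJobsForAction: it no longer computes the
  // triple itself, it just reads whatever the action carries.
  static void buildJobsForSketch(const DeviceActionSketch &DA) {
    std::printf("build %s job for triple %s\n", DA.GpuArchName.c_str(),
                DA.DeviceTriple.c_str());
  }

  int main() {
    auto DA = buildDeviceActionSketch(/*HostIs64Bit=*/true, "sm_35");
    buildJobsForSketch(*DA); // build sm_35 job for triple nvptx64-nvidia-cuda
    return 0;
  }

The actual patch threads the triple through CudaDeviceAction's constructor and
a getDeviceTriple() accessor, as the diff below shows.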

> http://reviews.llvm.org/D11310
>
> Files:
>   include/clang/Driver/Action.h
>   lib/Driver/Action.cpp
>   lib/Driver/Driver.cpp
>
> Index: lib/Driver/Driver.cpp
> ===================================================================
> --- lib/Driver/Driver.cpp
> +++ lib/Driver/Driver.cpp
> @@ -1237,11 +1237,8 @@
>  // side actions.
>  static std::unique_ptr<Action>
>  buildCudaActions(const Driver &D, const ToolChain &TC, DerivedArgList &Args,
> -                 const Arg *InputArg, const types::ID InputType,
> -                 std::unique_ptr<Action> Current, ActionList &Actions) {
> -
> -  assert(InputType == types::TY_CUDA &&
> -         "CUDA Actions only apply to CUDA inputs.");
> +                 const Arg *InputArg, std::unique_ptr<Action> HostAction,
> +                 ActionList &Actions) {
>  
>    // Collect all cuda_gpu_arch parameters, removing duplicates.
>    SmallVector<const char *, 4> GpuArchList;
> @@ -1279,6 +1276,12 @@
>      }
>    }
>  
> +  // Figure out which NVPTX triple to use for device-side compilation based on
> +  // whether host is 64-bit.
> +  const char *DeviceTriple = TC.getTriple().isArch64Bit()
> +                                 ? "nvptx64-nvidia-cuda"
> +                                 : "nvptx-nvidia-cuda";
> +
>    // Figure out what to do with device actions -- pass them as inputs to the
>    // host action or run each of them independently.
>    bool DeviceOnlyCompilation = Args.hasArg(options::OPT_cuda_device_only);
> @@ -1295,26 +1298,26 @@
>      }
>  
>      for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
> -      Actions.push_back(
> -          new CudaDeviceAction(std::unique_ptr<Action>(CudaDeviceActions[I]),
> -                               GpuArchList[I], /* AtTopLevel */ true));
> +      Actions.push_back(new CudaDeviceAction(
> +          std::unique_ptr<Action>(CudaDeviceActions[I]), GpuArchList[I],
> +          DeviceTriple, /* AtTopLevel */ true));
>      // Kill host action in case of device-only compilation.
>      if (DeviceOnlyCompilation)
> -      Current.reset(nullptr);
> -    return Current;
> +      HostAction.reset(nullptr);
> +    return HostAction;
>    }
>  
>    // Outputs of device actions during complete CUDA compilation get created
>    // with AtTopLevel=false and become inputs for the host action.
>    ActionList DeviceActions;
>    for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
> -    DeviceActions.push_back(
> -        new CudaDeviceAction(std::unique_ptr<Action>(CudaDeviceActions[I]),
> -                             GpuArchList[I], /* AtTopLevel */ false));
> +    DeviceActions.push_back(new CudaDeviceAction(
> +        std::unique_ptr<Action>(CudaDeviceActions[I]), GpuArchList[I],
> +        DeviceTriple, /* AtTopLevel */ false));
>    // Return a new host action that incorporates original host action and all
>    // device actions.
>    return std::unique_ptr<Action>(
> -      new CudaHostAction(std::move(Current), DeviceActions));
> +      new CudaHostAction(std::move(HostAction), DeviceActions));
>  }
>  
>  void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args,
> @@ -1451,7 +1454,7 @@
>        Current = ConstructPhaseAction(TC, Args, Phase, std::move(Current));
>  
>        if (InjectCuda && Phase == CudaInjectionPhase) {
> -        Current = buildCudaActions(*this, TC, Args, InputArg, InputType,
> +        Current = buildCudaActions(*this, TC, Args, InputArg,
>                                     std::move(Current), Actions);
>          if (!Current)
>            break;
> @@ -1794,15 +1797,11 @@
>    }
>  
>    if (const CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) {
> -    // Figure out which NVPTX triple to use for device-side compilation based on
> -    // whether host is 64-bit.
> -    llvm::Triple DeviceTriple(TC->getTriple().isArch64Bit()
> -                                  ? "nvptx64-nvidia-cuda"
> -                                  : "nvptx-nvidia-cuda");
> -    BuildJobsForAction(C, *CDA->begin(),
> -                       &getToolChain(C.getArgs(), DeviceTriple),
> -                       CDA->getGpuArchName(), CDA->isAtTopLevel(),
> -                       /*MultipleArchs*/ true, LinkingOutput, Result);
> +    BuildJobsForAction(
> +        C, *CDA->begin(),
> +        &getToolChain(C.getArgs(), llvm::Triple(CDA->getDeviceTriple())),
> +        CDA->getGpuArchName(), CDA->isAtTopLevel(),
> +        /*MultipleArchs*/ true, LinkingOutput, Result);
>      return;
>    }
>  
> Index: lib/Driver/Action.cpp
> ===================================================================
> --- lib/Driver/Action.cpp
> +++ lib/Driver/Action.cpp
> @@ -58,9 +58,10 @@
>  void CudaDeviceAction::anchor() {}
>  
>  CudaDeviceAction::CudaDeviceAction(std::unique_ptr<Action> Input,
> -                                   const char *ArchName, bool AtTopLevel)
> +                                   const char *ArchName,
> +                                   const char *DeviceTriple, bool AtTopLevel)
>      : Action(CudaDeviceClass, std::move(Input)), GpuArchName(ArchName),
> -      AtTopLevel(AtTopLevel) {}
> +      DeviceTriple(DeviceTriple), AtTopLevel(AtTopLevel) {}
>  
>  void CudaHostAction::anchor() {}
>  
> Index: include/clang/Driver/Action.h
> ===================================================================
> --- include/clang/Driver/Action.h
> +++ include/clang/Driver/Action.h
> @@ -139,15 +139,17 @@
>    virtual void anchor();
>    /// GPU architecture to bind -- e.g 'sm_35'.
>    const char *GpuArchName;
> +  const char *DeviceTriple;
>    /// True when action results are not consumed by the host action (e.g when
>    /// -fsyntax-only or --cuda-device-only options are used).
>    bool AtTopLevel;
>  
>  public:
>    CudaDeviceAction(std::unique_ptr<Action> Input, const char *ArchName,
> -                   bool AtTopLevel);
> +                   const char *DeviceTriple, bool AtTopLevel);
>  
>    const char *getGpuArchName() const { return GpuArchName; }
> +  const char *getDeviceTriple() const { return DeviceTriple; }
>    bool isAtTopLevel() const { return AtTopLevel; }
>  
>    static bool classof(const Action *A) {


