[clang] ac135f9 - [Clang] Fix using LTO with the new driver in RDC-mode

Joseph Huber via cfe-commits cfe-commits at lists.llvm.org
Thu Oct 6 07:36:22 PDT 2022


Author: Joseph Huber
Date: 2022-10-06T09:36:09-05:00
New Revision: ac135f9ee574e7451088926c667d93d51a3d6940

URL: https://github.com/llvm/llvm-project/commit/ac135f9ee574e7451088926c667d93d51a3d6940
DIFF: https://github.com/llvm/llvm-project/commit/ac135f9ee574e7451088926c667d93d51a3d6940.diff

LOG: [Clang] Fix using LTO with the new driver in RDC-mode

The new driver supports LTO for RDC-mode compilations. However, this was
not correctly handled for non-RDC compilations. HIP can handle this as
it is fed to `lld` which will perform the LTO itself. CUDA however would
require extra work which is wholly useless in non-RDC mode so it should
report an error.

Reviewed By: yaxunl

Differential Revision: https://reviews.llvm.org/D135305

Added: 
    

Modified: 
    clang/lib/Driver/Driver.cpp
    clang/lib/Driver/ToolChains/Clang.cpp
    clang/test/Driver/cuda-bindings.cu
    clang/test/Driver/cuda-phases.cu
    clang/test/Driver/hip-binding.hip
    clang/test/Driver/hip-phases.hip

Removed: 
    


################################################################################
diff  --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index be62ce5de4b3..21dc180d26c3 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -4375,7 +4375,9 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
 
     // Compiling HIP in non-RDC mode requires linking each action individually.
     for (Action *&A : DeviceActions) {
-      if (A->getType() != types::TY_Object || Kind != Action::OFK_HIP ||
+      if ((A->getType() != types::TY_Object &&
+           A->getType() != types::TY_LTO_BC) ||
+          Kind != Action::OFK_HIP ||
           Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
         continue;
       ActionList LinkerInput = {A};

diff  --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 4f880c27b2f1..1a3ee0964835 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4842,7 +4842,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
       CmdArgs.push_back("-emit-llvm-uselists");
 
     if (IsUsingLTO) {
-      // Only AMDGPU supports device-side LTO.
       if (IsDeviceOffloadAction && !JA.isDeviceOffloading(Action::OFK_OpenMP) &&
           !Args.hasFlag(options::OPT_offload_new_driver,
                         options::OPT_no_offload_new_driver, false) &&
@@ -4852,6 +4851,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
                                options::OPT_foffload_lto_EQ)
                    ->getAsString(Args)
             << Triple.getTriple();
+      } else if (Triple.isNVPTX() && !IsRDCMode) {
+        D.Diag(diag::err_drv_unsupported_opt_for_language_mode)
+            << Args.getLastArg(options::OPT_foffload_lto,
+                               options::OPT_foffload_lto_EQ)
+                   ->getAsString(Args)
+            << "-fno-gpu-rdc";
       } else {
         assert(LTOMode == LTOK_Full || LTOMode == LTOK_Thin);
         CmdArgs.push_back(Args.MakeArgString(

diff  --git a/clang/test/Driver/cuda-bindings.cu b/clang/test/Driver/cuda-bindings.cu
index 78c9bb975c6f..7f2d60421cc3 100644
--- a/clang/test/Driver/cuda-bindings.cu
+++ b/clang/test/Driver/cuda-bindings.cu
@@ -234,3 +234,11 @@
 // SAVE-TEMPS: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_52"
 // SAVE-TEMPS: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"
 // SAVE-TEMPS: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu"
+
+//
+// Check to ensure that we cannot use '-foffload-lto' when not operating in RDC-mode.
+//
+// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -fno-gpu-rdc --offload-new-driver \
+// RUN:        -foffload-lto --offload-arch=sm_70 --offload-arch=sm_52 -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix=LTO-NO-RDC %s
+// LTO-NO-RDC: error: unsupported option '-foffload-lto' for language mode '-fno-gpu-rdc'

diff  --git a/clang/test/Driver/cuda-phases.cu b/clang/test/Driver/cuda-phases.cu
index 0230d30f8388..2622c3a1bf55 100644
--- a/clang/test/Driver/cuda-phases.cu
+++ b/clang/test/Driver/cuda-phases.cu
@@ -294,3 +294,27 @@
 // NON-CUDA-INPUT-NEXT: 22: backend, {21}, assembler, (host-cuda)
 // NON-CUDA-INPUT-NEXT: 23: assembler, {22}, object, (host-cuda)
 // NON-CUDA-INPUT-NEXT: 24: clang-linker-wrapper, {18, 23}, image, (host-cuda)
+
+//
+// Test the phases using the new driver in LTO-mode.
+//
+// RUN: %clang -### -target powerpc64le-ibm-linux-gnu --offload-new-driver -ccc-print-phases \
+// RUN:        --offload-arch=sm_70 --offload-arch=sm_52 -foffload-lto -fgpu-rdc -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix=LTO %s
+//      LTO: 0: input, "[[INPUT:.+]]", cuda, (host-cuda)
+// LTO-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
+// LTO-NEXT: 2: compiler, {1}, ir, (host-cuda)
+// LTO-NEXT: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_52)
+// LTO-NEXT: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_52)
+// LTO-NEXT: 5: compiler, {4}, ir, (device-cuda, sm_52)
+// LTO-NEXT: 6: backend, {5}, lto-bc, (device-cuda, sm_52)
+// LTO-NEXT: 7: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, lto-bc
+// LTO-NEXT: 8: input, "[[INPUT]]", cuda, (device-cuda, sm_70)
+// LTO-NEXT: 9: preprocessor, {8}, cuda-cpp-output, (device-cuda, sm_70)
+// LTO-NEXT: 10: compiler, {9}, ir, (device-cuda, sm_70)
+// LTO-NEXT: 11: backend, {10}, lto-bc, (device-cuda, sm_70)
+// LTO-NEXT: 12: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {11}, lto-bc
+// LTO-NEXT: 13: clang-offload-packager, {7, 12}, image, (device-cuda)
+// LTO-NEXT: 14: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (powerpc64le-ibm-linux-gnu)" {13}, ir
+// LTO-NEXT: 15: backend, {14}, assembler, (host-cuda)
+// LTO-NEXT: 16: assembler, {15}, object, (host-cuda)

diff  --git a/clang/test/Driver/hip-binding.hip b/clang/test/Driver/hip-binding.hip
index 6875af6a85c1..c4ec5080afea 100644
--- a/clang/test/Driver/hip-binding.hip
+++ b/clang/test/Driver/hip-binding.hip
@@ -79,3 +79,15 @@
 // SYNTAX-ONLY: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-fsyntax-only"
 // SYNTAX-ONLY: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-fsyntax-only"
 // SYNTAX-ONLY: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-fsyntax-only"
+
+//
+// Check to ensure that we can use '-foffload-lto' when not operating in RDC-mode.
+//
+// RUN: %clang -### --target=x86_64-linux-gnu -fno-gpu-rdc --offload-new-driver -ccc-print-bindings \
+// RUN:        -foffload-lto --offload-arch=gfx90a --offload-arch=gfx908 -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix=LTO-NO-RDC %s
+//      LTO-NO-RDC: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[LTO_908:.+]]"
+// LTO-NO-RDC-NEXT: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[LTO_908]]"], output: "[[OBJ_908:.+]]"
+// LTO-NO-RDC-NEXT: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]"], output: "[[LTO_90A:.+]]"
+// LTO-NO-RDC-NEXT: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[LTO_90A]]"], output: "[[OBJ_90A:.+]]"
+// LTO-NO-RDC-NEXT: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ_908]]", "[[OBJ_90A]]"], output: "[[HIPFB:.+]]"

diff  --git a/clang/test/Driver/hip-phases.hip b/clang/test/Driver/hip-phases.hip
index 157302588a70..a9be06371a6c 100644
--- a/clang/test/Driver/hip-phases.hip
+++ b/clang/test/Driver/hip-phases.hip
@@ -546,3 +546,27 @@
 
 // CHECK: [[L0:[0-9]+]]: linker, {[[A3]], [[B3]]}, ir, (device-hip, [[ARCH]])
 // CHECK: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH]])" {[[L0]]}, ir
+
+//
+// Test the phases using the new driver in LTO-mode.
+//
+// RUN: %clang -### --target=x86_64-linux-gnu --offload-new-driver -ccc-print-phases \
+// RUN:        --offload-arch=gfx90a --offload-arch=gfx908 -foffload-lto -fgpu-rdc -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix=LTO %s
+//      LTO: 0: input, "[[INPUT:.+]]", hip, (host-hip)
+// LTO-NEXT: 1: preprocessor, {0}, hip-cpp-output, (host-hip)
+// LTO-NEXT: 2: compiler, {1}, ir, (host-hip)
+// LTO-NEXT: 3: input, "[[INPUT]]", hip, (device-hip, gfx908)
+// LTO-NEXT: 4: preprocessor, {3}, hip-cpp-output, (device-hip, gfx908)
+// LTO-NEXT: 5: compiler, {4}, ir, (device-hip, gfx908)
+// LTO-NEXT: 6: backend, {5}, lto-bc, (device-hip, gfx908)
+// LTO-NEXT: 7: offload, "device-hip (amdgcn-amd-amdhsa:gfx908)" {6}, lto-bc
+// LTO-NEXT: 8: input, "[[INPUT]]", hip, (device-hip, gfx90a)
+// LTO-NEXT: 9: preprocessor, {8}, hip-cpp-output, (device-hip, gfx90a)
+// LTO-NEXT: 10: compiler, {9}, ir, (device-hip, gfx90a)
+// LTO-NEXT: 11: backend, {10}, lto-bc, (device-hip, gfx90a)
+// LTO-NEXT: 12: offload, "device-hip (amdgcn-amd-amdhsa:gfx90a)" {11}, lto-bc
+// LTO-NEXT: 13: clang-offload-packager, {7, 12}, image, (device-hip)
+// LTO-NEXT: 14: offload, "host-hip (x86_64-unknown-linux-gnu)" {2}, "device-hip (x86_64-unknown-linux-gnu)" {13}, ir
+// LTO-NEXT: 15: backend, {14}, assembler, (host-hip)
+// LTO-NEXT: 16: assembler, {15}, object, (host-hip)


        


More information about the cfe-commits mailing list