[llvm] [mlir] [mlir] Decouple NVPTX target from CUDA toolkit presence (PR #93008)

Wed May 22 13:53:43 PDT 2024

https://github.com/tyb0807 updated https://github.com/llvm/llvm-project/pull/93008

>From 0d31af7ba296fc89b4c33ca35bad3dfc57692783 Mon Sep 17 00:00:00 2001
From: Son Vu <son at brium.ai>
Date: Wed, 22 May 2024 11:22:06 +0200
Subject: [PATCH] [mlir] Replace MLIR_ENABLE_CUDA_CONVERSIONS with
 LLVM_HAS_NVPTX_TARGET

---
 mlir/CMakeLists.txt                           |  8 -----
 mlir/include/mlir/InitAllPasses.h             |  3 +-
 .../GPU/Pipelines/GPUToNVVMPipeline.cpp       |  5 ++-
 .../Dialect/GPU/Transforms/ModuleToBinary.cpp |  3 +-
 mlir/lib/Target/LLVM/CMakeLists.txt           |  2 +-
 mlir/lib/Target/LLVM/NVVM/Target.cpp          | 34 ++++++++++---------
 mlir/test/CMakeLists.txt                      |  1 -
 .../Dialect/GPU/module-to-binary-nvvm.mlir    |  1 -
 mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir |  3 +-
 mlir/test/lit.cfg.py                          |  3 --
 mlir/test/lit.site.cfg.py.in                  |  1 -
 .../dialects/gpu/module-to-binary-nvvm.py     |  1 -
 .../Target/LLVM/SerializeNVVMTarget.cpp       |  2 +-
 .../llvm-project-overlay/mlir/BUILD.bazel     |  7 ----
 .../mlir/test/BUILD.bazel                     |  1 -
 15 files changed, 25 insertions(+), 50 deletions(-)

diff --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt
index 4c0ef8387b8df..9f0b0d68b816e 100644
--- a/mlir/CMakeLists.txt
+++ b/mlir/CMakeLists.txt
@@ -110,14 +110,6 @@ else()
   set(MLIR_ENABLE_EXECUTION_ENGINE 0)
 endif()
 
-# Build the CUDA conversions and run according tests if the NVPTX backend
-# is available
-if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
-  set(MLIR_ENABLE_CUDA_CONVERSIONS 1)
-else()
-  set(MLIR_ENABLE_CUDA_CONVERSIONS 0)
-endif()
-
 # Build the ROCm conversions and run according tests if the AMDGPU backend
 # is available.
 if ("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD)
diff --git a/mlir/include/mlir/InitAllPasses.h b/mlir/include/mlir/InitAllPasses.h
index 90406f555b0f4..fedd7737f9ea4 100644
--- a/mlir/include/mlir/InitAllPasses.h
+++ b/mlir/include/mlir/InitAllPasses.h
@@ -14,7 +14,6 @@
 #ifndef MLIR_INITALLPASSES_H_
 #define MLIR_INITALLPASSES_H_
 
-#include "mlir/Config/mlir-config.h"
 #include "mlir/Conversion/Passes.h"
 #include "mlir/Dialect/AMDGPU/Transforms/Passes.h"
 #include "mlir/Dialect/Affine/Passes.h"
@@ -99,7 +98,7 @@ inline void registerAllPasses() {
   bufferization::registerBufferizationPipelines();
   sparse_tensor::registerSparseTensorPipelines();
   tosa::registerTosaToLinalgPipelines();
-#if MLIR_ENABLE_CUDA_CONVERSIONS
+#if LLVM_HAS_NVPTX_TARGET
   gpu::registerGPUToNVVMPipeline();
 #endif
 }
diff --git a/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp b/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp
index db1974ddb3773..f4573030a4579 100644
--- a/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp
+++ b/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mlir/Config/mlir-config.h"
 #include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
 #include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
 #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h"
@@ -39,7 +38,7 @@
 
 using namespace mlir;
 
-#if MLIR_ENABLE_CUDA_CONVERSIONS
+#if LLVM_HAS_NVPTX_TARGET
 namespace {
 
 //===----------------------------------------------------------------------===//
@@ -128,4 +127,4 @@ void mlir::gpu::registerGPUToNVVMPipeline() {
       buildLowerToNVVMPassPipeline);
 }
 
-#endif // MLIR_ENABLE_CUDA_CONVERSIONS
+#endif // LLVM_HAS_NVPTX_TARGET
diff --git a/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp b/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp
index 836e939a8295b..1e7596e8cc4af 100644
--- a/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp
@@ -13,7 +13,6 @@
 
 #include "mlir/Dialect/GPU/Transforms/Passes.h"
 
-#include "mlir/Config/mlir-config.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
@@ -49,7 +48,7 @@ void GpuModuleToBinaryPass::getDependentDialects(
   // Register all GPU related translations.
   registry.insert<gpu::GPUDialect>();
   registry.insert<LLVM::LLVMDialect>();
-#if MLIR_ENABLE_CUDA_CONVERSIONS
+#if LLVM_HAS_NVPTX_TARGET
   registry.insert<NVVM::NVVMDialect>();
 #endif
 #if MLIR_ENABLE_ROCM_CONVERSIONS
diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt
index e0657c895e8a3..5a3fa160850b4 100644
--- a/mlir/lib/Target/LLVM/CMakeLists.txt
+++ b/mlir/lib/Target/LLVM/CMakeLists.txt
@@ -47,7 +47,7 @@ add_mlir_dialect_library(MLIRNVVMTarget
   MLIRNVVMToLLVMIRTranslation
   )
 
-if(MLIR_ENABLE_CUDA_CONVERSIONS)
+if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
   # Find the CUDA toolkit.
   find_package(CUDAToolkit)
 
diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp
index e438ce84af1b5..e75547ff9b850 100644
--- a/mlir/lib/Target/LLVM/NVVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp
@@ -13,7 +13,6 @@
 
 #include "mlir/Target/LLVM/NVVM/Target.h"
 
-#include "mlir/Config/mlir-config.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/LLVMIR/NVVMDialect.h"
 #include "mlir/Target/LLVM/NVVM/Utils.h"
@@ -158,40 +157,43 @@ SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) {
   return std::move(bcFiles);
 }
 
-#if MLIR_ENABLE_CUDA_CONVERSIONS
+#if LLVM_HAS_NVPTX_TARGET
 namespace {
 class NVPTXSerializer : public SerializeGPUModuleBase {
 public:
   NVPTXSerializer(Operation &module, NVVMTargetAttr target,
                   const gpu::TargetOptions &targetOptions);
 
+  /// Returns the GPU module op being serialized.
   gpu::GPUModuleOp getOperation();
 
-  // Compile PTX to cubin using `ptxas`.
+  /// Compiles PTX to cubin using `ptxas`.
   std::optional<SmallVector<char, 0>>
   compileToBinary(const std::string &ptxCode);
 
-  // Compile PTX to cubin using the `nvptxcompiler` library.
+  /// Compiles PTX to cubin using the `nvptxcompiler` library.
   std::optional<SmallVector<char, 0>>
   compileToBinaryNVPTX(const std::string &ptxCode);
 
+  /// Serializes the LLVM module to an object format, depending on the
+  /// compilation target selected in target options.
   std::optional<SmallVector<char, 0>>
   moduleToObject(llvm::Module &llvmModule) override;
 
 private:
   using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;
 
-  // Create a temp file.
+  /// Creates a temp file.
   std::optional<TmpFile> createTemp(StringRef name, StringRef suffix);
 
-  // Find the `tool` path, where `tool` is the name of the binary to search,
-  // i.e. `ptxas` or `fatbinary`. The search order is:
-  // 1. The toolkit path in `targetOptions`.
-  // 2. In the system PATH.
-  // 3. The path from `getCUDAToolkitPath()`.
+  /// Finds the `tool` path, where `tool` is the name of the binary to search,
+  /// i.e. `ptxas` or `fatbinary`. The search order is:
+  /// 1. The toolkit path in `targetOptions`.
+  /// 2. In the system PATH.
+  /// 3. The path from `getCUDAToolkitPath()`.
   std::optional<std::string> findTool(StringRef tool);
 
-  // Target options.
+  /// Target options.
   gpu::TargetOptions targetOptions;
 };
 } // namespace
@@ -515,7 +517,7 @@ NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
 
 std::optional<SmallVector<char, 0>>
 NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
-  // Return LLVM IR if the compilation target is offload.
+  // Return LLVM IR if the compilation target is `offload`.
 #define DEBUG_TYPE "serialize-to-llvm"
   LLVM_DEBUG({
     llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr()
@@ -549,7 +551,7 @@ NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
   });
 #undef DEBUG_TYPE
 
-  // Return PTX if the compilation target is assembly.
+  // Return PTX if the compilation target is `assembly`.
   if (targetOptions.getCompilationTarget() ==
       gpu::CompilationTarget::Assembly) {
     // Make sure to include the null terminator.
@@ -564,7 +566,7 @@ NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
   return compileToBinary(*serializedISA);
 #endif // MLIR_ENABLE_NVPTXCOMPILER
 }
-#endif // MLIR_ENABLE_CUDA_CONVERSIONS
+#endif // LLVM_HAS_NVPTX_TARGET
 
 std::optional<SmallVector<char, 0>>
 NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
@@ -576,7 +578,7 @@ NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
     module->emitError("Module must be a GPU module.");
     return std::nullopt;
   }
-#if MLIR_ENABLE_CUDA_CONVERSIONS
+#if LLVM_HAS_NVPTX_TARGET
   NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute), options);
   serializer.init();
   return serializer.run();
@@ -584,7 +586,7 @@ NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
   module->emitError(
       "The `NVPTX` target was not built. Please enable it when building LLVM.");
   return std::nullopt;
-#endif // MLIR_ENABLE_CUDA_CONVERSIONS
+#endif // LLVM_HAS_NVPTX_TARGET
 }
 
 Attribute
diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt
index 8806a1dd92238..0bd2970c6d942 100644
--- a/mlir/test/CMakeLists.txt
+++ b/mlir/test/CMakeLists.txt
@@ -68,7 +68,6 @@ endif()
 llvm_canonicalize_cmake_booleans(
   LLVM_BUILD_EXAMPLES
   MLIR_ENABLE_BINDINGS_PYTHON
-  MLIR_ENABLE_CUDA_CONVERSIONS
   MLIR_ENABLE_CUDA_RUNNER
   MLIR_ENABLE_ROCM_CONVERSIONS
   MLIR_ENABLE_ROCM_RUNNER
diff --git a/mlir/test/Dialect/GPU/module-to-binary-nvvm.mlir b/mlir/test/Dialect/GPU/module-to-binary-nvvm.mlir
index c286c8bc9042f..a2d3949586a2c 100644
--- a/mlir/test/Dialect/GPU/module-to-binary-nvvm.mlir
+++ b/mlir/test/Dialect/GPU/module-to-binary-nvvm.mlir
@@ -1,4 +1,3 @@
-// REQUIRES: host-supports-nvptx
 // RUN: mlir-opt %s --gpu-module-to-binary="format=llvm" | FileCheck %s
 // RUN: mlir-opt %s --gpu-module-to-binary="format=isa" | FileCheck %s -check-prefix=CHECK-ISA
 
diff --git a/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir b/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir
index 07e719798b851..27d785b22599f 100644
--- a/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir
+++ b/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir
@@ -1,4 +1,3 @@
-// REQUIRES: host-supports-nvptx
 // RUN: mlir-opt %s \
 // RUN:  | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=isa" \
 // RUN:   | FileCheck %s
@@ -27,4 +26,4 @@ func.func @test_math(%arg0 : f32) {
         gpu.terminator
     }
     return
-}
\ No newline at end of file
+}
diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py
index ea6d9ae71b773..d65b9e731bdbe 100644
--- a/mlir/test/lit.cfg.py
+++ b/mlir/test/lit.cfg.py
@@ -245,8 +245,5 @@ def have_host_jit_feature_support(feature_name):
 if have_host_jit_feature_support("jit"):
     config.available_features.add("host-supports-jit")
 
-if config.run_cuda_tests:
-    config.available_features.add("host-supports-nvptx")
-
 if config.run_rocm_tests:
     config.available_features.add("host-supports-amdgpu")
diff --git a/mlir/test/lit.site.cfg.py.in b/mlir/test/lit.site.cfg.py.in
index c0fa1b8980e53..fc1f49853cc8e 100644
--- a/mlir/test/lit.site.cfg.py.in
+++ b/mlir/test/lit.site.cfg.py.in
@@ -25,7 +25,6 @@ config.mlir_cmake_dir = "@MLIR_CMAKE_DIR@"
 config.mlir_lib_dir = "@MLIR_LIB_DIR@"
 
 config.build_examples = @LLVM_BUILD_EXAMPLES@
-config.run_cuda_tests = @MLIR_ENABLE_CUDA_CONVERSIONS@
 config.enable_cuda_runner = @MLIR_ENABLE_CUDA_RUNNER@
 config.run_rocm_tests = @MLIR_ENABLE_ROCM_CONVERSIONS@
 config.enable_rocm_runner = @MLIR_ENABLE_ROCM_RUNNER@
diff --git a/mlir/test/python/dialects/gpu/module-to-binary-nvvm.py b/mlir/test/python/dialects/gpu/module-to-binary-nvvm.py
index e0225911b2144..9257d2e235c9e 100644
--- a/mlir/test/python/dialects/gpu/module-to-binary-nvvm.py
+++ b/mlir/test/python/dialects/gpu/module-to-binary-nvvm.py
@@ -1,4 +1,3 @@
-# REQUIRES: host-supports-nvptx
 # RUN: %PYTHON %s | FileCheck %s
 
 from mlir.ir import *
diff --git a/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp b/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
index cea49356538f0..a8fe20d52fb2a 100644
--- a/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
+++ b/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
@@ -30,7 +30,7 @@
 using namespace mlir;
 
 // Skip the test if the NVPTX target was not built.
-#if MLIR_ENABLE_CUDA_CONVERSIONS
+#if LLVM_HAS_NVPTX_TARGET
 #define SKIP_WITHOUT_NVPTX(x) x
 #else
 #define SKIP_WITHOUT_NVPTX(x) DISABLED_##x
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 614fe511d43a5..c9e31985781bf 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -51,9 +51,6 @@ expand_template(
         "#cmakedefine01 MLIR_ENABLE_NVPTXCOMPILER": "#define MLIR_ENABLE_NVPTXCOMPILER 0",
         "#cmakedefine01 MLIR_ENABLE_PDL_IN_PATTERNMATCH": "#define MLIR_ENABLE_PDL_IN_PATTERNMATCH 1",
         "#cmakedefine01 MLIR_ENABLE_ROCM_CONVERSIONS": "#define MLIR_ENABLE_ROCM_CONVERSIONS 0",
-    } | if_cuda_available(
-        {"#cmakedefine01 MLIR_ENABLE_CUDA_CONVERSIONS": "#define MLIR_ENABLE_CUDA_CONVERSIONS 1"},
-        {"#cmakedefine01 MLIR_ENABLE_CUDA_CONVERSIONS": "#define MLIR_ENABLE_CUDA_CONVERSIONS 0"},
     ),
     template = "include/mlir/Config/mlir-config.h.cmake",
 )
@@ -5616,7 +5613,6 @@ cc_library(
         ":Transforms",
         ":VectorToLLVM",
         ":VectorToSCF",
-        ":config",
     ],
 )
 
@@ -5666,7 +5662,6 @@ cc_library(
         ":Transforms",
         ":VCIXToLLVMIRTranslation",
         ":VectorDialect",
-        ":config",
         "//llvm:Core",
         "//llvm:MC",
         "//llvm:Support",
@@ -6282,7 +6277,6 @@ cc_library(
         ":NVVMToLLVMIRTranslation",
         ":TargetLLVM",
         ":ToLLVMIRTranslation",
-        ":config",
         "//llvm:NVPTXCodeGen",
         "//llvm:Support",
         "//llvm:config",
@@ -9363,7 +9357,6 @@ cc_library(
         ":X86VectorTransforms",
         ":XeGPUDialect",
         ":XeGPUTransforms",
-        ":config",
     ],
 )
 
diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
index 258cc88ebbf39..e8d251d51dc54 100644
--- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
@@ -36,7 +36,6 @@ expand_template(
         "\"@MLIR_BINARY_DIR@\"": "os.environ[\"TEST_UNDECLARED_OUTPUTS_DIR\"]",
         # All disabled, but required to substituted because they are not in quotes.
         "@LLVM_BUILD_EXAMPLES@": "0",
-        "@MLIR_ENABLE_CUDA_CONVERSIONS@": "0",
         "@MLIR_ENABLE_CUDA_RUNNER@": "0",
         "@MLIR_ENABLE_ROCM_CONVERSIONS@": "0",
         "@MLIR_ENABLE_ROCM_RUNNER@": "0",