[Mlir-commits] [mlir] [XeVM] Refactor the SPIR-V generation to use SPIR-V backend API. (PR #189494)

Tue Apr 7 08:56:57 PDT 2026

https://github.com/mshahneo updated https://github.com/llvm/llvm-project/pull/189494

>From 0647f10af14c21627371497027796e89fac915fc Mon Sep 17 00:00:00 2001
From: "Shahneous Bari, Md Abdullah" <md.abdullah.shahneous.bari at intel.com>
Date: Mon, 30 Mar 2026 21:37:37 +0000
Subject: [PATCH 1/3] [XeVM] Refactor the SPIR-V generation to use SPIR-V
 backend API.

Currently, we use two different approaches to generate SPIR-V,
depending on the compilation target. If the compilation target is
`assembly/isa`, the MLIR interface `translateModuleToISA` is used
to convert an LLVM module to SPIR-V text. In all other cases
(`bin/fatbin` compilation targets), the SPIR-V backend API is used
to generate a SPIR-V binary.

The SPIR-V backend API is more powerful, as it lets one pass the
necessary extensions, which is a must when using any advanced or
vendor-specific SPIR-V features.

This PR discontinues the use of the MLIR API and consolidates on
the SPIR-V backend API.

It also ensures that SPIR-V generated from the MLIR side is always
in binary format (for both the XeVM target and the SPIR-V target).
---
 mlir/lib/Target/LLVM/XeVM/Target.cpp | 69 +++++++++-------------------
 1 file changed, 21 insertions(+), 48 deletions(-)

diff --git a/mlir/lib/Target/LLVM/XeVM/Target.cpp b/mlir/lib/Target/LLVM/XeVM/Target.cpp
index 83eec5e9d5549..e95771d9317a0 100644
--- a/mlir/lib/Target/LLVM/XeVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/XeVM/Target.cpp
@@ -306,54 +306,6 @@ SPIRVSerializer::moduleToObject(llvm::Module &llvmModule) {
       "The `SPIRV` target was not built. Please enable "
       "it when building LLVM.");
 #else
-
-  // Return SPIRV text if the compilation target is `assembly`.
-  // Note: Optimization passes are skipped and SPIRV extensions are
-  // not supported in this mode.
-  if (targetOptions.getCompilationTarget() ==
-      gpu::CompilationTarget::Assembly) {
-    FailureOr<llvm::TargetMachine *> targetMachine = getOrCreateTargetMachine();
-    if (failed(targetMachine))
-      return getGPUModuleOp().emitError()
-             << "Target Machine unavailable for triple " << triple
-             << ", can't optimize with LLVM\n";
-
-    FailureOr<SmallString<0>> serializedISA =
-        translateModuleToISA(llvmModule, **targetMachine,
-                             [&]() { return getGPUModuleOp().emitError(); });
-    if (failed(serializedISA))
-      return getGPUModuleOp().emitError()
-             << "Failed translating the module to ISA." << triple
-             << ", can't compile with LLVM\n";
-
-#define DEBUG_TYPE "serialize-to-isa"
-    LLVM_DEBUG({
-      llvm::dbgs() << "SPIR-V for module: " << getGPUModuleOp().getNameAttr()
-                   << "\n";
-      llvm::dbgs() << *serializedISA << "\n";
-      llvm::dbgs().flush();
-    });
-#undef DEBUG_TYPE
-
-    // Make sure to include the null terminator.
-    StringRef bin(serializedISA->c_str(), serializedISA->size() + 1);
-    return SmallVector<char, 0>(bin.begin(), bin.end());
-  }
-
-  // Binary generation path for SPIR-V target. Optimization and SPIR-V
-  // extensions are enabled in this path. In this path, first the SPIR-V binary
-  // is generated directly using the SPIR-V backends `SPIRVTranslateModule` API.
-  // Resultant SPIR-V is then fed to `ocloc` compiler (Intel's OpenCL Offline
-  // Compiler) to generate the final binary for Intel GPUs.
-
-  // @TODO: This part is doing exact same SPIR-V code generation as the previous
-  // section under (targetOptions.getCompilationTarget() ==
-  // gpu::CompilationTarget::Assembly) condition. Only execption is, it enables
-  // optimization and SPIRV extensions support for SPIRV binary output. We need
-  // to decide which one do we use for our SPIRV code generation, and remove the
-  // other one to avoid confusion. For now, we keep both to have more
-  // flexibility for testing and comparison.
-
   std::string serializedSPIRVBinary;
   std::string ErrMsg;
   std::vector<std::string> Opts;
@@ -375,6 +327,27 @@ SPIRVSerializer::moduleToObject(llvm::Module &llvmModule) {
 
   StringRef spirvBin(serializedSPIRVBinary.c_str(),
                      serializedSPIRVBinary.size());
+
+  // Return SPIRV binary if the compilation target is `assembly`. Optimization
+  // and SPIR-V extensions are enabled for SPIR-V binary output in both paths
+  // (assembly and binary) as of now. SPIR-V binary
+  // is generated directly using the SPIR-V backends `SPIRVTranslate` API.
+  if (targetOptions.getCompilationTarget() ==
+      gpu::CompilationTarget::Assembly) {
+#define DEBUG_TYPE "serialize-to-isa"
+    LLVM_DEBUG({
+      llvm::dbgs() << "SPIR-V for module: " << getGPUModuleOp().getNameAttr()
+                   << "\n";
+      llvm::dbgs() << serializedSPIRVBinary << "\n";
+      llvm::dbgs().flush();
+    });
+#undef DEBUG_TYPE
+    return SmallVector<char, 0>(spirvBin.begin(), spirvBin.end());
+  }
+
+  // Return native binary. Compile the SPIR-V binary to native binary for Intel
+  // GPUs using `ocloc` compiler (Intel's OpenCL Offline Compiler).
+
   return compileToBinary(spirvBin, "-spirv_input");
 #endif // LLVM_HAS_SPIRV_TARGET
 }

>From 7bce5ffe8422803d2f1037425617acb3e400cae9 Mon Sep 17 00:00:00 2001
From: "Shahneous Bari, Md Abdullah" <md.abdullah.shahneous.bari at intel.com>
Date: Mon, 30 Mar 2026 22:09:36 +0000
Subject: [PATCH 2/3] Remove test cases that relied on SPIR-V text generation.

---
 .../MathToXeVM/native-spirv-builtins.mlir     | 119 ------------------
 1 file changed, 119 deletions(-)
 delete mode 100644 mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir

diff --git a/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir b/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir
deleted file mode 100644
index 82426c44ddb1f..0000000000000
--- a/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir
+++ /dev/null
@@ -1,119 +0,0 @@
-// RUN: mlir-opt %s -gpu-module-to-binary="format=isa" \
-// RUN:             -debug-only=serialize-to-isa 2> %t 
-// RUN: FileCheck --input-file=%t %s
-// REQUIRES: asserts
-//
-// MathToXeVM pass generates OpenCL intrinsics function calls when converting
-// Math ops with `fastmath` attr to native function calls. It is assumed that
-// the SPIRV backend would correctly convert these intrinsics calls to OpenCL
-// ExtInst instructions in SPIRV (See llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp).
-//
-// To ensure this assumption holds, this test verifies that the SPIRV backend
-// behaves as expected.
-
-module @test_ocl_intrinsics attributes {gpu.container_module} {
-  gpu.module @kernel [#xevm.target] {
-    llvm.func spir_kernelcc @native_fcns() attributes {gpu.kernel} {
-      // CHECK-DAG: %[[F16T:.+]] = OpTypeFloat 16
-      // CHECK-DAG: %[[ZERO_F16:.+]] = OpConstantNull %[[F16T]]
-      %c0_f16 = llvm.mlir.constant(0. : f16) : f16
-      // CHECK-DAG: %[[F32T:.+]] = OpTypeFloat 32
-      // CHECK-DAG: %[[ZERO_F32:.+]] = OpConstantNull %[[F32T]]
-      %c0_f32 = llvm.mlir.constant(0. : f32) : f32
-      // CHECK-DAG: %[[F64T:.+]] = OpTypeFloat 64
-      // CHECK-DAG: %[[ZERO_F64:.+]] = OpConstantNull %[[F64T]]
-      %c0_f64 = llvm.mlir.constant(0. : f64) : f64
-
-      // CHECK-DAG: %[[V2F64T:.+]] = OpTypeVector %[[F64T]] 2
-      // CHECK-DAG: %[[V2_ZERO_F64:.+]] = OpConstantNull %[[V2F64T]]
-      %v2_c0_f64 = llvm.mlir.constant(dense<0.> : vector<2xf64>) : vector<2xf64>
-      // CHECK-DAG: %[[V3F32T:.+]] = OpTypeVector %[[F32T]] 3
-      // CHECK-DAG: %[[V3_ZERO_F32:.+]] = OpConstantNull %[[V3F32T]]
-      %v3_c0_f32 = llvm.mlir.constant(dense<0.> : vector<3xf32>) : vector<3xf32>
-      // CHECK-DAG: %[[V4F64T:.+]] = OpTypeVector %[[F64T]] 4
-      // CHECK-DAG: %[[V4_ZERO_F64:.+]] = OpConstantNull %[[V4F64T]]
-      %v4_c0_f64 = llvm.mlir.constant(dense<0.> : vector<4xf64>) : vector<4xf64>
-      // CHECK-DAG: %[[V8F64T:.+]] = OpTypeVector %[[F64T]] 8
-      // CHECK-DAG: %[[V8_ZERO_F64:.+]] = OpConstantNull %[[V8F64T]]
-      %v8_c0_f64 = llvm.mlir.constant(dense<0.> : vector<8xf64>) : vector<8xf64>
-      // CHECK-DAG: %[[V16F16T:.+]] = OpTypeVector %[[F16T]] 16
-      // CHECK-DAG: %[[V16_ZERO_F16:.+]] = OpConstantNull %[[V16F16T]]
-      %v16_c0_f16 = llvm.mlir.constant(dense<0.> : vector<16xf16>) : vector<16xf16>     
-
-      // CHECK: OpExtInst %[[F16T]] %{{.+}} native_exp %[[ZERO_F16]]
-      %exp_f16 = llvm.call @_Z22__spirv_ocl_native_expDh(%c0_f16) : (f16) -> f16
-      // CHECK: OpExtInst %[[F32T]] %{{.+}} native_exp %[[ZERO_F32]]
-      %exp_f32 = llvm.call @_Z22__spirv_ocl_native_expf(%c0_f32) : (f32) -> f32
-      // CHECK: OpExtInst %[[F64T]] %{{.+}} native_exp %[[ZERO_F64]]
-      %exp_f64 = llvm.call @_Z22__spirv_ocl_native_expd(%c0_f64) : (f64) -> f64
-
-      // CHECK: OpExtInst %[[V2F64T]] %{{.+}} native_exp %[[V2_ZERO_F64]]
-      %exp_v2_f64 = llvm.call @_Z22__spirv_ocl_native_expDv2_f64(%v2_c0_f64) : (vector<2xf64>) -> vector<2xf64>
-      // CHECK: OpExtInst %[[V3F32T]] %{{.+}} native_exp %[[V3_ZERO_F32]]
-      %exp_v3_f32 = llvm.call @_Z22__spirv_ocl_native_expDv3_f32(%v3_c0_f32) : (vector<3xf32>) -> vector<3xf32>
-      // CHECK: OpExtInst %[[V4F64T]] %{{.+}} native_exp %[[V4_ZERO_F64]]
-      %exp_v4_f64 = llvm.call @_Z22__spirv_ocl_native_expDv4_f64(%v4_c0_f64) : (vector<4xf64>) -> vector<4xf64>
-      // CHECK: OpExtInst %[[V8F64T]] %{{.+}} native_exp %[[V8_ZERO_F64]]
-      %exp_v8_f64 = llvm.call @_Z22__spirv_ocl_native_expDv8_f64(%v8_c0_f64) : (vector<8xf64>) -> vector<8xf64>
-      // CHECK: OpExtInst %[[V16F16T]] %{{.+}} native_exp %[[V16_ZERO_F16]]
-      %exp_v16_f16 = llvm.call @_Z22__spirv_ocl_native_expDv16_f16(%v16_c0_f16) : (vector<16xf16>) -> vector<16xf16>
-
-      // SPIRV backend does not currently handle fastmath flags: The SPIRV
-      // backend would need to generate OpDecorate calls to decorate math ops
-      // with FPFastMathMode/FPFastMathModeINTEL decorations.
-      //
-      // FIXME: When support for fastmath flags in the SPIRV backend is added, 
-      // add tests here to ensure fastmath flags are converted to the correct
-      // OpDecorate calls.
-      // 
-      // See:
-      // - https://registry.khronos.org/SPIR-V/specs/unified1/OpenCL.ExtendedInstructionSet.100.html#_math_extended_instructions
-      // - https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpDecorate
-
-      // CHECK: OpExtInst %[[F16T]] %{{.+}} native_cos %[[ZERO_F16]]
-      %cos_afn_f16 = llvm.call @_Z22__spirv_ocl_native_cosDh(%c0_f16) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16
-      // CHECK: OpExtInst %[[F32T]] %{{.+}} native_exp2 %[[ZERO_F32]]
-      %exp2_afn_f32 = llvm.call @_Z23__spirv_ocl_native_exp2f(%c0_f32) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32
-      // CHECK: OpExtInst %[[F16T]] %{{.+}} native_log %[[ZERO_F16]]
-      %log_afn_f16 = llvm.call @_Z22__spirv_ocl_native_logDh(%c0_f16) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16
-      // CHECK: OpExtInst %[[F32T]] %{{.+}} native_log2 %[[ZERO_F32]]
-      %log2_afn_f32 = llvm.call @_Z23__spirv_ocl_native_log2f(%c0_f32) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32
-      // CHECK: OpExtInst %[[V8F64T]] %{{.+}} native_log10 %[[V8_ZERO_F64]]
-      %log10_afn_f64 = llvm.call @_Z24__spirv_ocl_native_log10Dv8_d(%v8_c0_f64) {fastmathFlags = #llvm.fastmath<afn>} : (vector<8xf64>) -> vector<8xf64>
-      // CHECK: OpExtInst %[[V16F16T]] %{{.+}} native_powr %[[V16_ZERO_F16]] %[[V16_ZERO_F16]]
-      %powr_afn_f16 = llvm.call @_Z23__spirv_ocl_native_powrDv16_DhS_(%v16_c0_f16, %v16_c0_f16) {fastmathFlags = #llvm.fastmath<afn>} : (vector<16xf16>, vector<16xf16>) -> vector<16xf16>
-      // CHECK: OpExtInst %[[F64T]] %{{.+}} native_rsqrt %[[ZERO_F64]]
-      %rsqrt_afn_f64 = llvm.call @_Z24__spirv_ocl_native_rsqrtd(%c0_f64) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64
-      // CHECK: OpExtInst %[[F16T]] %{{.+}} native_sin %[[ZERO_F16]]
-      %sin_afn_f16 = llvm.call @_Z22__spirv_ocl_native_sinDh(%c0_f16) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16
-      // CHECK: OpExtInst %[[F32T]] %{{.+}} native_sqrt %[[ZERO_F32]]
-      %sqrt_afn_f32 = llvm.call @_Z23__spirv_ocl_native_sqrtf(%c0_f32) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32
-      // CHECK: OpExtInst %[[F64T]] %{{.+}} native_tan %[[ZERO_F64]]
-      %tan_afn_f64 = llvm.call @_Z22__spirv_ocl_native_tand(%c0_f64) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64
-      // CHECK: OpExtInst %[[F32T]] %{{.+}} native_divide %[[ZERO_F32]] %[[ZERO_F32]]
-      %divide_afn_f32 = llvm.call @_Z25__spirv_ocl_native_divideff(%c0_f32, %c0_f32) {fastmathFlags = #llvm.fastmath<afn>} : (f32, f32) -> f32
-
-      llvm.return
-    }
-
-    llvm.func @_Z22__spirv_ocl_native_expDh(f16) -> f16
-    llvm.func @_Z22__spirv_ocl_native_expf(f32) -> f32
-    llvm.func @_Z22__spirv_ocl_native_expd(f64) -> f64
-    llvm.func @_Z22__spirv_ocl_native_expDv2_f64(vector<2xf64>) -> vector<2xf64>
-    llvm.func @_Z22__spirv_ocl_native_expDv3_f32(vector<3xf32>) -> vector<3xf32>
-    llvm.func @_Z22__spirv_ocl_native_expDv4_f64(vector<4xf64>) -> vector<4xf64>
-    llvm.func @_Z22__spirv_ocl_native_expDv8_f64(vector<8xf64>) -> vector<8xf64>
-    llvm.func @_Z22__spirv_ocl_native_expDv16_f16(vector<16xf16>) -> vector<16xf16>
-    llvm.func @_Z22__spirv_ocl_native_cosDh(f16) -> f16
-    llvm.func @_Z23__spirv_ocl_native_exp2f(f32) -> f32
-    llvm.func @_Z22__spirv_ocl_native_logDh(f16) -> f16
-    llvm.func @_Z23__spirv_ocl_native_log2f(f32) -> f32
-    llvm.func @_Z24__spirv_ocl_native_log10Dv8_d(vector<8xf64>) -> vector<8xf64>
-    llvm.func @_Z23__spirv_ocl_native_powrDv16_DhS_(vector<16xf16>, vector<16xf16>) -> vector<16xf16>
-    llvm.func @_Z24__spirv_ocl_native_rsqrtd(f64) -> f64
-    llvm.func @_Z22__spirv_ocl_native_sinDh(f16) -> f16
-    llvm.func @_Z23__spirv_ocl_native_sqrtf(f32) -> f32
-    llvm.func @_Z22__spirv_ocl_native_tand(f64) -> f64
-    llvm.func @_Z25__spirv_ocl_native_divideff(f32, f32) -> f32
-  }
-}

>From 35c0fc8ddac569809cd84828b69cc49c3e73a07c Mon Sep 17 00:00:00 2001
From: "Shahneous Bari, Md Abdullah" <md.abdullah.shahneous.bari at intel.com>
Date: Mon, 6 Apr 2026 20:19:24 +0000
Subject: [PATCH 3/3] Address review comment.

---
 mlir/lib/Target/LLVM/XeVM/Target.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Target/LLVM/XeVM/Target.cpp b/mlir/lib/Target/LLVM/XeVM/Target.cpp
index e95771d9317a0..f8f09258fd861 100644
--- a/mlir/lib/Target/LLVM/XeVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/XeVM/Target.cpp
@@ -312,6 +312,7 @@ SPIRVSerializer::moduleToObject(llvm::Module &llvmModule) {
   Opts.push_back(triple.str());
   Opts.push_back(std::to_string(optLevel));
 
+  // Translate the LLVM module to SPIR-V binary using LLVM's SPIR-V Backend API.
   bool success =
       SPIRVTranslateModule(&llvmModule, serializedSPIRVBinary, ErrMsg,
                            getDefaultSPIRVExtensions(), Opts);
@@ -331,7 +332,7 @@ SPIRVSerializer::moduleToObject(llvm::Module &llvmModule) {
   // Return SPIRV binary if the compilation target is `assembly`. Optimization
   // and SPIR-V extensions are enabled for SPIR-V binary output in both paths
   // (assembly and binary) as of now. SPIR-V binary
-  // is generated directly using the SPIR-V backends `SPIRVTranslate` API.
+  // is generated directly using the SPIR-V backends `SPIRVTranslateModule` API.
   if (targetOptions.getCompilationTarget() ==
       gpu::CompilationTarget::Assembly) {
 #define DEBUG_TYPE "serialize-to-isa"