[Mlir-commits] [mlir] [XeVM] Refactor the SPIR-V generation to use SPIR-V backend API. (PR #189494)
Md Abdullah Shahneous Bari
llvmlistbot at llvm.org
Tue Apr 7 08:56:57 PDT 2026
https://github.com/mshahneo updated https://github.com/llvm/llvm-project/pull/189494
>From 0647f10af14c21627371497027796e89fac915fc Mon Sep 17 00:00:00 2001
From: "Shahneous Bari, Md Abdullah" <md.abdullah.shahneous.bari at intel.com>
Date: Mon, 30 Mar 2026 21:37:37 +0000
Subject: [PATCH 1/3] [XeVM] Refactor the SPIR-V generation to use SPIR-V
backend API.
Currently, we use two different approaches to generate SPIR-V, depending
on the compilation target. If the compilation target is `assembly/isa`,
the MLIR helper `translateModuleToISA` is used to convert an LLVM
module to SPIR-V text. For the other cases (`bin/fatbin` compilation
targets), the SPIR-V backend API is used to generate a SPIR-V binary.
The SPIR-V backend API is more powerful, as it lets one pass the
necessary extensions, which is a must when using any
advanced or vendor-specific SPIR-V features.
This PR discontinues the use of the MLIR API and consolidates on
the SPIR-V backend API.
It also ensures that SPIR-V generated from the MLIR side is always in
binary format (for both the XeVM target and the SPIR-V target).
---
mlir/lib/Target/LLVM/XeVM/Target.cpp | 69 +++++++++-------------------
1 file changed, 21 insertions(+), 48 deletions(-)
diff --git a/mlir/lib/Target/LLVM/XeVM/Target.cpp b/mlir/lib/Target/LLVM/XeVM/Target.cpp
index 83eec5e9d5549..e95771d9317a0 100644
--- a/mlir/lib/Target/LLVM/XeVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/XeVM/Target.cpp
@@ -306,54 +306,6 @@ SPIRVSerializer::moduleToObject(llvm::Module &llvmModule) {
"The `SPIRV` target was not built. Please enable "
"it when building LLVM.");
#else
-
- // Return SPIRV text if the compilation target is `assembly`.
- // Note: Optimization passes are skipped and SPIRV extensions are
- // not supported in this mode.
- if (targetOptions.getCompilationTarget() ==
- gpu::CompilationTarget::Assembly) {
- FailureOr<llvm::TargetMachine *> targetMachine = getOrCreateTargetMachine();
- if (failed(targetMachine))
- return getGPUModuleOp().emitError()
- << "Target Machine unavailable for triple " << triple
- << ", can't optimize with LLVM\n";
-
- FailureOr<SmallString<0>> serializedISA =
- translateModuleToISA(llvmModule, **targetMachine,
- [&]() { return getGPUModuleOp().emitError(); });
- if (failed(serializedISA))
- return getGPUModuleOp().emitError()
- << "Failed translating the module to ISA." << triple
- << ", can't compile with LLVM\n";
-
-#define DEBUG_TYPE "serialize-to-isa"
- LLVM_DEBUG({
- llvm::dbgs() << "SPIR-V for module: " << getGPUModuleOp().getNameAttr()
- << "\n";
- llvm::dbgs() << *serializedISA << "\n";
- llvm::dbgs().flush();
- });
-#undef DEBUG_TYPE
-
- // Make sure to include the null terminator.
- StringRef bin(serializedISA->c_str(), serializedISA->size() + 1);
- return SmallVector<char, 0>(bin.begin(), bin.end());
- }
-
- // Binary generation path for SPIR-V target. Optimization and SPIR-V
- // extensions are enabled in this path. In this path, first the SPIR-V binary
- // is generated directly using the SPIR-V backends `SPIRVTranslateModule` API.
- // Resultant SPIR-V is then fed to `ocloc` compiler (Intel's OpenCL Offline
- // Compiler) to generate the final binary for Intel GPUs.
-
- // @TODO: This part is doing exact same SPIR-V code generation as the previous
- // section under (targetOptions.getCompilationTarget() ==
- // gpu::CompilationTarget::Assembly) condition. Only execption is, it enables
- // optimization and SPIRV extensions support for SPIRV binary output. We need
- // to decide which one do we use for our SPIRV code generation, and remove the
- // other one to avoid confusion. For now, we keep both to have more
- // flexibility for testing and comparison.
-
std::string serializedSPIRVBinary;
std::string ErrMsg;
std::vector<std::string> Opts;
@@ -375,6 +327,27 @@ SPIRVSerializer::moduleToObject(llvm::Module &llvmModule) {
StringRef spirvBin(serializedSPIRVBinary.c_str(),
serializedSPIRVBinary.size());
+
+ // Return SPIRV binary if the compilation target is `assembly`. Optimization
+ // and SPIR-V extensions are enabled for SPIR-V binary output in both paths
+ // (assembly and binary) as of now. SPIR-V binary
+ // is generated directly using the SPIR-V backends `SPIRVTranslate` API.
+ if (targetOptions.getCompilationTarget() ==
+ gpu::CompilationTarget::Assembly) {
+#define DEBUG_TYPE "serialize-to-isa"
+ LLVM_DEBUG({
+ llvm::dbgs() << "SPIR-V for module: " << getGPUModuleOp().getNameAttr()
+ << "\n";
+ llvm::dbgs() << serializedSPIRVBinary << "\n";
+ llvm::dbgs().flush();
+ });
+#undef DEBUG_TYPE
+ return SmallVector<char, 0>(spirvBin.begin(), spirvBin.end());
+ }
+
+ // Return native binary. Compile the SPIR-V binary to native binary for Intel
+ // GPUs using `ocloc` compiler (Intel's OpenCL Offline Compiler).
+
return compileToBinary(spirvBin, "-spirv_input");
#endif // LLVM_HAS_SPIRV_TARGET
}
>From 7bce5ffe8422803d2f1037425617acb3e400cae9 Mon Sep 17 00:00:00 2001
From: "Shahneous Bari, Md Abdullah" <md.abdullah.shahneous.bari at intel.com>
Date: Mon, 30 Mar 2026 22:09:36 +0000
Subject: [PATCH 2/3] Remove test cases that relied on SPIR-V text generation.
---
.../MathToXeVM/native-spirv-builtins.mlir | 119 ------------------
1 file changed, 119 deletions(-)
delete mode 100644 mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir
diff --git a/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir b/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir
deleted file mode 100644
index 82426c44ddb1f..0000000000000
--- a/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir
+++ /dev/null
@@ -1,119 +0,0 @@
-// RUN: mlir-opt %s -gpu-module-to-binary="format=isa" \
-// RUN: -debug-only=serialize-to-isa 2> %t
-// RUN: FileCheck --input-file=%t %s
-// REQUIRES: asserts
-//
-// MathToXeVM pass generates OpenCL intrinsics function calls when converting
-// Math ops with `fastmath` attr to native function calls. It is assumed that
-// the SPIRV backend would correctly convert these intrinsics calls to OpenCL
-// ExtInst instructions in SPIRV (See llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp).
-//
-// To ensure this assumption holds, this test verifies that the SPIRV backend
-// behaves as expected.
-
-module @test_ocl_intrinsics attributes {gpu.container_module} {
- gpu.module @kernel [#xevm.target] {
- llvm.func spir_kernelcc @native_fcns() attributes {gpu.kernel} {
- // CHECK-DAG: %[[F16T:.+]] = OpTypeFloat 16
- // CHECK-DAG: %[[ZERO_F16:.+]] = OpConstantNull %[[F16T]]
- %c0_f16 = llvm.mlir.constant(0. : f16) : f16
- // CHECK-DAG: %[[F32T:.+]] = OpTypeFloat 32
- // CHECK-DAG: %[[ZERO_F32:.+]] = OpConstantNull %[[F32T]]
- %c0_f32 = llvm.mlir.constant(0. : f32) : f32
- // CHECK-DAG: %[[F64T:.+]] = OpTypeFloat 64
- // CHECK-DAG: %[[ZERO_F64:.+]] = OpConstantNull %[[F64T]]
- %c0_f64 = llvm.mlir.constant(0. : f64) : f64
-
- // CHECK-DAG: %[[V2F64T:.+]] = OpTypeVector %[[F64T]] 2
- // CHECK-DAG: %[[V2_ZERO_F64:.+]] = OpConstantNull %[[V2F64T]]
- %v2_c0_f64 = llvm.mlir.constant(dense<0.> : vector<2xf64>) : vector<2xf64>
- // CHECK-DAG: %[[V3F32T:.+]] = OpTypeVector %[[F32T]] 3
- // CHECK-DAG: %[[V3_ZERO_F32:.+]] = OpConstantNull %[[V3F32T]]
- %v3_c0_f32 = llvm.mlir.constant(dense<0.> : vector<3xf32>) : vector<3xf32>
- // CHECK-DAG: %[[V4F64T:.+]] = OpTypeVector %[[F64T]] 4
- // CHECK-DAG: %[[V4_ZERO_F64:.+]] = OpConstantNull %[[V4F64T]]
- %v4_c0_f64 = llvm.mlir.constant(dense<0.> : vector<4xf64>) : vector<4xf64>
- // CHECK-DAG: %[[V8F64T:.+]] = OpTypeVector %[[F64T]] 8
- // CHECK-DAG: %[[V8_ZERO_F64:.+]] = OpConstantNull %[[V8F64T]]
- %v8_c0_f64 = llvm.mlir.constant(dense<0.> : vector<8xf64>) : vector<8xf64>
- // CHECK-DAG: %[[V16F16T:.+]] = OpTypeVector %[[F16T]] 16
- // CHECK-DAG: %[[V16_ZERO_F16:.+]] = OpConstantNull %[[V16F16T]]
- %v16_c0_f16 = llvm.mlir.constant(dense<0.> : vector<16xf16>) : vector<16xf16>
-
- // CHECK: OpExtInst %[[F16T]] %{{.+}} native_exp %[[ZERO_F16]]
- %exp_f16 = llvm.call @_Z22__spirv_ocl_native_expDh(%c0_f16) : (f16) -> f16
- // CHECK: OpExtInst %[[F32T]] %{{.+}} native_exp %[[ZERO_F32]]
- %exp_f32 = llvm.call @_Z22__spirv_ocl_native_expf(%c0_f32) : (f32) -> f32
- // CHECK: OpExtInst %[[F64T]] %{{.+}} native_exp %[[ZERO_F64]]
- %exp_f64 = llvm.call @_Z22__spirv_ocl_native_expd(%c0_f64) : (f64) -> f64
-
- // CHECK: OpExtInst %[[V2F64T]] %{{.+}} native_exp %[[V2_ZERO_F64]]
- %exp_v2_f64 = llvm.call @_Z22__spirv_ocl_native_expDv2_f64(%v2_c0_f64) : (vector<2xf64>) -> vector<2xf64>
- // CHECK: OpExtInst %[[V3F32T]] %{{.+}} native_exp %[[V3_ZERO_F32]]
- %exp_v3_f32 = llvm.call @_Z22__spirv_ocl_native_expDv3_f32(%v3_c0_f32) : (vector<3xf32>) -> vector<3xf32>
- // CHECK: OpExtInst %[[V4F64T]] %{{.+}} native_exp %[[V4_ZERO_F64]]
- %exp_v4_f64 = llvm.call @_Z22__spirv_ocl_native_expDv4_f64(%v4_c0_f64) : (vector<4xf64>) -> vector<4xf64>
- // CHECK: OpExtInst %[[V8F64T]] %{{.+}} native_exp %[[V8_ZERO_F64]]
- %exp_v8_f64 = llvm.call @_Z22__spirv_ocl_native_expDv8_f64(%v8_c0_f64) : (vector<8xf64>) -> vector<8xf64>
- // CHECK: OpExtInst %[[V16F16T]] %{{.+}} native_exp %[[V16_ZERO_F16]]
- %exp_v16_f16 = llvm.call @_Z22__spirv_ocl_native_expDv16_f16(%v16_c0_f16) : (vector<16xf16>) -> vector<16xf16>
-
- // SPIRV backend does not currently handle fastmath flags: The SPIRV
- // backend would need to generate OpDecorate calls to decorate math ops
- // with FPFastMathMode/FPFastMathModeINTEL decorations.
- //
- // FIXME: When support for fastmath flags in the SPIRV backend is added,
- // add tests here to ensure fastmath flags are converted to the correct
- // OpDecorate calls.
- //
- // See:
- // - https://registry.khronos.org/SPIR-V/specs/unified1/OpenCL.ExtendedInstructionSet.100.html#_math_extended_instructions
- // - https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpDecorate
-
- // CHECK: OpExtInst %[[F16T]] %{{.+}} native_cos %[[ZERO_F16]]
- %cos_afn_f16 = llvm.call @_Z22__spirv_ocl_native_cosDh(%c0_f16) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16
- // CHECK: OpExtInst %[[F32T]] %{{.+}} native_exp2 %[[ZERO_F32]]
- %exp2_afn_f32 = llvm.call @_Z23__spirv_ocl_native_exp2f(%c0_f32) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32
- // CHECK: OpExtInst %[[F16T]] %{{.+}} native_log %[[ZERO_F16]]
- %log_afn_f16 = llvm.call @_Z22__spirv_ocl_native_logDh(%c0_f16) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16
- // CHECK: OpExtInst %[[F32T]] %{{.+}} native_log2 %[[ZERO_F32]]
- %log2_afn_f32 = llvm.call @_Z23__spirv_ocl_native_log2f(%c0_f32) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32
- // CHECK: OpExtInst %[[V8F64T]] %{{.+}} native_log10 %[[V8_ZERO_F64]]
- %log10_afn_f64 = llvm.call @_Z24__spirv_ocl_native_log10Dv8_d(%v8_c0_f64) {fastmathFlags = #llvm.fastmath<afn>} : (vector<8xf64>) -> vector<8xf64>
- // CHECK: OpExtInst %[[V16F16T]] %{{.+}} native_powr %[[V16_ZERO_F16]] %[[V16_ZERO_F16]]
- %powr_afn_f16 = llvm.call @_Z23__spirv_ocl_native_powrDv16_DhS_(%v16_c0_f16, %v16_c0_f16) {fastmathFlags = #llvm.fastmath<afn>} : (vector<16xf16>, vector<16xf16>) -> vector<16xf16>
- // CHECK: OpExtInst %[[F64T]] %{{.+}} native_rsqrt %[[ZERO_F64]]
- %rsqrt_afn_f64 = llvm.call @_Z24__spirv_ocl_native_rsqrtd(%c0_f64) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64
- // CHECK: OpExtInst %[[F16T]] %{{.+}} native_sin %[[ZERO_F16]]
- %sin_afn_f16 = llvm.call @_Z22__spirv_ocl_native_sinDh(%c0_f16) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16
- // CHECK: OpExtInst %[[F32T]] %{{.+}} native_sqrt %[[ZERO_F32]]
- %sqrt_afn_f32 = llvm.call @_Z23__spirv_ocl_native_sqrtf(%c0_f32) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32
- // CHECK: OpExtInst %[[F64T]] %{{.+}} native_tan %[[ZERO_F64]]
- %tan_afn_f64 = llvm.call @_Z22__spirv_ocl_native_tand(%c0_f64) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64
- // CHECK: OpExtInst %[[F32T]] %{{.+}} native_divide %[[ZERO_F32]] %[[ZERO_F32]]
- %divide_afn_f32 = llvm.call @_Z25__spirv_ocl_native_divideff(%c0_f32, %c0_f32) {fastmathFlags = #llvm.fastmath<afn>} : (f32, f32) -> f32
-
- llvm.return
- }
-
- llvm.func @_Z22__spirv_ocl_native_expDh(f16) -> f16
- llvm.func @_Z22__spirv_ocl_native_expf(f32) -> f32
- llvm.func @_Z22__spirv_ocl_native_expd(f64) -> f64
- llvm.func @_Z22__spirv_ocl_native_expDv2_f64(vector<2xf64>) -> vector<2xf64>
- llvm.func @_Z22__spirv_ocl_native_expDv3_f32(vector<3xf32>) -> vector<3xf32>
- llvm.func @_Z22__spirv_ocl_native_expDv4_f64(vector<4xf64>) -> vector<4xf64>
- llvm.func @_Z22__spirv_ocl_native_expDv8_f64(vector<8xf64>) -> vector<8xf64>
- llvm.func @_Z22__spirv_ocl_native_expDv16_f16(vector<16xf16>) -> vector<16xf16>
- llvm.func @_Z22__spirv_ocl_native_cosDh(f16) -> f16
- llvm.func @_Z23__spirv_ocl_native_exp2f(f32) -> f32
- llvm.func @_Z22__spirv_ocl_native_logDh(f16) -> f16
- llvm.func @_Z23__spirv_ocl_native_log2f(f32) -> f32
- llvm.func @_Z24__spirv_ocl_native_log10Dv8_d(vector<8xf64>) -> vector<8xf64>
- llvm.func @_Z23__spirv_ocl_native_powrDv16_DhS_(vector<16xf16>, vector<16xf16>) -> vector<16xf16>
- llvm.func @_Z24__spirv_ocl_native_rsqrtd(f64) -> f64
- llvm.func @_Z22__spirv_ocl_native_sinDh(f16) -> f16
- llvm.func @_Z23__spirv_ocl_native_sqrtf(f32) -> f32
- llvm.func @_Z22__spirv_ocl_native_tand(f64) -> f64
- llvm.func @_Z25__spirv_ocl_native_divideff(f32, f32) -> f32
- }
-}
>From 35c0fc8ddac569809cd84828b69cc49c3e73a07c Mon Sep 17 00:00:00 2001
From: "Shahneous Bari, Md Abdullah" <md.abdullah.shahneous.bari at intel.com>
Date: Mon, 6 Apr 2026 20:19:24 +0000
Subject: [PATCH 3/3] Address review comment.
---
mlir/lib/Target/LLVM/XeVM/Target.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/mlir/lib/Target/LLVM/XeVM/Target.cpp b/mlir/lib/Target/LLVM/XeVM/Target.cpp
index e95771d9317a0..f8f09258fd861 100644
--- a/mlir/lib/Target/LLVM/XeVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/XeVM/Target.cpp
@@ -312,6 +312,7 @@ SPIRVSerializer::moduleToObject(llvm::Module &llvmModule) {
Opts.push_back(triple.str());
Opts.push_back(std::to_string(optLevel));
+ // Translate the LLVM module to SPIR-V binary using LLVM's SPIR-V Backend API.
bool success =
SPIRVTranslateModule(&llvmModule, serializedSPIRVBinary, ErrMsg,
getDefaultSPIRVExtensions(), Opts);
@@ -331,7 +332,7 @@ SPIRVSerializer::moduleToObject(llvm::Module &llvmModule) {
// Return SPIRV binary if the compilation target is `assembly`. Optimization
// and SPIR-V extensions are enabled for SPIR-V binary output in both paths
// (assembly and binary) as of now. SPIR-V binary
- // is generated directly using the SPIR-V backends `SPIRVTranslate` API.
+ // is generated directly using the SPIR-V backends `SPIRVTranslateModule` API.
if (targetOptions.getCompilationTarget() ==
gpu::CompilationTarget::Assembly) {
#define DEBUG_TYPE "serialize-to-isa"
More information about the Mlir-commits
mailing list