[Mlir-commits] [mlir] 528caf9 - [MLIR] Fix GPU integration tests for SYCL and LevelZero runtime. (#171718)

Fri Dec 19 09:10:48 PST 2025

Author: Sang Ik Lee
Date: 2025-12-19T09:10:44-08:00
New Revision: 528caf99a7a1448b5e2e239b129e3150bf5fd949

URL: https://github.com/llvm/llvm-project/commit/528caf99a7a1448b5e2e239b129e3150bf5fd949
DIFF: https://github.com/llvm/llvm-project/commit/528caf99a7a1448b5e2e239b129e3150bf5fd949.diff

LOG: [MLIR] Fix GPU integration tests for SYCL and LevelZero runtime. (#171718)

i1 type load / store lowering does not work anymore for SPIR-V kernel
Rewrite test cases such that it does not use i1 load / store.

Added: 
    

Modified: 
    mlir/test/Integration/GPU/LevelZero/gpu-reluf32-to-spirv.mlir
    mlir/test/Integration/GPU/SYCL/gpu-reluf32-to-spirv.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/test/Integration/GPU/LevelZero/gpu-reluf32-to-spirv.mlir b/mlir/test/Integration/GPU/LevelZero/gpu-reluf32-to-spirv.mlir
index 8d022ac1cf277..d0f21873e6e2c 100644

--- a/mlir/test/Integration/GPU/LevelZero/gpu-reluf32-to-spirv.mlir
+++ b/mlir/test/Integration/GPU/LevelZero/gpu-reluf32-to-spirv.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline='builtin.module(spirv-attach-target{ver=v1.0 caps=Addresses,Int64,Kernel},convert-gpu-to-spirv{use-64bit-index=true},gpu.module(spirv.module(spirv-lower-abi-attrs,spirv-update-vce)),func.func(llvm-request-c-wrappers),convert-scf-to-cf,convert-to-llvm,gpu-to-llvm{use-bare-pointers-for-kernels=true},gpu-module-to-binary,expand-strided-metadata,lower-affine,reconcile-unrealized-casts)' \
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(spirv-attach-target{ver=v1.0 caps=Addresses,Int64,Kernel},func.func(gpu-async-region),convert-gpu-to-spirv{use-64bit-index=true},gpu.module(spirv.module(spirv-lower-abi-attrs,spirv-update-vce)),func.func(llvm-request-c-wrappers),convert-scf-to-cf,convert-cf-to-llvm,convert-arith-to-llvm,convert-math-to-llvm,convert-func-to-llvm,gpu-to-llvm{use-bare-pointers-for-kernels=true},gpu-module-to-binary,expand-strided-metadata,lower-affine,finalize-memref-to-llvm,reconcile-unrealized-casts)' \
 // RUN: | mlir-runner \
 // RUN:   --shared-libs=%mlir_levelzero_runtime \
 // RUN:   --shared-libs=%mlir_runner_utils \
@@ -35,51 +35,26 @@ module @relu attributes {gpu.container_module} {
   func.func @test(%arg0: memref<4x5xf32>) -> memref<4x5xf32> {
     %c5 = arith.constant 5 : index
     %c4 = arith.constant 4 : index
-    %cst = arith.constant 0.000000e+00 : f32
     %c1 = arith.constant 1 : index
-    %memref = gpu.alloc host_shared () : memref<4x5xf32>
-    memref.copy %arg0, %memref : memref<4x5xf32> to memref<4x5xf32>
-    %memref_0 = gpu.alloc host_shared () : memref<4x5xi1>
-    %2 = gpu.wait async
-    %3 = gpu.launch_func async [%2]  @test_kernel::@test_kernel blocks in (%c4, %c5, %c1) threads in (%c1, %c1, %c1)
-          args(%memref : memref<4x5xf32>, %cst : f32, %memref_0 : memref<4x5xi1>)
-    gpu.wait [%3]
-    %memref_1 = gpu.alloc host_shared () : memref<4x5xf32>
-    %4 = gpu.wait async
-    %5 = gpu.launch_func async [%4]  @test_kernel_0::@test_kernel blocks in (%c4, %c5, %c1) threads in (%c1, %c1, %c1)
-          args(%memref_0 : memref<4x5xi1>, %memref : memref<4x5xf32>, %cst : f32,
-    %memref_1 : memref<4x5xf32>)
-    gpu.wait [%5]
-    %alloc = memref.alloc() : memref<4x5xf32>
-    memref.copy %memref_1, %alloc : memref<4x5xf32> to memref<4x5xf32>
-    %6 = gpu.wait async
-    %7 = gpu.dealloc async [%6] %memref_1 : memref<4x5xf32>
-    %8 = gpu.dealloc async [%7] %memref_0 : memref<4x5xi1>
-    %9 = gpu.dealloc async [%8] %memref : memref<4x5xf32>
-    return %alloc : memref<4x5xf32>
+    %host_result = memref.alloc() : memref<4x5xf32>
+    %gpu_input = gpu.alloc() : memref<4x5xf32>
+    gpu.memcpy %gpu_input, %arg0 : memref<4x5xf32>, memref<4x5xf32>
+    %gpu_result = gpu.alloc() : memref<4x5xf32>
+    gpu.launch_func @test_kernel::@test_relu blocks in (%c4, %c5, %c1) threads in (%c1, %c1, %c1) args(%gpu_input : memref<4x5xf32>,  %gpu_result : memref<4x5xf32>)
+    gpu.memcpy %host_result, %gpu_result : memref<4x5xf32>, memref<4x5xf32>
+    gpu.dealloc %gpu_input : memref<4x5xf32>
+    gpu.dealloc %gpu_result : memref<4x5xf32>
+    return %host_result : memref<4x5xf32>
   }
-  gpu.module @test_kernel
-  attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Int64, Int8, Kernel], []>, api=OpenCL, #spirv.resource_limits<>>} {
-    gpu.func @test_kernel(%arg0: memref<4x5xf32>, %arg1: f32, %arg2: memref<4x5xi1>) kernel
-    attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 4, 5, 1>, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+  gpu.module @test_kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Int64, Int8, Kernel], []>, api=OpenCL, #spirv.resource_limits<>>} {
+    gpu.func @test_relu(%arg0: memref<4x5xf32>, %arg1: memref<4x5xf32>) kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 4, 5, 1>, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+      %zero = arith.constant 0.000000e+00 : f32
       %0 = gpu.block_id  x
       %1 = gpu.block_id  y
       %2 = memref.load %arg0[%0, %1] : memref<4x5xf32>
-      %3 = arith.cmpf olt, %2, %arg1 : f32
-      memref.store %3, %arg2[%0, %1] : memref<4x5xi1>
-      gpu.return
-    }
-  }
-  gpu.module @test_kernel_0
-  attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Int64, Int8, Kernel], []>, api=OpenCL, #spirv.resource_limits<>>} {
-    gpu.func @test_kernel(%arg0: memref<4x5xi1>, %arg1: memref<4x5xf32>, %arg2: f32, %arg3: memref<4x5xf32>) kernel
-    attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 4, 5, 1>, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
-      %0 = gpu.block_id  x
-      %1 = gpu.block_id  y
-      %2 = memref.load %arg0[%0, %1] : memref<4x5xi1>
-      %3 = memref.load %arg1[%0, %1] : memref<4x5xf32>
-      %4 = arith.select %2, %arg2, %3 : f32
-      memref.store %4, %arg3[%0, %1] : memref<4x5xf32>
+      %3 = arith.cmpf ogt, %2, %zero : f32
+      %4 = arith.select %3, %2, %zero : f32
+      memref.store %4, %arg1[%0, %1] : memref<4x5xf32>
       gpu.return
     }
   }

diff  --git a/mlir/test/Integration/GPU/SYCL/gpu-reluf32-to-spirv.mlir b/mlir/test/Integration/GPU/SYCL/gpu-reluf32-to-spirv.mlir
index e385daefcb9b5..9d45f405e9f0f 100644
--- a/mlir/test/Integration/GPU/SYCL/gpu-reluf32-to-spirv.mlir
+++ b/mlir/test/Integration/GPU/SYCL/gpu-reluf32-to-spirv.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline='builtin.module(spirv-attach-target{ver=v1.0 caps=Addresses,Int64,Kernel},convert-gpu-to-spirv{use-64bit-index=true},gpu.module(spirv.module(spirv-lower-abi-attrs,spirv-update-vce)),func.func(llvm-request-c-wrappers),convert-scf-to-cf,convert-cf-to-llvm,convert-arith-to-llvm,convert-math-to-llvm,convert-func-to-llvm,gpu-to-llvm{use-bare-pointers-for-kernels=true},gpu-module-to-binary,expand-strided-metadata,lower-affine,finalize-memref-to-llvm,reconcile-unrealized-casts)' \
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(spirv-attach-target{ver=v1.0 caps=Addresses,Int64,Kernel},func.func(gpu-async-region),convert-gpu-to-spirv{use-64bit-index=true},gpu.module(spirv.module(spirv-lower-abi-attrs,spirv-update-vce)),func.func(llvm-request-c-wrappers),convert-scf-to-cf,convert-cf-to-llvm,convert-arith-to-llvm,convert-math-to-llvm,convert-func-to-llvm,gpu-to-llvm{use-bare-pointers-for-kernels=true},gpu-module-to-binary,expand-strided-metadata,lower-affine,finalize-memref-to-llvm,reconcile-unrealized-casts)' \
 // RUN: | mlir-runner \
 // RUN:   --shared-libs=%mlir_sycl_runtime \
 // RUN:   --shared-libs=%mlir_runner_utils \
@@ -35,44 +35,26 @@ module @relu attributes {gpu.container_module} {
   func.func @test(%arg0: memref<4x5xf32>) -> memref<4x5xf32> {
     %c5 = arith.constant 5 : index
     %c4 = arith.constant 4 : index
-    %cst = arith.constant 0.000000e+00 : f32
     %c1 = arith.constant 1 : index
-    %memref = gpu.alloc host_shared () : memref<4x5xf32>
-    memref.copy %arg0, %memref : memref<4x5xf32> to memref<4x5xf32>
-    %memref_0 = gpu.alloc host_shared () : memref<4x5xi1>
-    %2 = gpu.wait async
-    %3 = gpu.launch_func async [%2]  @test_kernel::@test_kernel blocks in (%c4, %c5, %c1) threads in (%c1, %c1, %c1) args(%memref : memref<4x5xf32>, %cst : f32, %memref_0 : memref<4x5xi1>)
-    gpu.wait [%3]
-    %memref_1 = gpu.alloc host_shared () : memref<4x5xf32>
-    %4 = gpu.wait async
-    %5 = gpu.launch_func async [%4]  @test_kernel_0::@test_kernel blocks in (%c4, %c5, %c1) threads in (%c1, %c1, %c1) args(%memref_0 : memref<4x5xi1>, %memref : memref<4x5xf32>, %cst : f32, %memref_1 : memref<4x5xf32>)
-    gpu.wait [%5]
-    %alloc = memref.alloc() : memref<4x5xf32>
-    memref.copy %memref_1, %alloc : memref<4x5xf32> to memref<4x5xf32>
-    %6 = gpu.wait async
-    %7 = gpu.dealloc async [%6] %memref_1 : memref<4x5xf32>
-    %8 = gpu.dealloc async [%7] %memref_0 : memref<4x5xi1>
-    %9 = gpu.dealloc async [%8] %memref : memref<4x5xf32>
-    return %alloc : memref<4x5xf32>
+    %host_result = memref.alloc() : memref<4x5xf32>
+    %gpu_input = gpu.alloc() : memref<4x5xf32>
+    gpu.memcpy %gpu_input, %arg0 : memref<4x5xf32>, memref<4x5xf32>
+    %gpu_result = gpu.alloc() : memref<4x5xf32>
+    gpu.launch_func @test_kernel::@test_relu blocks in (%c4, %c5, %c1) threads in (%c1, %c1, %c1) args(%gpu_input : memref<4x5xf32>,  %gpu_result : memref<4x5xf32>)
+    gpu.memcpy %host_result, %gpu_result : memref<4x5xf32>, memref<4x5xf32>
+    gpu.dealloc %gpu_input : memref<4x5xf32>
+    gpu.dealloc %gpu_result : memref<4x5xf32>
+    return %host_result : memref<4x5xf32>
   }
   gpu.module @test_kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Int64, Int8, Kernel], []>, api=OpenCL, #spirv.resource_limits<>>} {
-    gpu.func @test_kernel(%arg0: memref<4x5xf32>, %arg1: f32, %arg2: memref<4x5xi1>) kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 4, 5, 1>, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+    gpu.func @test_relu(%arg0: memref<4x5xf32>, %arg1: memref<4x5xf32>) kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 4, 5, 1>, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+      %zero = arith.constant 0.000000e+00 : f32
       %0 = gpu.block_id  x
       %1 = gpu.block_id  y
       %2 = memref.load %arg0[%0, %1] : memref<4x5xf32>
-      %3 = arith.cmpf olt, %2, %arg1 : f32
-      memref.store %3, %arg2[%0, %1] : memref<4x5xi1>
-      gpu.return
-    }
-  }
-  gpu.module @test_kernel_0 attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Int64, Int8, Kernel], []>, api=OpenCL, #spirv.resource_limits<>>} {
-    gpu.func @test_kernel(%arg0: memref<4x5xi1>, %arg1: memref<4x5xf32>, %arg2: f32, %arg3: memref<4x5xf32>) kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 4, 5, 1>, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
-      %0 = gpu.block_id  x
-      %1 = gpu.block_id  y
-      %2 = memref.load %arg0[%0, %1] : memref<4x5xi1>
-      %3 = memref.load %arg1[%0, %1] : memref<4x5xf32>
-      %4 = arith.select %2, %arg2, %3 : f32
-      memref.store %4, %arg3[%0, %1] : memref<4x5xf32>
+      %3 = arith.cmpf ogt, %2, %zero : f32
+      %4 = arith.select %3, %2, %zero : f32
+      memref.store %4, %arg1[%0, %1] : memref<4x5xf32>
       gpu.return
     }
   }