[Mlir-commits] [mlir] a8b2c96 - [NFC][MLIR][OpenMP] Modify tests to have allocas in the correct address space for AMDGPU
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Fri Apr 25 18:41:29 PDT 2025
Author: agozillon
Date: 2025-04-25T20:41:08-05:00
New Revision: a8b2c96d00c408d10eba011facf6121b2ce72c06
URL: https://github.com/llvm/llvm-project/commit/a8b2c96d00c408d10eba011facf6121b2ce72c06
DIFF: https://github.com/llvm/llvm-project/commit/a8b2c96d00c408d10eba011facf6121b2ce72c06.diff
LOG: [NFC][MLIR][OpenMP] Modify tests to have allocas in the correct address space for AMDGPU
Added:
Modified:
mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir
mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir
mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir
mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir
mlir/test/Target/LLVMIR/omptarget-debug.mlir
mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir
mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir
mlir/test/Target/LLVMIR/omptarget-threadprivate-device-lowering.mlir
mlir/test/Target/LLVMIR/openmp-target-generic-spmd.mlir
mlir/test/Target/LLVMIR/openmp-target-launch-device.mlir
mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir
mlir/test/Target/LLVMIR/openmp-target-spmd.mlir
mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir
mlir/test/Target/LLVMIR/openmp-task-target-device.mlir
Removed:
################################################################################
diff --git a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir
index ae5cf97c958db..e9c77ef015336 100644
--- a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir
@@ -1,6 +1,6 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
llvm.func @_QQmain() attributes {fir.bindc_name = "main"} {
%0 = llvm.mlir.addressof @_QFEi : !llvm.ptr
%1 = llvm.mlir.addressof @_QFEsp : !llvm.ptr
@@ -26,19 +26,21 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_devic
// CHECK: define {{.*}} void @__omp_offloading_{{.*}}_{{.*}}__QQmain_l{{.*}}(ptr %[[DYN_PTR:.*]], ptr %[[ARG_BYREF:.*]], ptr %[[ARG_BYCOPY:.*]]) #{{[0-9]+}} {
// CHECK: entry:
-// CHECK: %[[ALLOCA_BYREF:.*]] = alloca ptr, align 8
-// CHECK: store ptr %[[ARG_BYREF]], ptr %[[ALLOCA_BYREF]], align 8
-// CHECK: %[[ALLOCA_BYCOPY:.*]] = alloca ptr, align 8
-// CHECK: store ptr %[[ARG_BYCOPY]], ptr %[[ALLOCA_BYCOPY]], align 8
+// CHECK: %[[ALLOCA_BYREF:.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK: %[[ALLOCA_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA_BYREF]] to ptr
+// CHECK: store ptr %[[ARG_BYREF]], ptr %[[ALLOCA_ASCAST]], align 8
+// CHECK: %[[ALLOCA_BYCOPY:.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK: %[[ALLOCA_ASCAST2:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA_BYCOPY]] to ptr
+// CHECK: store ptr %[[ARG_BYCOPY]], ptr %[[ALLOCA_ASCAST2]], align 8
// CHECK: user_code.entry: ; preds = %entry
-// CHECK: %[[LOAD_BYREF:.*]] = load ptr, ptr %[[ALLOCA_BYREF]], align 8
+// CHECK: %[[LOAD_BYREF:.*]] = load ptr, ptr %[[ALLOCA_ASCAST]], align 8
// CHECK: br label %outlined.body
// CHECK: outlined.body:
// CHECK: br label %omp.target
// CHECK: omp.target:
-// CHECK: %[[VAL_LOAD_BYCOPY:.*]] = load i32, ptr %[[ALLOCA_BYCOPY]], align 4
+// CHECK: %[[VAL_LOAD_BYCOPY:.*]] = load i32, ptr %[[ALLOCA_ASCAST2]], align 4
// CHECK: store i32 %[[VAL_LOAD_BYCOPY]], ptr %[[LOAD_BYREF]], align 4
// CHECK: br label %omp.region.cont
diff --git a/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir b/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir
index 0aa592fe1bee1..724e03885d146 100644
--- a/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir
@@ -10,22 +10,24 @@
// constant sized) allocations performs its task reasonably in these
// scenarios.
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
llvm.func @_QQmain() attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>} {
%1 = llvm.mlir.constant(1 : i64) : i64
%2 = llvm.alloca %1 x !llvm.struct<(ptr)> : (i64) -> !llvm.ptr
%3 = omp.map.info var_ptr(%2 : !llvm.ptr, !llvm.struct<(ptr)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
omp.target map_entries(%3 -> %arg0 : !llvm.ptr) {
%4 = llvm.mlir.constant(1 : i32) : i32
- %5 = llvm.alloca %4 x !llvm.struct<(ptr)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %5 = llvm.alloca %4 x !llvm.struct<(ptr)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
+ %ascast1 = llvm.addrspacecast %5 : !llvm.ptr<5> to !llvm.ptr
%6 = llvm.mlir.constant(50 : i32) : i32
%7 = llvm.mlir.constant(1 : i64) : i64
- %8 = llvm.alloca %7 x i32 : (i64) -> !llvm.ptr
- llvm.store %6, %8 : i32, !llvm.ptr
+ %8 = llvm.alloca %7 x i32 : (i64) -> !llvm.ptr<5>
+ %ascast2 = llvm.addrspacecast %8 : !llvm.ptr<5> to !llvm.ptr
+ llvm.store %6, %ascast2 : i32, !llvm.ptr
%9 = llvm.mlir.undef : !llvm.struct<(ptr)>
- %10 = llvm.insertvalue %8, %9[0] : !llvm.struct<(ptr)>
- llvm.store %10, %5 : !llvm.struct<(ptr)>, !llvm.ptr
- %88 = llvm.call @_ExternalCall(%arg0, %5) : (!llvm.ptr, !llvm.ptr) -> !llvm.struct<()>
+ %10 = llvm.insertvalue %ascast2, %9[0] : !llvm.struct<(ptr)>
+ llvm.store %10, %ascast1 : !llvm.struct<(ptr)>, !llvm.ptr
+ %88 = llvm.call @_ExternalCall(%arg0, %ascast1) : (!llvm.ptr, !llvm.ptr) -> !llvm.struct<()>
omp.terminator
}
llvm.return
diff --git a/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir b/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir
index 59bdf220b0f6a..ed66ff2c9ad7e 100644
--- a/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir
@@ -1,6 +1,6 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
llvm.func @_QQmain() attributes {bindc_name = "main"} {
%0 = llvm.mlir.addressof @_QFEsp : !llvm.ptr
%1 = llvm.mlir.constant(10 : index) : i64
@@ -32,9 +32,10 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_devic
// CHECK: define {{.*}} void @__omp_offloading_{{.*}}_{{.*}}__QQmain_{{.*}}(ptr %{{.*}}, ptr %[[ARG1:.*]]) #{{[0-9]+}} {
-// CHECK: %[[ARG1_ALLOCA:.*]] = alloca ptr, align 8
-// CHECK: store ptr %[[ARG1]], ptr %[[ARG1_ALLOCA]], align 8
-// CHECK: %[[LOAD_ARG1_ALLOCA:.*]] = load ptr, ptr %[[ARG1_ALLOCA]], align 8
+// CHECK: %[[ARG1_ALLOCA:.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK: %[[ARG1_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[ARG1_ALLOCA]] to ptr
+// CHECK: store ptr %[[ARG1]], ptr %[[ARG1_ASCAST]], align 8
+// CHECK: %[[LOAD_ARG1_ALLOCA:.*]] = load ptr, ptr %[[ARG1_ASCAST]], align 8
// CHECK: store i32 20, ptr %[[LOAD_ARG1_ALLOCA]], align 4
// CHECK: %[[GEP_ARG1_ALLOCA:.*]] = getelementptr inbounds nuw i8, ptr %[[LOAD_ARG1_ALLOCA]], i64 16
// CHECK: store i32 10, ptr %[[GEP_ARG1_ALLOCA]], align 4
diff --git a/mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir b/mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir
index 950f59f3e7ba5..ea92589bbd031 100644
--- a/mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir
@@ -26,20 +26,22 @@
#var_x = #llvm.di_local_variable<scope = #sp,
name = "x", file = #file, line = 12, type = #real_ty>
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
llvm.func @test() {
%0 = llvm.mlir.constant(1 : i64) : i64
- %1 = llvm.alloca %0 x f32 : (i64) -> !llvm.ptr
- %4 = llvm.alloca %0 x i32 : (i64) -> !llvm.ptr
+ %1 = llvm.alloca %0 x f32 : (i64) -> !llvm.ptr<5>
+ %4 = llvm.alloca %0 x i32 : (i64) -> !llvm.ptr<5>
+ %ascast = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
+ %ascast2 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr
%6 = llvm.mlir.constant(9 : index) : i64
%7 = llvm.mlir.constant(0 : index) : i64
%8 = llvm.mlir.constant(1 : index) : i64
%10 = llvm.mlir.constant(10 : index) : i64
%11 = llvm.mlir.addressof @_QFEarr : !llvm.ptr
- %14 = omp.map.info var_ptr(%1 : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
+ %14 = omp.map.info var_ptr(%ascast : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
%15 = omp.map.bounds lower_bound(%7 : i64) upper_bound(%6 : i64) extent(%10 : i64) stride(%8 : i64) start_idx(%8 : i64)
%16 = omp.map.info var_ptr(%11 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%15) -> !llvm.ptr
- %17 = omp.map.info var_ptr(%4 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr
+ %17 = omp.map.info var_ptr(%ascast2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr
omp.target map_entries(%14 -> %arg0, %16 -> %arg1, %17 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
llvm.intr.dbg.declare #var_x = %arg0 : !llvm.ptr
llvm.intr.dbg.declare #var_arr = %arg1 : !llvm.ptr
diff --git a/mlir/test/Target/LLVMIR/omptarget-debug.mlir b/mlir/test/Target/LLVMIR/omptarget-debug.mlir
index dc2df5fdfba05..9c8344d69dc74 100644
--- a/mlir/test/Target/LLVMIR/omptarget-debug.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-debug.mlir
@@ -1,10 +1,11 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
llvm.func @_QQmain() {
%0 = llvm.mlir.constant(1 : i32) : i32
- %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
- %9 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr<5>
+ %ascast = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
+ %9 = omp.map.info var_ptr(%ascast : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
omp.target map_entries(%9 -> %arg0 : !llvm.ptr) {
%13 = llvm.mlir.constant(1 : i32) : i32
llvm.store %13, %arg0 : i32, !llvm.ptr loc(#loc2)
diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
index de965f99fd4a1..593d8010f55de 100644
--- a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
@@ -33,8 +33,9 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
llvm.func @parallel_if(%arg0: !llvm.ptr {fir.bindc_name = "ifcond"}) {
%0 = llvm.mlir.constant(1 : i64) : i64
- %1 = llvm.alloca %0 x i32 {bindc_name = "d"} : (i64) -> !llvm.ptr
- %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = "d"}
+ %1 = llvm.alloca %0 x i32 {bindc_name = "d"} : (i64) -> !llvm.ptr<5>
+ %cast = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
+ %2 = omp.map.info var_ptr(%cast : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = "d"}
%3 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "ifcond"}
omp.target map_entries(%2 -> %arg1, %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
%4 = llvm.mlir.constant(10 : i32) : i32
diff --git a/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir
index 5938aaeafa5ef..5a76871c180ab 100644
--- a/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir
@@ -1,25 +1,28 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
llvm.func @omp_target_region_() {
%0 = llvm.mlir.constant(20 : i32) : i32
%1 = llvm.mlir.constant(10 : i32) : i32
%2 = llvm.mlir.constant(1 : i64) : i64
- %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr
+ %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr<5>
%4 = llvm.mlir.constant(1 : i64) : i64
- %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr
+ %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr<5>
%6 = llvm.mlir.constant(1 : i64) : i64
- %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr
- llvm.store %1, %3 : i32, !llvm.ptr
- llvm.store %0, %5 : i32, !llvm.ptr
- %map1 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
- %map2 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
- %map3 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr<5>
+ %8 = llvm.addrspacecast %3 : !llvm.ptr<5> to !llvm.ptr
+ %9 = llvm.addrspacecast %5 : !llvm.ptr<5> to !llvm.ptr
+ %10 = llvm.addrspacecast %7 : !llvm.ptr<5> to !llvm.ptr
+ llvm.store %1, %8 : i32, !llvm.ptr
+ llvm.store %0, %9 : i32, !llvm.ptr
+ %map1 = omp.map.info var_ptr(%8 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %map2 = omp.map.info var_ptr(%9 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %map3 = omp.map.info var_ptr(%10 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
omp.target map_entries(%map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
- %8 = llvm.load %arg0 : !llvm.ptr -> i32
- %9 = llvm.load %arg1 : !llvm.ptr -> i32
- %10 = llvm.add %8, %9 : i32
- llvm.store %10, %arg2 : i32, !llvm.ptr
+ %11 = llvm.load %arg0 : !llvm.ptr -> i32
+ %12 = llvm.load %arg1 : !llvm.ptr -> i32
+ %13 = llvm.add %11, %12 : i32
+ llvm.store %13, %arg2 : i32, !llvm.ptr
omp.terminator
}
llvm.return
@@ -31,19 +34,22 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_devic
// CHECK: @[[DYNA_ENV:.*]] = weak_odr protected global %struct.DynamicEnvironmentTy zeroinitializer
// CHECK: @[[KERNEL_ENV:.*]] = weak_odr protected constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 1, i32 256, i32 -1, i32 -1, i32 0, i32 0 }, ptr @[[IDENT]], ptr @[[DYNA_ENV]] }
// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{[^_]+}}_{{[^_]+}}_omp_target_region__l{{[0-9]+}}(ptr %[[DYN_PTR:.*]], ptr %[[ADDR_A:.*]], ptr %[[ADDR_B:.*]], ptr %[[ADDR_C:.*]])
-// CHECK: %[[TMP_A:.*]] = alloca ptr, align 8
-// CHECK: store ptr %[[ADDR_A]], ptr %[[TMP_A]], align 8
+// CHECK: %[[TMP_A:.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK: %[[ASCAST_A:.*]] = addrspacecast ptr addrspace(5) %[[TMP_A]] to ptr
+// CHECK: store ptr %[[ADDR_A]], ptr %[[ASCAST_A]], align 8
// CHECK: %[[TMP_B:.*]] = alloca ptr, align 8
-// CHECK: store ptr %[[ADDR_B]], ptr %[[TMP_B]], align 8
+// CHECK: %[[ASCAST_B:.*]] = addrspacecast ptr addrspace(5) %[[TMP_B]] to ptr
+// CHECK: store ptr %[[ADDR_B]], ptr %[[ASCAST_B]], align 8
// CHECK: %[[TMP_C:.*]] = alloca ptr, align 8
-// CHECK: store ptr %[[ADDR_C]], ptr %[[TMP_C]], align 8
+// CHECK: %[[ASCAST_C:.*]] = addrspacecast ptr addrspace(5) %[[TMP_C]] to ptr
+// CHECK: store ptr %[[ADDR_C]], ptr %[[ASCAST_C]], align 8
// CHECK: %[[INIT:.*]] = call i32 @__kmpc_target_init(ptr @[[KERNEL_ENV]], ptr %[[DYN_PTR]])
// CHECK-NEXT: %[[CMP:.*]] = icmp eq i32 %[[INIT]], -1
// CHECK-NEXT: br i1 %[[CMP]], label %[[LABEL_ENTRY:.*]], label %[[LABEL_EXIT:.*]]
// CHECK: [[LABEL_ENTRY]]:
-// CHECK: %[[PTR_A:.*]] = load ptr, ptr %[[TMP_A]], align 8
-// CHECK: %[[PTR_B:.*]] = load ptr, ptr %[[TMP_B]], align 8
-// CHECK: %[[PTR_C:.*]] = load ptr, ptr %[[TMP_C]], align 8
+// CHECK: %[[PTR_A:.*]] = load ptr, ptr %[[ASCAST_A]], align 8
+// CHECK: %[[PTR_B:.*]] = load ptr, ptr %[[ASCAST_B]], align 8
+// CHECK: %[[PTR_C:.*]] = load ptr, ptr %[[ASCAST_C]], align 8
// CHECK-NEXT: br label %[[LABEL_TARGET:.*]]
// CHECK: [[LABEL_TARGET]]:
// CHECK: %[[A:.*]] = load i32, ptr %[[PTR_A]], align 4
diff --git a/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir b/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir
index d4743ea88d9d9..16be0773bd14b 100644
--- a/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir
@@ -1,21 +1,24 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} {
llvm.func @omp_target_region_() {
%0 = llvm.mlir.constant(20 : i32) : i32
%1 = llvm.mlir.constant(10 : i32) : i32
%2 = llvm.mlir.constant(1 : i64) : i64
- %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr
+ %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr<5>
+ %ascast = llvm.addrspacecast %3 : !llvm.ptr<5> to !llvm.ptr
%4 = llvm.mlir.constant(1 : i64) : i64
- %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr
+ %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr<5>
+ %ascast2 = llvm.addrspacecast %5 : !llvm.ptr<5> to !llvm.ptr
%6 = llvm.mlir.constant(1 : i64) : i64
- %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr
- llvm.store %1, %3 : i32, !llvm.ptr
- llvm.store %0, %5 : i32, !llvm.ptr
+ %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr<5>
+ %ascast3 = llvm.addrspacecast %7 : !llvm.ptr<5> to !llvm.ptr
+ llvm.store %1, %ascast : i32, !llvm.ptr
+ llvm.store %0, %ascast2 : i32, !llvm.ptr
omp.task {
- %map1 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
- %map2 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
- %map3 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %map1 = omp.map.info var_ptr(%ascast : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %map2 = omp.map.info var_ptr(%ascast2 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %map3 = omp.map.info var_ptr(%ascast3 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
omp.target map_entries(%map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
%8 = llvm.load %arg0 : !llvm.ptr -> i32
%9 = llvm.load %arg1 : !llvm.ptr -> i32
@@ -36,5 +39,5 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_devic
}
}
-// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_{{.*}}_omp_target_region__l19
+// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_{{.*}}_omp_target_region__l22
// CHECK: ret void
diff --git a/mlir/test/Target/LLVMIR/omptarget-threadprivate-device-lowering.mlir b/mlir/test/Target/LLVMIR/omptarget-threadprivate-device-lowering.mlir
index b0c641c65f9fb..ba182374a9e3b 100644
--- a/mlir/test/Target/LLVMIR/omptarget-threadprivate-device-lowering.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-threadprivate-device-lowering.mlir
@@ -4,7 +4,7 @@
// omp.threadprivate does not crash on lowering during the OpenMP target device
// pass when used in conjunction with target code in the same module.
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true } {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true } {
llvm.func @func() attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>} {
%0 = llvm.mlir.addressof @_QFEpointer2 : !llvm.ptr
%1 = omp.threadprivate %0 : !llvm.ptr -> !llvm.ptr
@@ -24,7 +24,8 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_devic
}
// CHECK: define weak_odr protected amdgpu_kernel void @{{.*}}(ptr %{{.*}}, ptr %[[ARG1:.*]]) #{{[0-9]+}} {
-// CHECK: %[[ALLOCA:.*]] = alloca ptr, align 8
-// CHECK: store ptr %[[ARG1]], ptr %[[ALLOCA]], align 8
-// CHECK: %[[LOAD_ALLOCA:.*]] = load ptr, ptr %[[ALLOCA]], align 8
+// CHECK: %[[ALLOCA:.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK: %[[ALLOCA_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA]] to ptr
+// CHECK: store ptr %[[ARG1]], ptr %[[ALLOCA_ASCAST]], align 8
+// CHECK: %[[LOAD_ALLOCA:.*]] = load ptr, ptr %[[ALLOCA_ASCAST]], align 8
// CHECK: store i32 1, ptr %[[LOAD_ALLOCA]], align 4
diff --git a/mlir/test/Target/LLVMIR/openmp-target-generic-spmd.mlir b/mlir/test/Target/LLVMIR/openmp-target-generic-spmd.mlir
index 8101660e571e4..9bb2b40a43def 100644
--- a/mlir/test/Target/LLVMIR/openmp-target-generic-spmd.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-target-generic-spmd.mlir
@@ -60,7 +60,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
//--- device.mlir
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} {
llvm.func @main(%arg0 : !llvm.ptr) {
%0 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(to) capture(ByCopy) -> !llvm.ptr
omp.target map_entries(%0 -> %ptr : !llvm.ptr) {
diff --git a/mlir/test/Target/LLVMIR/openmp-target-launch-device.mlir b/mlir/test/Target/LLVMIR/openmp-target-launch-device.mlir
index a418445324817..c7f1490240182 100644
--- a/mlir/test/Target/LLVMIR/openmp-target-launch-device.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-target-launch-device.mlir
@@ -12,7 +12,7 @@
// CHECK-SAME: %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 [[EXEC_MODE2:1]], i32 [[MIN_THREADS2:1]], i32 [[MAX_THREADS2:30]], i32 [[MIN_TEAMS2:40]], i32 [[MAX_TEAMS2:40]], i32 0, i32 0 },
// CHECK-SAME: ptr @{{.*}}, ptr @{{.*}} }
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} {
llvm.func @main(%num_teams : !llvm.ptr) {
// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_main_l{{[0-9]+}}(ptr %[[KERNEL_ARGS:.*]], ptr %[[NUM_TEAMS_ARG:.*]]) #[[ATTRS1:[0-9]+]]
// CHECK: %{{.*}} = call i32 @__kmpc_target_init(ptr @[[KERNEL1_ENV]], ptr %[[KERNEL_ARGS]])
diff --git a/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir b/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir
index c618b68d52aaf..cbf273b887bc7 100644
--- a/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir
@@ -1,6 +1,6 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} {
omp.private {type = private} @i32_privatizer : i32
@@ -28,10 +28,11 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true,
llvm.func @test_nested_target_in_wsloop(%arg0: !llvm.ptr) {
%8 = llvm.mlir.constant(1 : i64) : i64
- %9 = llvm.alloca %8 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+ %9 = llvm.alloca %8 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr<5>
+ %ascast = llvm.addrspacecast %9 : !llvm.ptr<5> to !llvm.ptr
%16 = llvm.mlir.constant(10 : i32) : i32
%17 = llvm.mlir.constant(1 : i32) : i32
- omp.wsloop private(@i32_privatizer %9 -> %loop_arg : !llvm.ptr) {
+ omp.wsloop private(@i32_privatizer %ascast -> %loop_arg : !llvm.ptr) {
omp.loop_nest (%arg1) : i32 = (%17) to (%16) inclusive step (%17) {
llvm.store %arg1, %loop_arg : i32, !llvm.ptr
%0 = llvm.mlir.constant(4 : index) : i64
@@ -48,7 +49,8 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true,
}
// CHECK-LABEL: define void @test_nested_target_in_wsloop(ptr %0) {
-// CHECK-NEXT: %{{.*}} = alloca i32, i64 1, align 4
+// CHECK-NEXT: %{{.*}} = alloca i32, i64 1, align 4, addrspace(5)
+// CHECK-NEXT: %{{.*}} = addrspacecast ptr addrspace(5) %{{.*}} to ptr
// CHECK-NEXT: br label %omp.wsloop.fake.region
// CHECK: omp.wsloop.fake.region:
// CHECK-NEXT: br label %omp.loop_nest.fake.region
@@ -63,8 +65,9 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true,
llvm.func @test_nested_target_in_parallel_with_private(%arg0: !llvm.ptr) {
%8 = llvm.mlir.constant(1 : i64) : i64
- %9 = llvm.alloca %8 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
- omp.parallel private(@i32_privatizer %9 -> %i_priv_arg : !llvm.ptr) {
+ %9 = llvm.alloca %8 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr<5>
+ %ascast = llvm.addrspacecast %9 : !llvm.ptr<5> to !llvm.ptr
+ omp.parallel private(@i32_privatizer %ascast -> %i_priv_arg : !llvm.ptr) {
%1 = llvm.mlir.constant(1 : index) : i64
// Use the private clause from omp.parallel to make sure block arguments
// are handled.
@@ -81,8 +84,9 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true,
llvm.func @test_nested_target_in_task_with_private(%arg0: !llvm.ptr) {
%8 = llvm.mlir.constant(1 : i64) : i64
- %9 = llvm.alloca %8 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
- omp.task private(@i32_privatizer %9 -> %i_priv_arg : !llvm.ptr) {
+ %9 = llvm.alloca %8 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr<5>
+ %ascast = llvm.addrspacecast %9 : !llvm.ptr<5> to !llvm.ptr
+ omp.task private(@i32_privatizer %ascast -> %i_priv_arg : !llvm.ptr) {
%1 = llvm.mlir.constant(1 : index) : i64
// Use the private clause from omp.task to make sure block arguments
// are handled.
diff --git a/mlir/test/Target/LLVMIR/openmp-target-spmd.mlir b/mlir/test/Target/LLVMIR/openmp-target-spmd.mlir
index 7930554cbe11a..86dff678bf639 100644
--- a/mlir/test/Target/LLVMIR/openmp-target-spmd.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-target-spmd.mlir
@@ -53,7 +53,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
//--- device.mlir
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} {
llvm.func @main(%x : i32) {
omp.target host_eval(%x -> %lb, %x -> %ub, %x -> %step : i32, i32, i32) {
omp.teams {
diff --git a/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir b/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir
index ff580e5fea634..9c6b06e3aab96 100644
--- a/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir
@@ -5,13 +5,14 @@
// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading{{.*}}main_
// CHECK-NEXT: entry:
-// CHECK-NEXT: %[[VAL_3:.*]] = alloca ptr, align 8
-// CHECK-NEXT: store ptr %[[VAL_4:.*]], ptr %[[VAL_3]], align 8
+// CHECK-NEXT: %[[VAL_3:.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: %[[ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[VAL_3]] to ptr
+// CHECK-NEXT: store ptr %[[VAL_4:.*]], ptr %[[ASCAST]], align 8
// CHECK-NEXT: %[[VAL_5:.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_{{.*}}_kernel_environment, ptr %[[VAL_6:.*]])
// CHECK-NEXT: %[[VAL_7:.*]] = icmp eq i32 %[[VAL_5]], -1
// CHECK-NEXT: br i1 %[[VAL_7]], label %[[VAL_8:.*]], label %[[VAL_9:.*]]
// CHECK: user_code.entry: ; preds = %[[VAL_10:.*]]
-// CHECK-NEXT: %[[VAL_11:.*]] = load ptr, ptr %[[VAL_3]], align 8
+// CHECK-NEXT: %[[VAL_11:.*]] = load ptr, ptr %[[ASCAST]], align 8
// CHECK-NEXT: br label %[[AFTER_ALLOC:.*]]
// CHECK: [[AFTER_ALLOC]]:
@@ -24,11 +25,12 @@
// CHECK-NEXT: %[[VAL_13:.*]] = load ptr, ptr %[[VAL_11]], align 8
// CHECK-NEXT: store i32 999, ptr %[[VAL_13]], align 4
// CHECK-NEXT: br label %[[VAL_14:.*]]
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true } {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true } {
llvm.func @_QQmain() attributes {fir.bindc_name = "main"} {
%0 = llvm.mlir.constant(1 : i64) : i64
- %a = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr
- %map = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %a = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr<5>
+ %ascast = llvm.addrspacecast %a : !llvm.ptr<5> to !llvm.ptr
+ %map = omp.map.info var_ptr(%ascast : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
omp.target_data use_device_ptr(%map -> %arg0 : !llvm.ptr) {
%map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
omp.target map_entries(%map1 -> %arg1 : !llvm.ptr){
diff --git a/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir b/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir
index f09257d091324..2ce2424cf9541 100644
--- a/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir
@@ -3,7 +3,7 @@
// This tests the fix for https://github.com/llvm/llvm-project/issues/84606
// We are only interested in ensuring that the -mlir-to-llvmir pass doesn't crash.
// CHECK: {{.*}} = add i32 {{.*}}, 5
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
llvm.func @_QQmain() attributes {fir.bindc_name = "main", omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>} {
%0 = llvm.mlir.constant(0 : i32) : i32
%1 = llvm.mlir.constant(1 : i64) : i64
More information about the Mlir-commits
mailing list