[Mlir-commits] [mlir] a8b2c96 - [NFC][MLIR][OpenMP] Modify tests to have allocas in the correct address space for AMDGPU
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Fri Apr 25 18:41:29 PDT 2025
Author: agozillon
Date: 2025-04-25T20:41:08-05:00
New Revision: a8b2c96d00c408d10eba011facf6121b2ce72c06
URL: https://github.com/llvm/llvm-project/commit/a8b2c96d00c408d10eba011facf6121b2ce72c06
DIFF: https://github.com/llvm/llvm-project/commit/a8b2c96d00c408d10eba011facf6121b2ce72c06.diff
LOG: [NFC][MLIR][OpenMP] Modify tests to have allocas in the correct address space for AMDGPU
Added:
Modified:
mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir
mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir
mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir
mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir
mlir/test/Target/LLVMIR/omptarget-debug.mlir
mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir
mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir
mlir/test/Target/LLVMIR/omptarget-threadprivate-device-lowering.mlir
mlir/test/Target/LLVMIR/openmp-target-generic-spmd.mlir
mlir/test/Target/LLVMIR/openmp-target-launch-device.mlir
mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir
mlir/test/Target/LLVMIR/openmp-target-spmd.mlir
mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir
mlir/test/Target/LLVMIR/openmp-task-target-device.mlir
Removed:
################################################################################
diff --git a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir
index ae5cf97c958db..e9c77ef015336 100644
--- a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir
@@ -1,6 +1,6 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
llvm.func @_QQmain() attributes {fir.bindc_name = "main"} {
%0 = llvm.mlir.addressof @_QFEi : !llvm.ptr
%1 = llvm.mlir.addressof @_QFEsp : !llvm.ptr
@@ -26,19 +26,21 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_devic
// CHECK: define {{.*}} void @__omp_offloading_{{.*}}_{{.*}}__QQmain_l{{.*}}(ptr %[[DYN_PTR:.*]], ptr %[[ARG_BYREF:.*]], ptr %[[ARG_BYCOPY:.*]]) #{{[0-9]+}} {
// CHECK: entry:
-// CHECK: %[[ALLOCA_BYREF:.*]] = alloca ptr, align 8
-// CHECK: store ptr %[[ARG_BYREF]], ptr %[[ALLOCA_BYREF]], align 8
-// CHECK: %[[ALLOCA_BYCOPY:.*]] = alloca ptr, align 8
-// CHECK: store ptr %[[ARG_BYCOPY]], ptr %[[ALLOCA_BYCOPY]], align 8
+// CHECK: %[[ALLOCA_BYREF:.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK: %[[ALLOCA_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA_BYREF]] to ptr
+// CHECK: store ptr %[[ARG_BYREF]], ptr %[[ALLOCA_ASCAST]], align 8
+// CHECK: %[[ALLOCA_BYCOPY:.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK: %[[ALLOCA_ASCAST2:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA_BYCOPY]] to ptr
+// CHECK: store ptr %[[ARG_BYCOPY]], ptr %[[ALLOCA_ASCAST2]], align 8
// CHECK: user_code.entry: ; preds = %entry
-// CHECK: %[[LOAD_BYREF:.*]] = load ptr, ptr %[[ALLOCA_BYREF]], align 8
+// CHECK: %[[LOAD_BYREF:.*]] = load ptr, ptr %[[ALLOCA_ASCAST]], align 8
// CHECK: br label %outlined.body
// CHECK: outlined.body:
// CHECK: br label %omp.target
// CHECK: omp.target:
-// CHECK: %[[VAL_LOAD_BYCOPY:.*]] = load i32, ptr %[[ALLOCA_BYCOPY]], align 4
+// CHECK: %[[VAL_LOAD_BYCOPY:.*]] = load i32, ptr %[[ALLOCA_ASCAST2]], align 4
// CHECK: store i32 %[[VAL_LOAD_BYCOPY]], ptr %[[LOAD_BYREF]], align 4
// CHECK: br label %omp.region.cont
diff --git a/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir b/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir
index 0aa592fe1bee1..724e03885d146 100644
--- a/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir
@@ -10,22 +10,24 @@
// constant sized) allocations performs its task reasonably in these
// scenarios.
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
llvm.func @_QQmain() attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>} {
%1 = llvm.mlir.constant(1 : i64) : i64
%2 = llvm.alloca %1 x !llvm.struct<(ptr)> : (i64) -> !llvm.ptr
%3 = omp.map.info var_ptr(%2 : !llvm.ptr, !llvm.struct<(ptr)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
omp.target map_entries(%3 -> %arg0 : !llvm.ptr) {
%4 = llvm.mlir.constant(1 : i32) : i32
- %5 = llvm.alloca %4 x !llvm.struct<(ptr)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %5 = llvm.alloca %4 x !llvm.struct<(ptr)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
+ %ascast1 = llvm.addrspacecast %5 : !llvm.ptr<5> to !llvm.ptr
%6 = llvm.mlir.constant(50 : i32) : i32
%7 = llvm.mlir.constant(1 : i64) : i64
- %8 = llvm.alloca %7 x i32 : (i64) -> !llvm.ptr
- llvm.store %6, %8 : i32, !llvm.ptr
+ %8 = llvm.alloca %7 x i32 : (i64) -> !llvm.ptr<5>
+ %ascast2 = llvm.addrspacecast %8 : !llvm.ptr<5> to !llvm.ptr
+ llvm.store %6, %ascast2 : i32, !llvm.ptr
%9 = llvm.mlir.undef : !llvm.struct<(ptr)>
- %10 = llvm.insertvalue %8, %9[0] : !llvm.struct<(ptr)>
- llvm.store %10, %5 : !llvm.struct<(ptr)>, !llvm.ptr
- %88 = llvm.call @_ExternalCall(%arg0, %5) : (!llvm.ptr, !llvm.ptr) -> !llvm.struct<()>
+ %10 = llvm.insertvalue %ascast2, %9[0] : !llvm.struct<(ptr)>
+ llvm.store %10, %ascast1 : !llvm.struct<(ptr)>, !llvm.ptr
+ %88 = llvm.call @_ExternalCall(%arg0, %ascast1) : (!llvm.ptr, !llvm.ptr) -> !llvm.struct<()>
omp.terminator
}
llvm.return
diff --git a/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir b/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir
index 59bdf220b0f6a..ed66ff2c9ad7e 100644
--- a/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir
@@ -1,6 +1,6 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
llvm.func @_QQmain() attributes {bindc_name = "main"} {
%0 = llvm.mlir.addressof @_QFEsp : !llvm.ptr
%1 = llvm.mlir.constant(10 : index) : i64
@@ -32,9 +32,10 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_devic
// CHECK: define {{.*}} void @__omp_offloading_{{.*}}_{{.*}}__QQmain_{{.*}}(ptr %{{.*}}, ptr %[[ARG1:.*]]) #{{[0-9]+}} {
-// CHECK: %[[ARG1_ALLOCA:.*]] = alloca ptr, align 8
-// CHECK: store ptr %[[ARG1]], ptr %[[ARG1_ALLOCA]], align 8
-// CHECK: %[[LOAD_ARG1_ALLOCA:.*]] = load ptr, ptr %[[ARG1_ALLOCA]], align 8
+// CHECK: %[[ARG1_ALLOCA:.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK: %[[ARG1_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[ARG1_ALLOCA]] to ptr
+// CHECK: store ptr %[[ARG1]], ptr %[[ARG1_ASCAST]], align 8
+// CHECK: %[[LOAD_ARG1_ALLOCA:.*]] = load ptr, ptr %[[ARG1_ASCAST]], align 8
// CHECK: store i32 20, ptr %[[LOAD_ARG1_ALLOCA]], align 4
// CHECK: %[[GEP_ARG1_ALLOCA:.*]] = getelementptr inbounds nuw i8, ptr %[[LOAD_ARG1_ALLOCA]], i64 16
// CHECK: store i32 10, ptr %[[GEP_ARG1_ALLOCA]], align 4
diff --git a/mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir b/mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir
index 950f59f3e7ba5..ea92589bbd031 100644
--- a/mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir
@@ -26,20 +26,22 @@
#var_x = #llvm.di_local_variable<scope = #sp,
name = "x", file = #file, line = 12, type = #real_ty>
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
llvm.func @test() {
%0 = llvm.mlir.constant(1 : i64) : i64
- %1 = llvm.alloca %0 x f32 : (i64) -> !llvm.ptr
- %4 = llvm.alloca %0 x i32 : (i64) -> !llvm.ptr
+ %1 = llvm.alloca %0 x f32 : (i64) -> !llvm.ptr<5>
+ %4 = llvm.alloca %0 x i32 : (i64) -> !llvm.ptr<5>
+ %ascast = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
+ %ascast2 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr
%6 = llvm.mlir.constant(9 : index) : i64
%7 = llvm.mlir.constant(0 : index) : i64
%8 = llvm.mlir.constant(1 : index) : i64
%10 = llvm.mlir.constant(10 : index) : i64
%11 = llvm.mlir.addressof @_QFEarr : !llvm.ptr
- %14 = omp.map.info var_ptr(%1 : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
+ %14 = omp.map.info var_ptr(%ascast : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
%15 = omp.map.bounds lower_bound(%7 : i64) upper_bound(%6 : i64) extent(%10 : i64) stride(%8 : i64) start_idx(%8 : i64)
%16 = omp.map.info var_ptr(%11 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%15) -> !llvm.ptr
- %17 = omp.map.info var_ptr(%4 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr
+ %17 = omp.map.info var_ptr(%ascast2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr
omp.target map_entries(%14 -> %arg0, %16 -> %arg1, %17 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
llvm.intr.dbg.declare #var_x = %arg0 : !llvm.ptr
llvm.intr.dbg.declare #var_arr = %arg1 : !llvm.ptr
diff --git a/mlir/test/Target/LLVMIR/omptarget-debug.mlir b/mlir/test/Target/LLVMIR/omptarget-debug.mlir
index dc2df5fdfba05..9c8344d69dc74 100644
--- a/mlir/test/Target/LLVMIR/omptarget-debug.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-debug.mlir
@@ -1,10 +1,11 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
llvm.func @_QQmain() {
%0 = llvm.mlir.constant(1 : i32) : i32
- %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
- %9 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr<5>
+ %ascast = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
+ %9 = omp.map.info var_ptr(%ascast : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
omp.target map_entries(%9 -> %arg0 : !llvm.ptr) {
%13 = llvm.mlir.constant(1 : i32) : i32
llvm.store %13, %arg0 : i32, !llvm.ptr loc(#loc2)
diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
index de965f99fd4a1..593d8010f55de 100644
--- a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
@@ -33,8 +33,9 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
llvm.func @parallel_if(%arg0: !llvm.ptr {fir.bindc_name = "ifcond"}) {
%0 = llvm.mlir.constant(1 : i64) : i64
- %1 = llvm.alloca %0 x i32 {bindc_name = "d"} : (i64) -> !llvm.ptr
- %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = "d"}
+ %1 = llvm.alloca %0 x i32 {bindc_name = "d"} : (i64) -> !llvm.ptr<5>
+ %cast = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
+ %2 = omp.map.info var_ptr(%cast : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = "d"}
%3 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "ifcond"}
omp.target map_entries(%2 -> %arg1, %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
%4 = llvm.mlir.constant(10 : i32) : i32
diff --git a/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir
index 5938aaeafa5ef..5a76871c180ab 100644
--- a/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir
@@ -1,25 +1,28 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
llvm.func @omp_target_region_() {
%0 = llvm.mlir.constant(20 : i32) : i32
%1 = llvm.mlir.constant(10 : i32) : i32
%2 = llvm.mlir.constant(1 : i64) : i64
- %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr
+ %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr<5>
%4 = llvm.mlir.constant(1 : i64) : i64
- %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr
+ %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr<5>
%6 = llvm.mlir.constant(1 : i64) : i64
- %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr
- llvm.store %1, %3 : i32, !llvm.ptr
- llvm.store %0, %5 : i32, !llvm.ptr
- %map1 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
- %map2 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
- %map3 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr<5>
+ %8 = llvm.addrspacecast %3 : !llvm.ptr<5> to !llvm.ptr
+ %9 = llvm.addrspacecast %5 : !llvm.ptr<5> to !llvm.ptr
+ %10 = llvm.addrspacecast %7 : !llvm.ptr<5> to !llvm.ptr
+ llvm.store %1, %8 : i32, !llvm.ptr
+ llvm.store %0, %9 : i32, !llvm.ptr
+ %map1 = omp.map.info var_ptr(%8 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %map2 = omp.map.info var_ptr(%9 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %map3 = omp.map.info var_ptr(%10 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
omp.target map_entries(%map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
- %8 = llvm.load %arg0 : !llvm.ptr -> i32
- %9 = llvm.load %arg1 : !llvm.ptr -> i32
- %10 = llvm.add %8, %9 : i32
- llvm.store %10, %arg2 : i32, !llvm.ptr
+ %11 = llvm.load %arg0 : !llvm.ptr -> i32
+ %12 = llvm.load %arg1 : !llvm.ptr -> i32
+ %13 = llvm.add %11, %12 : i32
+ llvm.store %13, %arg2 : i32, !llvm.ptr
omp.terminator
}
llvm.return
@@ -31,19 +34,22 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_devic
// CHECK: @[[DYNA_ENV:.*]] = weak_odr protected global %struct.DynamicEnvironmentTy zeroinitializer
// CHECK: @[[KERNEL_ENV:.*]] = weak_odr protected constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 1, i32 256, i32 -1, i32 -1, i32 0, i32 0 }, ptr @[[IDENT]], ptr @[[DYNA_ENV]] }
// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{[^_]+}}_{{[^_]+}}_omp_target_region__l{{[0-9]+}}(ptr %[[DYN_PTR:.*]], ptr %[[ADDR_A:.*]], ptr %[[ADDR_B:.*]], ptr %[[ADDR_C:.*]])
-// CHECK: %[[TMP_A:.*]] = alloca ptr, align 8
-// CHECK: store ptr %[[ADDR_A]], ptr %[[TMP_A]], align 8
+// CHECK: %[[TMP_A:.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK: %[[ASCAST_A:.*]] = addrspacecast ptr addrspace(5) %[[TMP_A]] to ptr
+// CHECK: store ptr %[[ADDR_A]], ptr %[[ASCAST_A]], align 8
// CHECK: %[[TMP_B:.*]] = alloca ptr, align 8
-// CHECK: store ptr %[[ADDR_B]], ptr %[[TMP_B]], align 8
+// CHECK: %[[ASCAST_B:.*]] = addrspacecast ptr addrspace(5) %[[TMP_B]] to ptr
+// CHECK: store ptr %[[ADDR_B]], ptr %[[ASCAST_B]], align 8
// CHECK: %[[TMP_C:.*]] = alloca ptr, align 8
-// CHECK: store ptr %[[ADDR_C]], ptr %[[TMP_C]], align 8
+// CHECK: %[[ASCAST_C:.*]] = addrspacecast ptr addrspace(5) %[[TMP_C]] to ptr
+// CHECK: store ptr %[[ADDR_C]], ptr %[[ASCAST_C]], align 8
// CHECK: %[[INIT:.*]] = call i32 @__kmpc_target_init(ptr @[[KERNEL_ENV]], ptr %[[DYN_PTR]])
// CHECK-NEXT: %[[CMP:.*]] = icmp eq i32 %[[INIT]], -1
// CHECK-NEXT: br i1 %[[CMP]], label %[[LABEL_ENTRY:.*]], label %[[LABEL_EXIT:.*]]
// CHECK: [[LABEL_ENTRY]]:
-// CHECK: %[[PTR_A:.*]] = load ptr, ptr %[[TMP_A]], align 8
-// CHECK: %[[PTR_B:.*]] = load ptr, ptr %[[TMP_B]], align 8
-// CHECK: %[[PTR_C:.*]] = load ptr, ptr %[[TMP_C]], align 8
+// CHECK: %[[PTR_A:.*]] = load ptr, ptr %[[ASCAST_A]], align 8
+// CHECK: %[[PTR_B:.*]] = load ptr, ptr %[[ASCAST_B]], align 8
+// CHECK: %[[PTR_C:.*]] = load ptr, ptr %[[ASCAST_C]], align 8
// CHECK-NEXT: br label %[[LABEL_TARGET:.*]]
// CHECK: [[LABEL_TARGET]]:
// CHECK: %[[A:.*]] = load i32, ptr %[[PTR_A]], align 4
diff --git a/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir b/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir
index d4743ea88d9d9..16be0773bd14b 100644
--- a/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir
@@ -1,21 +1,24 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} {
llvm.func @omp_target_region_() {
%0 = llvm.mlir.constant(20 : i32) : i32
%1 = llvm.mlir.constant(10 : i32) : i32
%2 = llvm.mlir.constant(1 : i64) : i64
- %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr
+ %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr<5>
+ %ascast = llvm.addrspacecast %3 : !llvm.ptr<5> to !llvm.ptr
%4 = llvm.mlir.constant(1 : i64) : i64
- %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr
+ %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr<5>
+ %ascast2 = llvm.addrspacecast %5 : !llvm.ptr<5> to !llvm.ptr
%6 = llvm.mlir.constant(1 : i64) : i64
- %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr
- llvm.store %1, %3 : i32, !llvm.ptr
- llvm.store %0, %5 : i32, !llvm.ptr
+ %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr<5>
+ %ascast3 = llvm.addrspacecast %7 : !llvm.ptr<5> to !llvm.ptr
+ llvm.store %1, %ascast : i32, !llvm.ptr
+ llvm.store %0, %ascast2 : i32, !llvm.ptr
omp.task {
- %map1 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
- %map2 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
- %map3 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %map1 = omp.map.info var_ptr(%ascast : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %map2 = omp.map.info var_ptr(%ascast2 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %map3 = omp.map.info var_ptr(%ascast3 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
omp.target map_entries(%map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
%8 = llvm.load %arg0 : !llvm.ptr -> i32
%9 = llvm.load %arg1 : !llvm.ptr -> i32
@@ -36,5 +39,5 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_devic
}
}
-// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_{{.*}}_omp_target_region__l19
+// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_{{.*}}_omp_target_region__l22
// CHECK: ret void
diff --git a/mlir/test/Target/LLVMIR/omptarget-threadprivate-device-lowering.mlir b/mlir/test/Target/LLVMIR/omptarget-threadprivate-device-lowering.mlir
index b0c641c65f9fb..ba182374a9e3b 100644
--- a/mlir/test/Target/LLVMIR/omptarget-threadprivate-device-lowering.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-threadprivate-device-lowering.mlir
@@ -4,7 +4,7 @@
// omp.threadprivate does not crash on lowering during the OpenMP target device
// pass when used in conjunction with target code in the same module.
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true } {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true } {
llvm.func @func() attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>} {
%0 = llvm.mlir.addressof @_QFEpointer2 : !llvm.ptr
%1 = omp.threadprivate %0 : !llvm.ptr -> !llvm.ptr
@@ -24,7 +24,8 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_devic
}
// CHECK: define weak_odr protected amdgpu_kernel void @{{.*}}(ptr %{{.*}}, ptr %[[ARG1:.*]]) #{{[0-9]+}} {
-// CHECK: %[[ALLOCA:.*]] = alloca ptr, align 8
-// CHECK: store ptr %[[ARG1]], ptr %[[ALLOCA]], align 8
-// CHECK: %[[LOAD_ALLOCA:.*]] = load ptr, ptr %[[ALLOCA]], align 8
+// CHECK: %[[ALLOCA:.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK: %[[ALLOCA_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA]] to ptr
+// CHECK: store ptr %[[ARG1]], ptr %[[ALLOCA_ASCAST]], align 8
+// CHECK: %[[LOAD_ALLOCA:.*]] = load ptr, ptr %[[ALLOCA_ASCAST]], align 8
// CHECK: store i32 1, ptr %[[LOAD_ALLOCA]], align 4
diff --git a/mlir/test/Target/LLVMIR/openmp-target-generic-spmd.mlir b/mlir/test/Target/LLVMIR/openmp-target-generic-spmd.mlir
index 8101660e571e4..9bb2b40a43def 100644
--- a/mlir/test/Target/LLVMIR/openmp-target-generic-spmd.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-target-generic-spmd.mlir
@@ -60,7 +60,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
//--- device.mlir
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} {
llvm.func @main(%arg0 : !llvm.ptr) {
%0 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(to) capture(ByCopy) -> !llvm.ptr
omp.target map_entries(%0 -> %ptr : !llvm.ptr) {
diff --git a/mlir/test/Target/LLVMIR/openmp-target-launch-device.mlir b/mlir/test/Target/LLVMIR/openmp-target-launch-device.mlir
index a418445324817..c7f1490240182 100644
--- a/mlir/test/Target/LLVMIR/openmp-target-launch-device.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-target-launch-device.mlir
@@ -12,7 +12,7 @@
// CHECK-SAME: %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 [[EXEC_MODE2:1]], i32 [[MIN_THREADS2:1]], i32 [[MAX_THREADS2:30]], i32 [[MIN_TEAMS2:40]], i32 [[MAX_TEAMS2:40]], i32 0, i32 0 },
// CHECK-SAME: ptr @{{.*}}, ptr @{{.*}} }
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} {
llvm.func @main(%num_teams : !llvm.ptr) {
// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_main_l{{[0-9]+}}(ptr %[[KERNEL_ARGS:.*]], ptr %[[NUM_TEAMS_ARG:.*]]) #[[ATTRS1:[0-9]+]]
// CHECK: %{{.*}} = call i32 @__kmpc_target_init(ptr @[[KERNEL1_ENV]], ptr %[[KERNEL_ARGS]])
diff --git a/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir b/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir
index c618b68d52aaf..cbf273b887bc7 100644
--- a/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir
@@ -1,6 +1,6 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} {
omp.private {type = private} @i32_privatizer : i32
@@ -28,10 +28,11 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true,
llvm.func @test_nested_target_in_wsloop(%arg0: !llvm.ptr) {
%8 = llvm.mlir.constant(1 : i64) : i64
- %9 = llvm.alloca %8 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+ %9 = llvm.alloca %8 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr<5>
+ %ascast = llvm.addrspacecast %9 : !llvm.ptr<5> to !llvm.ptr
%16 = llvm.mlir.constant(10 : i32) : i32
%17 = llvm.mlir.constant(1 : i32) : i32
- omp.wsloop private(@i32_privatizer %9 -> %loop_arg : !llvm.ptr) {
+ omp.wsloop private(@i32_privatizer %ascast -> %loop_arg : !llvm.ptr) {
omp.loop_nest (%arg1) : i32 = (%17) to (%16) inclusive step (%17) {
llvm.store %arg1, %loop_arg : i32, !llvm.ptr
%0 = llvm.mlir.constant(4 : index) : i64
@@ -48,7 +49,8 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true,
}
// CHECK-LABEL: define void @test_nested_target_in_wsloop(ptr %0) {
-// CHECK-NEXT: %{{.*}} = alloca i32, i64 1, align 4
+// CHECK-NEXT: %{{.*}} = alloca i32, i64 1, align 4, addrspace(5)
+// CHECK-NEXT: %{{.*}} = addrspacecast ptr addrspace(5) %{{.*}} to ptr
// CHECK-NEXT: br label %omp.wsloop.fake.region
// CHECK: omp.wsloop.fake.region:
// CHECK-NEXT: br label %omp.loop_nest.fake.region
@@ -63,8 +65,9 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true,
llvm.func @test_nested_target_in_parallel_with_private(%arg0: !llvm.ptr) {
%8 = llvm.mlir.constant(1 : i64) : i64
- %9 = llvm.alloca %8 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
- omp.parallel private(@i32_privatizer %9 -> %i_priv_arg : !llvm.ptr) {
+ %9 = llvm.alloca %8 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr<5>
+ %ascast = llvm.addrspacecast %9 : !llvm.ptr<5> to !llvm.ptr
+ omp.parallel private(@i32_privatizer %ascast -> %i_priv_arg : !llvm.ptr) {
%1 = llvm.mlir.constant(1 : index) : i64
// Use the private clause from omp.parallel to make sure block arguments
// are handled.
@@ -81,8 +84,9 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true,
llvm.func @test_nested_target_in_task_with_private(%arg0: !llvm.ptr) {
%8 = llvm.mlir.constant(1 : i64) : i64
- %9 = llvm.alloca %8 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
- omp.task private(@i32_privatizer %9 -> %i_priv_arg : !llvm.ptr) {
+ %9 = llvm.alloca %8 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr<5>
+ %ascast = llvm.addrspacecast %9 : !llvm.ptr<5> to !llvm.ptr
+ omp.task private(@i32_privatizer %ascast -> %i_priv_arg : !llvm.ptr) {
%1 = llvm.mlir.constant(1 : index) : i64
// Use the private clause from omp.task to make sure block arguments
// are handled.
diff --git a/mlir/test/Target/LLVMIR/openmp-target-spmd.mlir b/mlir/test/Target/LLVMIR/openmp-target-spmd.mlir
index 7930554cbe11a..86dff678bf639 100644
--- a/mlir/test/Target/LLVMIR/openmp-target-spmd.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-target-spmd.mlir
@@ -53,7 +53,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
//--- device.mlir
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} {
llvm.func @main(%x : i32) {
omp.target host_eval(%x -> %lb, %x -> %ub, %x -> %step : i32, i32, i32) {
omp.teams {
diff --git a/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir b/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir
index ff580e5fea634..9c6b06e3aab96 100644
--- a/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir
@@ -5,13 +5,14 @@
// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading{{.*}}main_
// CHECK-NEXT: entry:
-// CHECK-NEXT: %[[VAL_3:.*]] = alloca ptr, align 8
-// CHECK-NEXT: store ptr %[[VAL_4:.*]], ptr %[[VAL_3]], align 8
+// CHECK-NEXT: %[[VAL_3:.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: %[[ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[VAL_3]] to ptr
+// CHECK-NEXT: store ptr %[[VAL_4:.*]], ptr %[[ASCAST]], align 8
// CHECK-NEXT: %[[VAL_5:.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_{{.*}}_kernel_environment, ptr %[[VAL_6:.*]])
// CHECK-NEXT: %[[VAL_7:.*]] = icmp eq i32 %[[VAL_5]], -1
// CHECK-NEXT: br i1 %[[VAL_7]], label %[[VAL_8:.*]], label %[[VAL_9:.*]]
// CHECK: user_code.entry: ; preds = %[[VAL_10:.*]]
-// CHECK-NEXT: %[[VAL_11:.*]] = load ptr, ptr %[[VAL_3]], align 8
+// CHECK-NEXT: %[[VAL_11:.*]] = load ptr, ptr %[[ASCAST]], align 8
// CHECK-NEXT: br label %[[AFTER_ALLOC:.*]]
// CHECK: [[AFTER_ALLOC]]:
@@ -24,11 +25,12 @@
// CHECK-NEXT: %[[VAL_13:.*]] = load ptr, ptr %[[VAL_11]], align 8
// CHECK-NEXT: store i32 999, ptr %[[VAL_13]], align 4
// CHECK-NEXT: br label %[[VAL_14:.*]]
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true } {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true } {
llvm.func @_QQmain() attributes {fir.bindc_name = "main"} {
%0 = llvm.mlir.constant(1 : i64) : i64
- %a = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr
- %map = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %a = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr<5>
+ %ascast = llvm.addrspacecast %a : !llvm.ptr<5> to !llvm.ptr
+ %map = omp.map.info var_ptr(%ascast : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
omp.target_data use_device_ptr(%map -> %arg0 : !llvm.ptr) {
%map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
omp.target map_entries(%map1 -> %arg1 : !llvm.ptr){
diff --git a/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir b/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir
index f09257d091324..2ce2424cf9541 100644
--- a/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir
@@ -3,7 +3,7 @@
// This tests the fix for https://github.com/llvm/llvm-project/issues/84606
// We are only interested in ensuring that the -mlir-to-llvmir pass doesn't crash.
// CHECK: {{.*}} = add i32 {{.*}}, 5
-module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
llvm.func @_QQmain() attributes {fir.bindc_name = "main", omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>} {
%0 = llvm.mlir.constant(0 : i32) : i32
%1 = llvm.mlir.constant(1 : i64) : i64
More information about the Mlir-commits
mailing list