[Mlir-commits] [mlir] [NFC][MLIR][OpenMP] Add test for lowering omp target parallel (PR #70795)
Dominik Adamski
llvmlistbot at llvm.org
Tue Nov 7 03:25:39 PST 2023
https://github.com/DominikAdamski updated https://github.com/llvm/llvm-project/pull/70795
From f9c80a6564aa8bf37c8345deedb081ae4eaa0234 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Tue, 31 Oct 2023 06:25:07 -0500
Subject: [PATCH 1/3] [NFC][MLIR][OpenMP] Add test for lowering omp target
parallel
Added an MLIR test which checks that sample MLIR code containing the omp target
parallel construct is correctly lowered to LLVM IR for the device.
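For context, a hypothetical C analogue of the construct the test exercises
(illustrative only; the actual test starts from MLIR, and the variable name "d"
mirrors the map_info entry in the test) could look like:

    /* Illustrative sketch only; the test lowers MLIR, not C source. */
    int main(void) {
      int d = 0;
      /* Combined target parallel: offload to the device and run in parallel. */
      #pragma omp target parallel map(from: d)
      {
        d = 1; /* mirrors the llvm.store of constant 1 inside omp.parallel */
      }
      return d == 1 ? 0 : 1;
    }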
---
.../LLVMIR/omptarget-parallel-llvm.mlir | 78 +++++++++++++++++++
1 file changed, 78 insertions(+)
create mode 100644 mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
new file mode 100644
index 000000000000000..8d321dab33ccdf6
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
@@ -0,0 +1,78 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// The aim of the test is to check the LLVM IR codegen for the device
+// for omp target parallel construct
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr<3>, dense<32> : vector<4xi32>>, #dlti.dl_entry<f16, dense<16> : vector<2xi32>>, #dlti.dl_entry<i32, dense<32> : vector<2xi32>>, #dlti.dl_entry<!llvm.ptr<1>, dense<64> : vector<4xi32>>, #dlti.dl_entry<!llvm.ptr<2>, dense<32> : vector<4xi32>>, #dlti.dl_entry<i16, dense<16> : vector<2xi32>>, #dlti.dl_entry<!llvm.ptr<8>, dense<128> : vector<4xi32>>, #dlti.dl_entry<!llvm.ptr<6>, dense<32> : vector<4xi32>>, #dlti.dl_entry<!llvm.ptr<7>, dense<[160, 256, 256, 32]> : vector<4xi32>>, #dlti.dl_entry<f128, dense<128> : vector<2xi32>>, #dlti.dl_entry<!llvm.ptr<4>, dense<64> : vector<4xi32>>, #dlti.dl_entry<!llvm.ptr<5>, dense<32> : vector<4xi32>>, #dlti.dl_entry<f64, dense<64> : vector<2xi32>>, #dlti.dl_entry<i64, dense<64> : vector<2xi32>>, #dlti.dl_entry<i8, dense<8> : vector<2xi32>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi32>>, #dlti.dl_entry<i1, dense<8> : vector<2xi32>>, #dlti.dl_entry<"dlti.stack_alignment", 32 : i32>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.flags = #omp.flags<debug_kind = 1, openmp_device_version = 11>, omp.is_gpu = true, omp.is_target_device = true, omp.requires = #omp<clause_requires none>, omp.target = #omp.target<target_cpu = "gfx90a", target_features = "">, omp.version = #omp.version<version = 11>} {
+ llvm.func @_QQmain_omp_outline_1(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>, omp.outline_parent_name = "_QQmain"} {
+ %0 = omp.map_info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = "d"}
+ omp.target map_entries(%0 : !llvm.ptr) {
+ omp.parallel {
+ %1 = llvm.mlir.constant(1 : i32) : i32
+ llvm.store %1, %arg0 : i32, !llvm.ptr
+ omp.terminator
+ }
+ omp.terminator
+ }
+ llvm.return
+ }
+}
+
+// CHECK: define weak_odr protected amdgpu_kernel void [[FUNC0:@.*]](
+// CHECK-SAME: ptr [[TMP0:%.*]]) {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP1:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
+// CHECK-NEXT: [[STRUCTARG:%.*]] = alloca { ptr }, align 8, addrspace(5)
+// CHECK-NEXT: [[STRUCTARG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[STRUCTARG]] to ptr
+// CHECK-NEXT: [[TMP3:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(5) [[TMP3]] to ptr
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) [[KERNEL_ENV:@.*]] to ptr))
+// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP5]], -1
+// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// CHECK: user_code.entry:
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK-NEXT: br label [[OMP_TARGET:%.*]]
+// CHECK: omp.target:
+// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+// CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+// CHECK: omp_parallel:
+// CHECK-NEXT: [[GEP_:%.*]] = getelementptr { ptr }, ptr addrspace(5) [[STRUCTARG]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[TMP6]], ptr addrspace(5) [[GEP_]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
+// CHECK-NEXT: store ptr [[STRUCTARG_ASCAST]], ptr [[TMP7]], align 8
+// CHECK-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[OMP_GLOBAL_THREAD_NUM]], i32 1, i32 -1, i32 -1, ptr [[FUNC1:@.*]], ptr null, ptr [[TMP2]], i64 1)
+// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+// CHECK: omp.par.outlined.exit:
+// CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+// CHECK: omp.par.exit.split:
+// CHECK-NEXT: br label [[OMP_REGION_CONT:%.*]]
+// CHECK: omp.region.cont:
+// CHECK-NEXT: call void @__kmpc_target_deinit()
+// CHECK-NEXT: ret void
+// CHECK: worker.exit:
+// CHECK-NEXT: ret void
+
+// CHECK: define internal void [[FUNC1]](
+// CHECK-SAME: ptr noalias noundef [[TID_ADDR_ASCAST:%.*]], ptr noalias noundef [[ZERO_ADDR_ASCAST:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: omp.par.entry:
+// CHECK-NEXT: [[GEP_:%.*]] = getelementptr { ptr }, ptr [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: [[LOADGEP_:%.*]] = load ptr, ptr [[GEP_]], align 8
+// CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TID_ADDR_LOCAL]], align 4
+// CHECK-NEXT: [[TID:%.*]] = load i32, ptr addrspace(5) [[TID_ADDR_LOCAL]], align 4
+// CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+// CHECK: omp.par.region:
+// CHECK-NEXT: br label [[OMP_PAR_REGION2:%.*]]
+// CHECK: omp.par.region2:
+// CHECK-NEXT: store i32 1, ptr [[LOADGEP_]], align 4
+// CHECK-NEXT: br label [[OMP_REGION_CONT1:%.*]]
+// CHECK: omp.region.cont1:
+// CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+// CHECK: omp.par.pre_finalize:
+// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+// CHECK: omp.par.outlined.exit.exitStub:
+// CHECK-NEXT: ret void
+
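
The CHECK lines above can be reproduced locally with the pipeline from the
test's RUN line, for example (using the path added by this patch):

    mlir-translate -mlir-to-llvmir mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir | \
      FileCheck mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir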
From 7ff11c4802c0e013b3c8b833b368210ee0aa9abf Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Tue, 7 Nov 2023 04:05:04 -0600
Subject: [PATCH 2/3] Remove unnecessary checks
---
.../LLVMIR/omptarget-parallel-llvm.mlir | 70 +++++--------------
1 file changed, 18 insertions(+), 52 deletions(-)
diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
index 8d321dab33ccdf6..1f5f72e4b329e73 100644
--- a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
@@ -20,59 +20,25 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr<3>, den
// CHECK: define weak_odr protected amdgpu_kernel void [[FUNC0:@.*]](
// CHECK-SAME: ptr [[TMP0:%.*]]) {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
-// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
-// CHECK-NEXT: [[STRUCTARG:%.*]] = alloca { ptr }, align 8, addrspace(5)
-// CHECK-NEXT: [[STRUCTARG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[STRUCTARG]] to ptr
-// CHECK-NEXT: [[TMP3:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(5) [[TMP3]] to ptr
-// CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8
-// CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) [[KERNEL_ENV:@.*]] to ptr))
-// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP5]], -1
-// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-// CHECK: user_code.entry:
-// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8
-// CHECK-NEXT: br label [[OMP_TARGET:%.*]]
-// CHECK: omp.target:
-// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
-// CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
-// CHECK: omp_parallel:
-// CHECK-NEXT: [[GEP_:%.*]] = getelementptr { ptr }, ptr addrspace(5) [[STRUCTARG]], i32 0, i32 0
-// CHECK-NEXT: store ptr [[TMP6]], ptr addrspace(5) [[GEP_]], align 8
-// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK-NEXT: store ptr [[STRUCTARG_ASCAST]], ptr [[TMP7]], align 8
-// CHECK-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[OMP_GLOBAL_THREAD_NUM]], i32 1, i32 -1, i32 -1, ptr [[FUNC1:@.*]], ptr null, ptr [[TMP2]], i64 1)
-// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
-// CHECK: omp.par.outlined.exit:
-// CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
-// CHECK: omp.par.exit.split:
-// CHECK-NEXT: br label [[OMP_REGION_CONT:%.*]]
-// CHECK: omp.region.cont:
-// CHECK-NEXT: call void @__kmpc_target_deinit()
-// CHECK-NEXT: ret void
-// CHECK: worker.exit:
-// CHECK-NEXT: ret void
+// CHECK: [[TMP1:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
+// CHECK: [[STRUCTARG:%.*]] = alloca { ptr }, align 8, addrspace(5)
+// CHECK: [[STRUCTARG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[STRUCTARG]] to ptr
+// CHECK: [[TMP3:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK: [[TMP4:%.*]] = addrspacecast ptr addrspace(5) [[TMP3]] to ptr
+// CHECK: store ptr [[TMP0]], ptr [[TMP4]], align 8
+// CHECK: [[TMP5:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) [[KERNEL_ENV:@.*]] to ptr))
+// CHECK: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP5]], -1
+// CHECK: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// CHECK: [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+// CHECK: [[GEP_:%.*]] = getelementptr { ptr }, ptr addrspace(5) [[STRUCTARG]], i32 0, i32 0
+// CHECK: store ptr [[TMP6]], ptr addrspace(5) [[GEP_]], align 8
+// CHECK: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
+// CHECK: store ptr [[STRUCTARG_ASCAST]], ptr [[TMP7]], align 8
+// CHECK: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[OMP_GLOBAL_THREAD_NUM]], i32 1, i32 -1, i32 -1, ptr [[FUNC1:@.*]], ptr null, ptr [[TMP2]], i64 1)
+// CHECK: call void @__kmpc_target_deinit()
// CHECK: define internal void [[FUNC1]](
// CHECK-SAME: ptr noalias noundef [[TID_ADDR_ASCAST:%.*]], ptr noalias noundef [[ZERO_ADDR_ASCAST:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: omp.par.entry:
-// CHECK-NEXT: [[GEP_:%.*]] = getelementptr { ptr }, ptr [[TMP0]], i32 0, i32 0
-// CHECK-NEXT: [[LOADGEP_:%.*]] = load ptr, ptr [[GEP_]], align 8
-// CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TID_ADDR_LOCAL]], align 4
-// CHECK-NEXT: [[TID:%.*]] = load i32, ptr addrspace(5) [[TID_ADDR_LOCAL]], align 4
-// CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
-// CHECK: omp.par.region:
-// CHECK-NEXT: br label [[OMP_PAR_REGION2:%.*]]
-// CHECK: omp.par.region2:
-// CHECK-NEXT: store i32 1, ptr [[LOADGEP_]], align 4
-// CHECK-NEXT: br label [[OMP_REGION_CONT1:%.*]]
-// CHECK: omp.region.cont1:
-// CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
-// CHECK: omp.par.pre_finalize:
-// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
-// CHECK: omp.par.outlined.exit.exitStub:
-// CHECK-NEXT: ret void
From 1a7410d809147c1a0255c05216865d54f78ce08b Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Tue, 7 Nov 2023 05:15:20 -0600
Subject: [PATCH 3/3] Remove unnecessary MLIR attributes
---
mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
index 1f5f72e4b329e73..0ccb28e86a0a9ae 100644
--- a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
@@ -3,7 +3,7 @@
// The aim of the test is to check the LLVM IR codegen for the device
// for omp target parallel construct
-module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr<3>, dense<32> : vector<4xi32>>, #dlti.dl_entry<f16, dense<16> : vector<2xi32>>, #dlti.dl_entry<i32, dense<32> : vector<2xi32>>, #dlti.dl_entry<!llvm.ptr<1>, dense<64> : vector<4xi32>>, #dlti.dl_entry<!llvm.ptr<2>, dense<32> : vector<4xi32>>, #dlti.dl_entry<i16, dense<16> : vector<2xi32>>, #dlti.dl_entry<!llvm.ptr<8>, dense<128> : vector<4xi32>>, #dlti.dl_entry<!llvm.ptr<6>, dense<32> : vector<4xi32>>, #dlti.dl_entry<!llvm.ptr<7>, dense<[160, 256, 256, 32]> : vector<4xi32>>, #dlti.dl_entry<f128, dense<128> : vector<2xi32>>, #dlti.dl_entry<!llvm.ptr<4>, dense<64> : vector<4xi32>>, #dlti.dl_entry<!llvm.ptr<5>, dense<32> : vector<4xi32>>, #dlti.dl_entry<f64, dense<64> : vector<2xi32>>, #dlti.dl_entry<i64, dense<64> : vector<2xi32>>, #dlti.dl_entry<i8, dense<8> : vector<2xi32>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi32>>, #dlti.dl_entry<i1, dense<8> : vector<2xi32>>, #dlti.dl_entry<"dlti.stack_alignment", 32 : i32>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.flags = #omp.flags<debug_kind = 1, openmp_device_version = 11>, omp.is_gpu = true, omp.is_target_device = true, omp.requires = #omp<clause_requires none>, omp.target = #omp.target<target_cpu = "gfx90a", target_features = "">, omp.version = #omp.version<version = 11>} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true, omp.target = #omp.target<target_cpu = "gfx90a", target_features = "">} {
llvm.func @_QQmain_omp_outline_1(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>, omp.outline_parent_name = "_QQmain"} {
%0 = omp.map_info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = "d"}
omp.target map_entries(%0 : !llvm.ptr) {