[flang-commits] [flang] [OpenMP][Flang] Add "IsolatedFromAbove" trait to omp.target (PR #67164)
Akash Banerjee via flang-commits
flang-commits at lists.llvm.org
Fri Oct 13 07:17:16 PDT 2023
TIFitis wrote:
Here are code dumps at various stages with the current implementation:
Fortran:
```
subroutine omp_target_implicit_bounds(n)
integer :: n
integer :: a(n, 1024)
!$omp target
a(11,22) = 33
!$omp end target
end subroutine omp_target_implicit_bounds
```
FIR:
```
module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<i64, dense<64> : vector<2xi32>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi32>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi32>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi32>>, #dlti.dl_entry<i32, dense<32> : vector<2xi32>>, #dlti.dl_entry<i16, dense<16> : vector<2xi32>>, #dlti.dl_entry<i8, dense<8> : vector<2xi32>>, #dlti.dl_entry<f16, dense<16> : vector<2xi32>>, #dlti.dl_entry<f64, dense<64> : vector<2xi32>>, #dlti.dl_entry<f128, dense<128> : vector<2xi32>>, #dlti.dl_entry<i1, dense<8> : vector<2xi32>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi32>>, #dlti.dl_entry<f80, dense<128> : vector<2xi32>>, #dlti.dl_entry<i128, dense<128> : vector<2xi32>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i32>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_gpu = false, omp.is_target_device = false, omp.requires = #omp<clause_requires none>, omp.version = #omp.version<version = 11>} {
func.func @_QPomp_target_implicit_bounds(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}) {
%0 = fir.load %arg0 : !fir.ref<i32>
%1 = fir.convert %0 : (i32) -> i64
%2 = fir.convert %1 : (i64) -> index
%c0 = arith.constant 0 : index
%3 = arith.cmpi sgt, %2, %c0 : index
%4 = arith.select %3, %2, %c0 : index
%c1024_i64 = arith.constant 1024 : i64
%5 = fir.convert %c1024_i64 : (i64) -> index
%c0_0 = arith.constant 0 : index
%6 = arith.cmpi sgt, %5, %c0_0 : index
%7 = arith.select %6, %5, %c0_0 : index
%8 = fir.alloca !fir.array<?x1024xi32>, %4 {bindc_name = "a", uniq_name = "_QFomp_target_implicit_boundsEa"}
%9 = omp.map_info var_ptr(%8 : !fir.ref<!fir.array<?x1024xi32>>) map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<!fir.array<?x1024xi32>> {name = "a"}
%10 = omp.map_info var_ptr(%4 : index) map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) -> index {name = ""}
%11 = omp.map_info var_ptr(%7 : index) map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) -> index {name = ""}
omp.target map_entries(%9 -> %arg1, %10 -> %arg2, %11 -> %arg3 : !fir.ref<!fir.array<?x1024xi32>>, index, index) {
^bb0(%arg1: !fir.ref<!fir.array<?x1024xi32>>, %arg2: index, %arg3: index):
%c33_i32 = arith.constant 33 : i32
%12 = fir.convert %arg1 : (!fir.ref<!fir.array<?x1024xi32>>) -> !fir.ref<!fir.array<?xi32>>
%c1 = arith.constant 1 : index
%c0_1 = arith.constant 0 : index
%c11_i64 = arith.constant 11 : i64
%13 = fir.convert %c11_i64 : (i64) -> index
%14 = arith.subi %13, %c1 : index
%15 = arith.muli %c1, %14 : index
%16 = arith.addi %15, %c0_1 : index
%17 = arith.muli %c1, %arg2 : index
%c22_i64 = arith.constant 22 : i64
%18 = fir.convert %c22_i64 : (i64) -> index
%19 = arith.subi %18, %c1 : index
%20 = arith.muli %17, %19 : index
%21 = arith.addi %20, %16 : index
%22 = fir.coordinate_of %12, %21 : (!fir.ref<!fir.array<?xi32>>, index) -> !fir.ref<i32>
fir.store %c33_i32 to %22 : !fir.ref<i32>
omp.terminator
}
return
}
}
```
LLVMIR Dialect:
```
module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<i64, dense<64> : vector<2xi32>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi32>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi32>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi32>>, #dlti.dl_entry<i32, dense<32> : vector<2xi32>>, #dlti.dl_entry<i16, dense<16> : vector<2xi32>>, #dlti.dl_entry<i8, dense<8> : vector<2xi32>>, #dlti.dl_entry<f16, dense<16> : vector<2xi32>>, #dlti.dl_entry<f64, dense<64> : vector<2xi32>>, #dlti.dl_entry<f128, dense<128> : vector<2xi32>>, #dlti.dl_entry<i1, dense<8> : vector<2xi32>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi32>>, #dlti.dl_entry<f80, dense<128> : vector<2xi32>>, #dlti.dl_entry<i128, dense<128> : vector<2xi32>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i32>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_gpu = false, omp.is_target_device = false, omp.requires = #omp<clause_requires none>, omp.version = #omp.version<version = 11>} {
llvm.func @_QPomp_target_implicit_bounds(%arg0: !llvm.ptr<i32> {fir.bindc_name = "n"}) {
%0 = llvm.load %arg0 : !llvm.ptr<i32>
%1 = llvm.sext %0 : i32 to i64
%2 = llvm.mlir.constant(0 : index) : i64
%3 = llvm.icmp "sgt" %1, %2 : i64
%4 = llvm.select %3, %1, %2 : i1, i64
%5 = llvm.mlir.constant(1024 : i64) : i64
%6 = llvm.mlir.constant(0 : index) : i64
%7 = llvm.icmp "sgt" %5, %6 : i64
%8 = llvm.select %7, %5, %6 : i1, i64
%9 = llvm.mlir.constant(1 : i64) : i64
%10 = llvm.mlir.constant(1024 : i64) : i64
%11 = llvm.mul %9, %10 : i64
%12 = llvm.mul %11, %4 : i64
%13 = llvm.alloca %12 x i32 {bindc_name = "a", in_type = !fir.array<?x1024xi32>, operandSegmentSizes = array<i32: 0, 1>, uniq_name = "_QFomp_target_implicit_boundsEa"} : (i64) -> !llvm.ptr<i32>
%14 = omp.map_info var_ptr(%13 : !llvm.ptr<i32>) map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr<i32> {name = "a"}
%15 = omp.map_info var_ptr(%4 : i64) map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) -> i64 {name = ""}
%16 = omp.map_info var_ptr(%8 : i64) map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) -> i64 {name = ""}
omp.target map_entries(%14 -> %arg1, %15 -> %arg2, %16 -> %arg3 : !llvm.ptr<i32>, i64, i64) {
^bb0(%arg1: !llvm.ptr<i32>, %arg2: i64, %arg3: i64):
%17 = llvm.mlir.constant(33 : i32) : i32
%18 = llvm.mlir.constant(1 : index) : i64
%19 = llvm.mlir.constant(0 : index) : i64
%20 = llvm.mlir.constant(11 : i64) : i64
%21 = llvm.sub %20, %18 : i64
%22 = llvm.mlir.constant(22 : i64) : i64
%23 = llvm.sub %22, %18 : i64
%24 = llvm.mul %arg2, %23 : i64
%25 = llvm.add %24, %21 : i64
%26 = llvm.getelementptr %arg1[%25] : (!llvm.ptr<i32>, i64) -> !llvm.ptr<i32>
llvm.store %17, %26 : !llvm.ptr<i32>
omp.terminator
}
llvm.return
}
}
```
LLVM IR (Host):
```
; ModuleID = 'FIRModule'
source_filename = "FIRModule"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.__tgt_offload_entry = type { ptr, ptr, i64, i32, i32 }
%struct.__tgt_kernel_arguments = type { i32, i32, ptr, ptr, ptr, ptr, ptr, ptr, i64, i64, [3 x i32], [3 x i32], i32 }
@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 0, ptr @.omp_offloading.requires_reg, ptr null }]
@.__omp_offloading_1030b_49a6804_omp_target_implicit_bounds__l4.region_id = weak constant i8 0
@0 = private unnamed_addr constant [55 x i8] c";/home/akash/Documents/scratch/test.f90;unknown;3;15;;\00", align 1
@1 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @1 }, align 8
@.omp_offloading.entry_name = internal unnamed_addr constant [62 x i8] c"__omp_offloading_1030b_49a6804_omp_target_implicit_bounds__l4\00"
@.omp_offloading.entry.__omp_offloading_1030b_49a6804_omp_target_implicit_bounds__l4 = weak constant %struct.__tgt_offload_entry { ptr @.__omp_offloading_1030b_49a6804_omp_target_implicit_bounds__l4.region_id, ptr @.omp_offloading.entry_name, i64 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1
declare ptr @malloc(i64)
declare void @free(ptr)
; Function Attrs: noinline nounwind
define internal void @.omp_offloading.requires_reg() #0 section ".text.startup" {
entry:
call void @__tgt_register_requires(i64 1)
ret void
}
; Function Attrs: nounwind
declare void @__tgt_register_requires(i64) #1
define void @omp_target_implicit_bounds_(ptr %0) {
%kernel_args = alloca %struct.__tgt_kernel_arguments, align 8
%2 = load i32, ptr %0, align 4
%3 = sext i32 %2 to i64
%4 = icmp sgt i64 %3, 0
%5 = select i1 %4, i64 %3, i64 0
%6 = mul i64 1024, %5
%7 = alloca i32, i64 %6, align 4
br label %entry
entry: ; preds = %1
%8 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 0
store i32 2, ptr %8, align 4
%9 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 1
store i32 0, ptr %9, align 4
%10 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 2
store ptr null, ptr %10, align 8
%11 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 3
store ptr null, ptr %11, align 8
%12 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 4
store ptr null, ptr %12, align 8
%13 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 5
store ptr null, ptr %13, align 8
%14 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 6
store ptr null, ptr %14, align 8
%15 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 7
store ptr null, ptr %15, align 8
%16 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 8
store i64 0, ptr %16, align 8
%17 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 9
store i64 0, ptr %17, align 8
%18 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 10
store [3 x i32] [i32 -1, i32 0, i32 0], ptr %18, align 4
%19 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 11
store [3 x i32] zeroinitializer, ptr %19, align 4
%20 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 12
store i32 0, ptr %20, align 4
%21 = call i32 @__tgt_target_kernel(ptr @2, i64 -1, i32 -1, i32 0, ptr @.__omp_offloading_1030b_49a6804_omp_target_implicit_bounds__l4.region_id, ptr %kernel_args)
%22 = icmp ne i32 %21, 0
br i1 %22, label %omp_offload.failed, label %omp_offload.cont
omp_offload.failed: ; preds = %entry
call void @__omp_offloading_1030b_49a6804_omp_target_implicit_bounds__l4(ptr %7, i64 %5, i64 1024)
br label %omp_offload.cont
omp_offload.cont: ; preds = %omp_offload.failed, %entry
ret void
}
; Function Attrs: nocallback nofree nosync nounwind willreturn
declare ptr @llvm.stacksave.p0() #2
; Function Attrs: nocallback nofree nosync nounwind willreturn
declare void @llvm.stackrestore.p0(ptr) #2
define internal void @__omp_offloading_1030b_49a6804_omp_target_implicit_bounds__l4(ptr %0, i64 %1, i64 %2) {
entry:
br label %omp.target
omp.target: ; preds = %entry
%3 = mul i64 %1, 21
%4 = add i64 %3, 10
%5 = getelementptr i32, ptr %0, i64 %4
store i32 33, ptr %5, align 4
br label %omp.region.cont
omp.region.cont: ; preds = %omp.target
ret void
}
; Function Attrs: nounwind
declare i32 @__tgt_target_kernel(ptr, i64, i32, i32, ptr, ptr) #1
attributes #0 = { noinline nounwind }
attributes #1 = { nounwind }
attributes #2 = { nocallback nofree nosync nounwind willreturn }
!llvm.module.flags = !{!0, !1}
!omp_offload.info = !{!2}
!0 = !{i32 2, !"Debug Info Version", i32 3}
!1 = !{i32 7, !"openmp", i32 11}
!2 = !{i32 0, i32 66315, i32 77228036, !"omp_target_implicit_bounds_", i32 4, i32 0, i32 0}
```
https://github.com/llvm/llvm-project/pull/67164
More information about the flang-commits
mailing list