[flang-commits] [flang] [OpenMP][Flang] Add "IsolatedFromAbove" trait to omp.target (PR #67164)

Akash Banerjee via flang-commits flang-commits at lists.llvm.org
Fri Oct 13 07:17:16 PDT 2023


TIFitis wrote:

Here are code dumps at various stages with the current implementation:

Fortran:
```
subroutine omp_target_implicit_bounds(n)
   integer :: n
   integer :: a(n, 1024)
   !$omp target
      a(11,22) = 33
   !$omp end target
end subroutine omp_target_implicit_bounds
```

FIR:
```
module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<i64, dense<64> : vector<2xi32>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi32>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi32>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi32>>, #dlti.dl_entry<i32, dense<32> : vector<2xi32>>, #dlti.dl_entry<i16, dense<16> : vector<2xi32>>, #dlti.dl_entry<i8, dense<8> : vector<2xi32>>, #dlti.dl_entry<f16, dense<16> : vector<2xi32>>, #dlti.dl_entry<f64, dense<64> : vector<2xi32>>, #dlti.dl_entry<f128, dense<128> : vector<2xi32>>, #dlti.dl_entry<i1, dense<8> : vector<2xi32>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi32>>, #dlti.dl_entry<f80, dense<128> : vector<2xi32>>, #dlti.dl_entry<i128, dense<128> : vector<2xi32>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i32>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_gpu = false, omp.is_target_device = false, omp.requires = #omp<clause_requires none>, omp.version = #omp.version<version = 11>} {
  func.func @_QPomp_target_implicit_bounds(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}) {
    %0 = fir.load %arg0 : !fir.ref<i32>
    %1 = fir.convert %0 : (i32) -> i64
    %2 = fir.convert %1 : (i64) -> index
    %c0 = arith.constant 0 : index
    %3 = arith.cmpi sgt, %2, %c0 : index
    %4 = arith.select %3, %2, %c0 : index
    %c1024_i64 = arith.constant 1024 : i64
    %5 = fir.convert %c1024_i64 : (i64) -> index
    %c0_0 = arith.constant 0 : index
    %6 = arith.cmpi sgt, %5, %c0_0 : index
    %7 = arith.select %6, %5, %c0_0 : index
    %8 = fir.alloca !fir.array<?x1024xi32>, %4 {bindc_name = "a", uniq_name = "_QFomp_target_implicit_boundsEa"}
    %9 = omp.map_info var_ptr(%8 : !fir.ref<!fir.array<?x1024xi32>>)   map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<!fir.array<?x1024xi32>> {name = "a"}
    %10 = omp.map_info var_ptr(%4 : index)   map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) -> index {name = ""}
    %11 = omp.map_info var_ptr(%7 : index)   map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) -> index {name = ""}
    omp.target   map_entries(%9 -> %arg1, %10 -> %arg2, %11 -> %arg3 : !fir.ref<!fir.array<?x1024xi32>>, index, index) {
    ^bb0(%arg1: !fir.ref<!fir.array<?x1024xi32>>, %arg2: index, %arg3: index):
      %c33_i32 = arith.constant 33 : i32
      %12 = fir.convert %arg1 : (!fir.ref<!fir.array<?x1024xi32>>) -> !fir.ref<!fir.array<?xi32>>
      %c1 = arith.constant 1 : index
      %c0_1 = arith.constant 0 : index
      %c11_i64 = arith.constant 11 : i64
      %13 = fir.convert %c11_i64 : (i64) -> index
      %14 = arith.subi %13, %c1 : index
      %15 = arith.muli %c1, %14 : index
      %16 = arith.addi %15, %c0_1 : index
      %17 = arith.muli %c1, %arg2 : index
      %c22_i64 = arith.constant 22 : i64
      %18 = fir.convert %c22_i64 : (i64) -> index
      %19 = arith.subi %18, %c1 : index
      %20 = arith.muli %17, %19 : index
      %21 = arith.addi %20, %16 : index
      %22 = fir.coordinate_of %12, %21 : (!fir.ref<!fir.array<?xi32>>, index) -> !fir.ref<i32>
      fir.store %c33_i32 to %22 : !fir.ref<i32>
      omp.terminator
    }
    return
  }
}
```

LLVM IR dialect:
```
module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<i64, dense<64> : vector<2xi32>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi32>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi32>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi32>>, #dlti.dl_entry<i32, dense<32> : vector<2xi32>>, #dlti.dl_entry<i16, dense<16> : vector<2xi32>>, #dlti.dl_entry<i8, dense<8> : vector<2xi32>>, #dlti.dl_entry<f16, dense<16> : vector<2xi32>>, #dlti.dl_entry<f64, dense<64> : vector<2xi32>>, #dlti.dl_entry<f128, dense<128> : vector<2xi32>>, #dlti.dl_entry<i1, dense<8> : vector<2xi32>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi32>>, #dlti.dl_entry<f80, dense<128> : vector<2xi32>>, #dlti.dl_entry<i128, dense<128> : vector<2xi32>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i32>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_gpu = false, omp.is_target_device = false, omp.requires = #omp<clause_requires none>, omp.version = #omp.version<version = 11>} {
  llvm.func @_QPomp_target_implicit_bounds(%arg0: !llvm.ptr<i32> {fir.bindc_name = "n"}) {
    %0 = llvm.load %arg0 : !llvm.ptr<i32>
    %1 = llvm.sext %0 : i32 to i64
    %2 = llvm.mlir.constant(0 : index) : i64
    %3 = llvm.icmp "sgt" %1, %2 : i64
    %4 = llvm.select %3, %1, %2 : i1, i64
    %5 = llvm.mlir.constant(1024 : i64) : i64
    %6 = llvm.mlir.constant(0 : index) : i64
    %7 = llvm.icmp "sgt" %5, %6 : i64
    %8 = llvm.select %7, %5, %6 : i1, i64
    %9 = llvm.mlir.constant(1 : i64) : i64
    %10 = llvm.mlir.constant(1024 : i64) : i64
    %11 = llvm.mul %9, %10  : i64
    %12 = llvm.mul %11, %4  : i64
    %13 = llvm.alloca %12 x i32 {bindc_name = "a", in_type = !fir.array<?x1024xi32>, operandSegmentSizes = array<i32: 0, 1>, uniq_name = "_QFomp_target_implicit_boundsEa"} : (i64) -> !llvm.ptr<i32>
    %14 = omp.map_info var_ptr(%13 : !llvm.ptr<i32>)   map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr<i32> {name = "a"}
    %15 = omp.map_info var_ptr(%4 : i64)   map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) -> i64 {name = ""}
    %16 = omp.map_info var_ptr(%8 : i64)   map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) -> i64 {name = ""}
    omp.target   map_entries(%14 -> %arg1, %15 -> %arg2, %16 -> %arg3 : !llvm.ptr<i32>, i64, i64) {
    ^bb0(%arg1: !llvm.ptr<i32>, %arg2: i64, %arg3: i64):
      %17 = llvm.mlir.constant(33 : i32) : i32
      %18 = llvm.mlir.constant(1 : index) : i64
      %19 = llvm.mlir.constant(0 : index) : i64
      %20 = llvm.mlir.constant(11 : i64) : i64
      %21 = llvm.sub %20, %18  : i64
      %22 = llvm.mlir.constant(22 : i64) : i64
      %23 = llvm.sub %22, %18  : i64
      %24 = llvm.mul %arg2, %23  : i64
      %25 = llvm.add %24, %21  : i64
      %26 = llvm.getelementptr %arg1[%25] : (!llvm.ptr<i32>, i64) -> !llvm.ptr<i32>
      llvm.store %17, %26 : !llvm.ptr<i32>
      omp.terminator
    }
    llvm.return
  }
}
```

LLVM IR (host):
```
; ModuleID = 'FIRModule'
source_filename = "FIRModule"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.__tgt_offload_entry = type { ptr, ptr, i64, i32, i32 }
%struct.__tgt_kernel_arguments = type { i32, i32, ptr, ptr, ptr, ptr, ptr, ptr, i64, i64, [3 x i32], [3 x i32], i32 }

@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 0, ptr @.omp_offloading.requires_reg, ptr null }]
@.__omp_offloading_1030b_49a6804_omp_target_implicit_bounds__l4.region_id = weak constant i8 0
@0 = private unnamed_addr constant [55 x i8] c";/home/akash/Documents/scratch/test.f90;unknown;3;15;;\00", align 1
@1 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @1 }, align 8
@.omp_offloading.entry_name = internal unnamed_addr constant [62 x i8] c"__omp_offloading_1030b_49a6804_omp_target_implicit_bounds__l4\00"
@.omp_offloading.entry.__omp_offloading_1030b_49a6804_omp_target_implicit_bounds__l4 = weak constant %struct.__tgt_offload_entry { ptr @.__omp_offloading_1030b_49a6804_omp_target_implicit_bounds__l4.region_id, ptr @.omp_offloading.entry_name, i64 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1

declare ptr @malloc(i64)

declare void @free(ptr)

; Function Attrs: noinline nounwind
define internal void @.omp_offloading.requires_reg() #0 section ".text.startup" {
entry:
  call void @__tgt_register_requires(i64 1)
  ret void
}

; Function Attrs: nounwind
declare void @__tgt_register_requires(i64) #1

define void @omp_target_implicit_bounds_(ptr %0) {
  %kernel_args = alloca %struct.__tgt_kernel_arguments, align 8
  %2 = load i32, ptr %0, align 4
  %3 = sext i32 %2 to i64
  %4 = icmp sgt i64 %3, 0
  %5 = select i1 %4, i64 %3, i64 0
  %6 = mul i64 1024, %5
  %7 = alloca i32, i64 %6, align 4
  br label %entry

entry:                                            ; preds = %1
  %8 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 0
  store i32 2, ptr %8, align 4
  %9 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 1
  store i32 0, ptr %9, align 4
  %10 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 2
  store ptr null, ptr %10, align 8
  %11 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 3
  store ptr null, ptr %11, align 8
  %12 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 4
  store ptr null, ptr %12, align 8
  %13 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 5
  store ptr null, ptr %13, align 8
  %14 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 6
  store ptr null, ptr %14, align 8
  %15 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 7
  store ptr null, ptr %15, align 8
  %16 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 8
  store i64 0, ptr %16, align 8
  %17 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 9
  store i64 0, ptr %17, align 8
  %18 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 10
  store [3 x i32] [i32 -1, i32 0, i32 0], ptr %18, align 4
  %19 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 11
  store [3 x i32] zeroinitializer, ptr %19, align 4
  %20 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 12
  store i32 0, ptr %20, align 4
  %21 = call i32 @__tgt_target_kernel(ptr @2, i64 -1, i32 -1, i32 0, ptr @.__omp_offloading_1030b_49a6804_omp_target_implicit_bounds__l4.region_id, ptr %kernel_args)
  %22 = icmp ne i32 %21, 0
  br i1 %22, label %omp_offload.failed, label %omp_offload.cont

omp_offload.failed:                               ; preds = %entry
  call void @__omp_offloading_1030b_49a6804_omp_target_implicit_bounds__l4(ptr %7, i64 %5, i64 1024)
  br label %omp_offload.cont

omp_offload.cont:                                 ; preds = %omp_offload.failed, %entry
  ret void
}

; Function Attrs: nocallback nofree nosync nounwind willreturn
declare ptr @llvm.stacksave.p0() #2

; Function Attrs: nocallback nofree nosync nounwind willreturn
declare void @llvm.stackrestore.p0(ptr) #2

define internal void @__omp_offloading_1030b_49a6804_omp_target_implicit_bounds__l4(ptr %0, i64 %1, i64 %2) {
entry:
  br label %omp.target

omp.target:                                       ; preds = %entry
  %3 = mul i64 %1, 21
  %4 = add i64 %3, 10
  %5 = getelementptr i32, ptr %0, i64 %4
  store i32 33, ptr %5, align 4
  br label %omp.region.cont

omp.region.cont:                                  ; preds = %omp.target
  ret void
}

; Function Attrs: nounwind
declare i32 @__tgt_target_kernel(ptr, i64, i32, i32, ptr, ptr) #1

attributes #0 = { noinline nounwind }
attributes #1 = { nounwind }
attributes #2 = { nocallback nofree nosync nounwind willreturn }

!llvm.module.flags = !{!0, !1}
!omp_offload.info = !{!2}

!0 = !{i32 2, !"Debug Info Version", i32 3}
!1 = !{i32 7, !"openmp", i32 11}
!2 = !{i32 0, i32 66315, i32 77228036, !"omp_target_implicit_bounds_", i32 4, i32 0, i32 0}
```





https://github.com/llvm/llvm-project/pull/67164


More information about the flang-commits mailing list