[clang] [llvm] [mlir] [OMPIRBuilder] - Handle dependencies in `createTarget` (PR #93977)
Sergio Afonso via cfe-commits
cfe-commits at lists.llvm.org
Wed Jun 26 04:09:18 PDT 2024
================
@@ -0,0 +1,83 @@
+! Offloading test checking the use of the depend clause on
+! the target construct
+! REQUIRES: flang, amdgcn-amd-amdhsa
+! UNSUPPORTED: nvptx64-nvidia-cuda
+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+! UNSUPPORTED: aarch64-unknown-linux-gnu
+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+! UNSUPPORTED: x86_64-pc-linux-gnu
+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+program main
+ implicit none
+ integer :: a = 0
+ INTERFACE
+ FUNCTION omp_get_device_num() BIND(C)
+ USE, INTRINSIC :: iso_c_binding, ONLY: C_INT
+ integer :: omp_get_device_num
+ END FUNCTION omp_get_device_num
+ END INTERFACE
+
+ call foo(5, a)
+ print*, "======= FORTRAN Test passed! ======="
+ print*, "foo(5) returned ", a, ", expected 6\n"
+
+ ! stop 0
+ contains
+ subroutine foo(N, r)
+ integer, intent(in) :: N
+ integer, intent(out) :: r
+ integer :: z, i, j, k, accumulator
+ z = 1
+ accumulator = 0
+ ! Spawn 3 threads
+ !$omp parallel num_threads(3)
+
+ ! A single thread will then create two tasks
+ ! One is the 'producer' and potentially slower
+ ! task that updates 'z' to 'N'. The second is an
+ ! offloaded target task that increments 'z'.
+ ! If the depend clauses work properly, the
+ ! target task should wait for the 'producer'
+ ! task to complete before incrementing z
+ ! We use !$omp single here because only
+ ! the depend clause establishes dependencies
+ ! between sibling tasks only. This is the easiest
+ ! way of creating two sibling tasks.
+ !$omp single
+ !$omp task depend(out: z) shared(z)
+ do while (k < 32000)
+ do while (j < 32766)
+ do while (i < 32766)
+ ! dumb loop nest to slow down the update of
+ ! z
+ i = i + 1
+ ! Adding a function call slows down the producer
+ ! to the point that removing the depend clause
+ ! from the target construct below frequently
+ ! results in the wrong answer.
+ accumulator = accumulator + omp_get_device_num()
+ end do
+ j = j +1
+ end do
+ k = k + 1
+ end do
+ z = N
+ !$omp end task
+
+ ! z is 5 now. Increment z to 6.
+ !$omp target map(tofrom: z) depend(in:z)
+ z = z + 1
+ !$omp end target
+ !$omp end single
+ !$omp end parallel
+ ! Use 'accumulator' so it is not optimized away
+ ! by the compiler.
+ print *, accumulator
+ r = z
+end subroutine foo
----------------
skatrak wrote:
```suggestion
end subroutine foo
```
https://github.com/llvm/llvm-project/pull/93977
More information about the cfe-commits
mailing list