[clang] [llvm] [WIP][OpenMP] Remove dependency on `libffi` from offloading runtime (PR #91264)

Joseph Huber via llvm-commits llvm-commits at lists.llvm.org
Tue May 7 07:06:35 PDT 2024


jhuber6 wrote:

> > ```llvm
> > = load i32, ptr %.capture_expr., align 4
> > ```
> 
> Why do you think it reads beyond __context? %2 = getelementptr inbounds %struct.anon, ptr %1, i32 0, i32 0 points to the first element in the __context, if I'm not missing something. If it has the wrong value, looks like it is not written correctly

I think I copied the wrong code somehow. This is the IR for the kernel:
```llvm
; Function Attrs: convergent noinline norecurse nounwind optnone uwtable
define weak_odr protected void @__omp_offloading_10302_adc9471_main_l10(ptr noalias noundef %dyn_ptr, ptr noalias noundef %__context) #0 {
entry:
  %dyn_ptr.addr = alloca ptr, align 8
  %__context.addr = alloca ptr, align 8
  %Teams = alloca i32, align 4
  %Threads = alloca i32, align 4
  %.capture_expr. = alloca i32, align 4
  %.capture_expr.1 = alloca i32, align 4
  %Teams.casted = alloca i64, align 8
  %Threads.casted = alloca i64, align 8
  %0 = call i32 @__kmpc_global_thread_num(ptr @3)
  store ptr %dyn_ptr, ptr %dyn_ptr.addr, align 8
  store ptr %__context, ptr %__context.addr, align 8
  %1 = load ptr, ptr %__context.addr, align 8
  %2 = getelementptr inbounds %struct.anon, ptr %1, i32 0, i32 0
  %3 = load i32, ptr %2, align 4
  store i32 %3, ptr %Teams, align 4
  %4 = getelementptr inbounds %struct.anon, ptr %1, i32 0, i32 1
  %5 = load i32, ptr %4, align 4
  store i32 %5, ptr %Threads, align 4
  %6 = getelementptr inbounds %struct.anon, ptr %1, i32 0, i32 2
  %7 = load i32, ptr %6, align 4
  store i32 %7, ptr %.capture_expr., align 4
  %8 = getelementptr inbounds %struct.anon, ptr %1, i32 0, i32 3
  %9 = load i32, ptr %8, align 4
  store i32 %9, ptr %.capture_expr.1, align 4
  %10 = load i32, ptr %.capture_expr., align 4
  %11 = load i32, ptr %.capture_expr.1, align 4
  call void @__kmpc_push_num_teams(ptr @3, i32 %0, i32 %10, i32 %11)
  %12 = load i32, ptr %Teams, align 4
  store i32 %12, ptr %Teams.casted, align 4
  %13 = load i64, ptr %Teams.casted, align 8
  %14 = load i32, ptr %Threads, align 4
  store i32 %14, ptr %Threads.casted, align 4
  %15 = load i64, ptr %Threads.casted, align 8
  call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @3, i32 2, ptr @__omp_offloading_10302_adc9471_main_l10.omp_outlined, i64 %13, i64 %15)
  ret void
}
```
The IR above is what I get from the corresponding C code:
```c
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>

int main() {
  int Threads = 6;
  int Teams = 10;

  long unsigned s = 0;
#pragma omp target teams distribute parallel for num_teams(Teams)              \
    thread_limit(Threads)
  for (int i = 0; i < Threads * Teams; ++i) {
    assert(Teams == 10);
  }

  return 0;
}
```
When I compile and run it, I get the following. It warns about a nonsensical team size (other cases get even more corrupted, but this was the simplest one I could come up with).
```console
> clang malloc.c -fopenmp -fopenmp-targets=x86_64-pc-linux-gnu                                         
> ./a.out 
OMP: Warning #96: Cannot form a team with 48 threads, using 21 instead.
OMP: Hint Consider unsetting KMP_DEVICE_THREAD_LIMIT (KMP_ALL_THREADS), KMP_TEAMS_THREAD_LIMIT, and OMP_THREAD_LIMIT (if any are set).
```
The LLVM-IR is confusing to me because it does a GEP into `%struct.anon` up to index 3, which suggests that the Teams / Threads values are appended, but the number of arguments isn't expected to be that big.

https://github.com/llvm/llvm-project/pull/91264


More information about the llvm-commits mailing list