[clang] [llvm] [WIP][OpenMP] Remove dependency on `libffi` from offloading runtime (PR #91264)
Joseph Huber via cfe-commits
cfe-commits at lists.llvm.org
Tue May 7 07:06:35 PDT 2024
jhuber6 wrote:
> > ```llvm
> > = load i32, ptr %.capture_expr., align 4
> > ```
>
> Why do you think it reads beyond __context? %2 = getelementptr inbounds %struct.anon, ptr %1, i32 0, i32 0 points to the first element in the __context, if I'm not missing something. If it has the wrong value, looks like it is not written correctly
I think I copied the wrong code somehow; here is the IR I meant to post:
```llvm
; Function Attrs: convergent noinline norecurse nounwind optnone uwtable
define weak_odr protected void @__omp_offloading_10302_adc9471_main_l10(ptr noalias noundef %dyn_ptr, ptr noalias noundef %__context) #0 {
entry:
%dyn_ptr.addr = alloca ptr, align 8
%__context.addr = alloca ptr, align 8
%Teams = alloca i32, align 4
%Threads = alloca i32, align 4
%.capture_expr. = alloca i32, align 4
%.capture_expr.1 = alloca i32, align 4
%Teams.casted = alloca i64, align 8
%Threads.casted = alloca i64, align 8
%0 = call i32 @__kmpc_global_thread_num(ptr @3)
store ptr %dyn_ptr, ptr %dyn_ptr.addr, align 8
store ptr %__context, ptr %__context.addr, align 8
%1 = load ptr, ptr %__context.addr, align 8
%2 = getelementptr inbounds %struct.anon, ptr %1, i32 0, i32 0
%3 = load i32, ptr %2, align 4
store i32 %3, ptr %Teams, align 4
%4 = getelementptr inbounds %struct.anon, ptr %1, i32 0, i32 1
%5 = load i32, ptr %4, align 4
store i32 %5, ptr %Threads, align 4
%6 = getelementptr inbounds %struct.anon, ptr %1, i32 0, i32 2
%7 = load i32, ptr %6, align 4
store i32 %7, ptr %.capture_expr., align 4
%8 = getelementptr inbounds %struct.anon, ptr %1, i32 0, i32 3
%9 = load i32, ptr %8, align 4
store i32 %9, ptr %.capture_expr.1, align 4
%10 = load i32, ptr %.capture_expr., align 4
%11 = load i32, ptr %.capture_expr.1, align 4
call void @__kmpc_push_num_teams(ptr @3, i32 %0, i32 %10, i32 %11)
%12 = load i32, ptr %Teams, align 4
store i32 %12, ptr %Teams.casted, align 4
%13 = load i64, ptr %Teams.casted, align 8
%14 = load i32, ptr %Threads, align 4
store i32 %14, ptr %Threads.casted, align 4
%15 = load i64, ptr %Threads.casted, align 8
call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @3, i32 2, ptr @__omp_offloading_10302_adc9471_main_l10.omp_outlined, i64 %13, i64 %15)
ret void
}
```
This is what I get from the corresponding C code.
```c
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
int main() {
int Threads = 6;
int Teams = 10;
long unsigned s = 0;
#pragma omp target teams distribute parallel for num_teams(Teams) \
thread_limit(Threads)
for (int i = 0; i < Threads * Teams; ++i) {
assert(Teams == 10);
}
return 0;
}
```
When I compile and run it, I get the following. It warns about a nonsensical team value (it gets even more corrupt in other cases, but this was the simplest reproducer I could get).
```console
> clang malloc.c -fopenmp -fopenmp-targets=x86_64-pc-linux-gnu
> ./a.out
OMP: Warning #96: Cannot form a team with 48 threads, using 21 instead.
OMP: Hint Consider unsetting KMP_DEVICE_THREAD_LIMIT (KMP_ALL_THREADS), KMP_TEAMS_THREAD_LIMIT, and OMP_THREAD_LIMIT (if any are set).
```
The LLVM-IR is confusing to me because it does a GEP up to index 3, which suggests that the Teams / Threads values are appended to the context struct, but the number of arguments isn't expected to be that large.
https://github.com/llvm/llvm-project/pull/91264
More information about the cfe-commits
mailing list