[Openmp-commits] [llvm] [openmp] [OpenMP][offload] Inline target reductions (PR #196061)
Robert Imschweiler via Openmp-commits
openmp-commits at lists.llvm.org
Thu May 7 08:48:38 PDT 2026
ro-i wrote:
Update on my earlier message:
`.../llvm-xteam-red-inline/bin/clang++ -fopenmp --offload-arch=gfx90a -O2 -fsave-optimization-record -Rpass=inline -Rpass-missed=inline offload/test/offloading/multiple_reductions.cpp 2>&1 | grep '__kmpc_nvptx_teams_reduce.*inlined into'`
This produces many lines like the following:
```
offload/test/offloading/multiple_reductions.cpp:120:67: '__kmpc_nvptx_teams_reduce_nowait_v2' not inlined into '__omp_offloading_821_5185a1d_main_l120' because it should never be inlined (cost=never): conflicting attributes
offload/test/offloading/multiple_reductions.cpp:33:67: '__kmpc_nvptx_teams_reduce_nowait_v2' not inlined into '__omp_offloading_821_5185a1d__Z8run_typeIdEvv_l33' because it should never be inlined (cost=never): conflicting attributes
offload/test/offloading/multiple_reductions.cpp:42:67: '__kmpc_nvptx_teams_reduce_nowait_v2' not inlined into '__omp_offloading_821_5185a1d__Z8run_typeIdEvv_l42' because it should never be inlined (cost=never): conflicting attributes
[...]
```
If I additionally pass `-save-temps` and inspect `multiple_reductions-openmp-amdgcn-amd-amdhsa-gfx90a.tmp.ll`, I see, for example:
```llvm
define internal void @__omp_offloading_821_5185a1d_main_l120_omp_outlined(ptr noalias noundef %.global_tid., ptr noalias noundef %.bound_tid., ptr noundef nonnull align 8 dereferenceable(8) %pi) #1 {
...
%26 = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr addrspacecast (ptr addrspace(1) @1 to ptr), ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 1024, i64 8, ptr %.omp.reduction.red_list.ascast, ptr @_omp_reduction_shuffle_and_reduce_func.1, ptr @_omp_reduction_inter_warp_copy_func.2, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func)
...
}
...
define internal void @__omp_offloading_821_5185a1d__Z8run_typeIdEvv_l33_omp_outlined(ptr noalias noundef %.global_tid., ptr noalias noundef %.bound_tid., ptr noundef nonnull align 8 dereferenceable(8) %s1, ptr noundef %in1) #1 {
...
%28 = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr addrspacecast (ptr addrspace(1) @1 to ptr), ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 1024, i64 8, ptr %.omp.reduction.red_list.ascast, ptr @_omp_reduction_shuffle_and_reduce_func.5, ptr @_omp_reduction_inter_warp_copy_func.6, ptr @_omp_reduction_list_to_global_copy_func.7, ptr @_omp_reduction_list_to_global_reduce_func.8, ptr @_omp_reduction_global_to_list_copy_func.9, ptr @_omp_reduction_global_to_list_reduce_func.10)
...
}
attributes #1 = { alwaysinline convergent norecurse nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" }
```
https://github.com/llvm/llvm-project/pull/196061
More information about the Openmp-commits mailing list