[llvm] 74cacf2 - [OpenMP] Add RTL function to externalization RAII
Joseph Huber via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 17 11:31:06 PST 2022
Author: Joseph Huber
Date: 2022-02-17T14:30:58-05:00
New Revision: 74cacf212bb31f8ba837b7eb2434258dd79eaccb
URL: https://github.com/llvm/llvm-project/commit/74cacf212bb31f8ba837b7eb2434258dd79eaccb
DIFF: https://github.com/llvm/llvm-project/commit/74cacf212bb31f8ba837b7eb2434258dd79eaccb.diff
LOG: [OpenMP] Add RTL function to externalization RAII
This patch adds the '_kmpc_get_hardware_num_threads_in_block'
OpenMP RTL function to the externalization RAII struct. This was getting
optimized out and then being replaced with an undefined value once added
back in, causing bugs for complex reductions.
Fixes #53909.
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D120076
Added:
Modified:
llvm/lib/Transforms/IPO/OpenMPOpt.cpp
llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index f577a6b0f1743..392b919c5a120 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -2119,6 +2119,8 @@ struct OpenMPOpt {
OMPRTL___kmpc_barrier_simple_generic);
ExternalizationRAII ThreadId(OMPInfoCache,
OMPRTL___kmpc_get_hardware_thread_id_in_block);
+ ExternalizationRAII NumThreads(
+ OMPInfoCache, OMPRTL___kmpc_get_hardware_num_threads_in_block);
ExternalizationRAII WarpSize(OMPInfoCache, OMPRTL___kmpc_get_warp_size);
registerAAs(IsModulePass);
diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
index b72031a9b68c0..57eaebc7e141c 100644
--- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
+++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
@@ -178,7 +178,16 @@ entry:
ret void
}
-declare i32 @__kmpc_get_hardware_num_threads_in_block()
+define internal i32 @__kmpc_get_hardware_num_threads_in_block() {
+; CHECK-LABEL: define {{[^@]+}}@__kmpc_get_hardware_num_threads_in_block
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT: [[RET:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block_dummy()
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %ret = call i32 @__kmpc_get_hardware_num_threads_in_block_dummy()
+ ret i32 %ret
+}
+declare i32 @__kmpc_get_hardware_num_threads_in_block_dummy()
declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1 zeroext, i1 zeroext) #1
declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8, i1 zeroext) #1
declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64)
More information about the llvm-commits
mailing list