[Openmp-commits] [openmp] 7f7e174 - [OpenMP] Be smarter about the insertion point for deduplication
Johannes Doerfert via Openmp-commits
openmp-commits at lists.llvm.org
Mon Mar 27 21:30:51 PDT 2023
Author: Johannes Doerfert
Date: 2023-03-27T21:30:23-07:00
New Revision: 7f7e1749c5bdab8116075c57711f7f11521bb527
URL: https://github.com/llvm/llvm-project/commit/7f7e1749c5bdab8116075c57711f7f11521bb527
DIFF: https://github.com/llvm/llvm-project/commit/7f7e1749c5bdab8116075c57711f7f11521bb527.diff
LOG: [OpenMP] Be smarter about the insertion point for deduplication
We can use dominance and avoid the special handling of kernels and
prevent inserting code before allocas accidentally (as happened in the
runtime test).
Added:
Modified:
llvm/lib/Transforms/IPO/OpenMPOpt.cpp
llvm/test/Transforms/OpenMP/deduplication.ll
llvm/test/Transforms/OpenMP/deduplication_target.ll
openmp/libomptarget/test/offloading/atomic-compare-signedness.c
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 1a3c692e628eb..e2d3aef240af8 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
@@ -1706,37 +1707,27 @@ struct OpenMPOpt {
};
if (!ReplVal) {
- for (Use *U : *UV)
+ auto *DT =
+ OMPInfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(F);
+ if (!DT)
+ return false;
+ Instruction *IP = nullptr;
+ for (Use *U : *UV) {
if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
+ if (IP)
+ IP = DT->findNearestCommonDominator(IP, CI);
+ else
+ IP = CI;
if (!CanBeMoved(*CI))
continue;
-
- // If the function is a kernel, dedup will move
- // the runtime call right after the kernel init callsite. Otherwise,
- // it will move it to the beginning of the caller function.
- if (isKernel(F)) {
- auto &KernelInitRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
- auto *KernelInitUV = KernelInitRFI.getUseVector(F);
-
- if (KernelInitUV->empty())
- continue;
-
- assert(KernelInitUV->size() == 1 &&
- "Expected a single __kmpc_target_init in kernel\n");
-
- CallInst *KernelInitCI =
- getCallIfRegularCall(*KernelInitUV->front(), &KernelInitRFI);
- assert(KernelInitCI &&
- "Expected a call to __kmpc_target_init in kernel\n");
-
- CI->moveAfter(KernelInitCI);
- } else
- CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
- ReplVal = CI;
- break;
+ if (!ReplVal)
+ ReplVal = CI;
}
+ }
if (!ReplVal)
return false;
+ assert(IP && "Expected insertion point!");
+ cast<Instruction>(ReplVal)->moveBefore(IP);
}
// If we use a call as a replacement value we need to make sure the ident is
diff --git a/llvm/test/Transforms/OpenMP/deduplication.ll b/llvm/test/Transforms/OpenMP/deduplication.ll
index df5c7737ee39c..b7964a1f26431 100644
--- a/llvm/test/Transforms/OpenMP/deduplication.ll
+++ b/llvm/test/Transforms/OpenMP/deduplication.ll
@@ -102,8 +102,8 @@ m:
define void @local_and_global_gtid_calls() {
; CHECK-LABEL: define {{[^@]+}}@local_and_global_gtid_calls() {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]])
; CHECK-NEXT: [[DOTKMPC_LOC_ADDR:%.*]] = alloca [[STRUCT_IDENT_T:%.*]], align 8
+; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]])
; CHECK-NEXT: call void @useI32(i32 [[TID5]])
; CHECK-NEXT: call void @useI32(i32 [[TID5]])
; CHECK-NEXT: call void @useI32(i32 [[TID5]])
@@ -132,10 +132,10 @@ entry:
define void @local_gtid_calls_only() {
; CHECK-LABEL: define {{[^@]+}}@local_gtid_calls_only() {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]])
; CHECK-NEXT: [[DOTKMPC_LOC_ADDR1:%.*]] = alloca [[STRUCT_IDENT_T:%.*]], align 8
; CHECK-NEXT: [[DOTKMPC_LOC_ADDR2:%.*]] = alloca [[STRUCT_IDENT_T]], align 8
; CHECK-NEXT: [[DOTKMPC_LOC_ADDR3:%.*]] = alloca [[STRUCT_IDENT_T]], align 8
+; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]])
; CHECK-NEXT: call void @useI32(i32 [[TID5]])
; CHECK-NEXT: call void @useI32(i32 [[TID5]])
; CHECK-NEXT: call void @useI32(i32 [[TID5]])
diff --git a/llvm/test/Transforms/OpenMP/deduplication_target.ll b/llvm/test/Transforms/OpenMP/deduplication_target.ll
index f1e9d656e80cc..f76e0f66d12ca 100644
--- a/llvm/test/Transforms/OpenMP/deduplication_target.ll
+++ b/llvm/test/Transforms/OpenMP/deduplication_target.ll
@@ -19,10 +19,10 @@ define weak void @__omp_offloading_50_a3e09bf8_foo_l2() #0 {
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; CHECK-NEXT: ret void
; CHECK: worker.exit:
diff --git a/openmp/libomptarget/test/offloading/atomic-compare-signedness.c b/openmp/libomptarget/test/offloading/atomic-compare-signedness.c
index 08b7acc20338a..5b8ebeae83ae7 100644
--- a/openmp/libomptarget/test/offloading/atomic-compare-signedness.c
+++ b/openmp/libomptarget/test/offloading/atomic-compare-signedness.c
@@ -5,6 +5,8 @@
// RUN: %libomptarget-compile-generic -fopenmp-version=51
// RUN: %libomptarget-run-generic | %fcheck-generic
+// RUN: %libomptarget-compileopt-generic -fopenmp-version=51
+// RUN: %libomptarget-run-generic | %fcheck-generic
// High parallelism increases our chances of detecting a lack of atomicity.
#define NUM_THREADS_TRY 256
More information about the Openmp-commits
mailing list