[llvm] bfa1afb - [OpenMPOpt] Improve __kmpc_alloc_shared handling
Johannes Doerfert via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 17 19:25:43 PDT 2023
Author: Johannes Doerfert
Date: 2023-08-17T19:25:32-07:00
New Revision: bfa1afb81c7d521e87c249fb4ecfd55ebd0de01c
URL: https://github.com/llvm/llvm-project/commit/bfa1afb81c7d521e87c249fb4ecfd55ebd0de01c
DIFF: https://github.com/llvm/llvm-project/commit/bfa1afb81c7d521e87c249fb4ecfd55ebd0de01c.diff
LOG: [OpenMPOpt] Improve __kmpc_alloc_shared handling
We know that __kmpc_alloc_shared is by construction matched with a
unique __kmpc_free_shared. Making the compiler aware of these facts
helps to avoid mallocs/allocas.
Fixes: https://github.com/llvm/llvm-project/issues/64551
Added:
Modified:
llvm/lib/Transforms/IPO/AttributorAttributes.cpp
llvm/test/Transforms/OpenMP/remove_globalization.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index e4b79acf3f02e3..4633f9b796941e 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -7017,13 +7017,17 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
<< **DI->PotentialAllocationCalls.begin() << "\n");
return false;
}
- Instruction *CtxI = isa<InvokeInst>(AI.CB) ? AI.CB : AI.CB->getNextNode();
- if (!Explorer || !Explorer->findInContextOf(UniqueFree, CtxI)) {
- LLVM_DEBUG(
- dbgs()
- << "[H2S] unique free call might not be executed with the allocation "
- << *UniqueFree << "\n");
- return false;
+
+ // __kmpc_alloc_shared and __kmpc_free_shared are matched by construction.
+ if (AI.LibraryFunctionId != LibFunc___kmpc_alloc_shared) {
+ Instruction *CtxI = isa<InvokeInst>(AI.CB) ? AI.CB : AI.CB->getNextNode();
+ if (!Explorer || !Explorer->findInContextOf(UniqueFree, CtxI)) {
+ LLVM_DEBUG(
+ dbgs()
+ << "[H2S] unique free call might not be executed with the allocation "
+ << *UniqueFree << "\n");
+ return false;
+ }
}
return true;
};
diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll
index b864700e18abef..de9c542975732f 100644
--- a/llvm/test/Transforms/OpenMP/remove_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll
@@ -14,8 +14,9 @@ target triple = "nvptx64"
; UTC_ARGS: --disable
; CHECK-REMARKS: remark: remove_globalization.c:4:2: Could not move globalized variable to the stack. Variable is potentially captured in call. Mark parameter as `__attribute__((noescape))` to override.
; CHECK-REMARKS: remark: remove_globalization.c:2:2: Moving globalized variable to the stack.
+; CHECK-REMARKS: remark: remove_globalization.c:4:2: Moving globalized variable to the stack.
+; CHECK-REMARKS: remark: remove_globalization.c:10:2: Moving globalized variable to the stack.
; CHECK-REMARKS: remark: remove_globalization.c:6:2: Moving globalized variable to the stack.
-; CHECK-REMARKS: remark: remove_globalization.c:4:2: Found thread data sharing on the GPU. Expect degraded performance due to data globalization.
; UTC_ARGS: --enable
; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
@@ -96,17 +97,15 @@ define internal void @bar() {
; CHECK-LABEL: define {{[^@]+}}@bar
; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR5:[0-9]+]], !dbg [[DBG8:![0-9]+]]
-; CHECK-NEXT: call void @share(ptr nofree [[TMP0]]) #[[ATTR6:[0-9]+]], !dbg [[DBG8]]
-; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR5]]
+; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
+; CHECK-NEXT: call void @share(ptr nofree [[DOTH2S]]) #[[ATTR5:[0-9]+]], !dbg [[DBG8:![0-9]+]]
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@bar
; CHECK-DISABLED-SAME: () #[[ATTR1]] {
; CHECK-DISABLED-NEXT: entry:
-; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR5:[0-9]+]], !dbg [[DBG8:![0-9]+]]
-; CHECK-DISABLED-NEXT: call void @share(ptr nofree [[TMP0]]) #[[ATTR6:[0-9]+]], !dbg [[DBG8]]
-; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR5]]
+; CHECK-DISABLED-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
+; CHECK-DISABLED-NEXT: call void @share(ptr nofree [[DOTH2S]]) #[[ATTR5:[0-9]+]], !dbg [[DBG8:![0-9]+]]
; CHECK-DISABLED-NEXT: ret void
;
entry:
@@ -147,8 +146,8 @@ define void @unused() {
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@unused() {
; CHECK-DISABLED-NEXT: entry:
-; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR5]], !dbg [[DBG11:![0-9]+]]
-; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR5]]
+; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]], !dbg [[DBG11:![0-9]+]]
+; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR6]]
; CHECK-DISABLED-NEXT: ret void
;
entry:
@@ -262,16 +261,15 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind memory(write) }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind allocsize(0) }
; CHECK: attributes #[[ATTR4]] = { "llvm.assume"="omp_no_openmp" }
-; CHECK: attributes #[[ATTR5]] = { nounwind }
-; CHECK: attributes #[[ATTR6]] = { nosync nounwind memory(write) }
+; CHECK: attributes #[[ATTR5]] = { nosync nounwind memory(write) }
;.
; CHECK-DISABLED: attributes #[[ATTR0]] = { "kernel" }
; CHECK-DISABLED: attributes #[[ATTR1]] = { nosync nounwind }
; CHECK-DISABLED: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind memory(write) }
; CHECK-DISABLED: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind allocsize(0) }
; CHECK-DISABLED: attributes #[[ATTR4]] = { "llvm.assume"="omp_no_openmp" }
-; CHECK-DISABLED: attributes #[[ATTR5]] = { nounwind }
-; CHECK-DISABLED: attributes #[[ATTR6]] = { nosync nounwind memory(write) }
+; CHECK-DISABLED: attributes #[[ATTR5]] = { nosync nounwind memory(write) }
+; CHECK-DISABLED: attributes #[[ATTR6]] = { nounwind }
;.
; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 13.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
; CHECK: [[META1:![0-9]+]] = !DIFile(filename: "remove_globalization.c", directory: "/tmp/remove_globalization.c")
More information about the llvm-commits mailing list