[llvm] 2171103 - [OpenMP] Allow the Attributor to look at functions we also internalized
Johannes Doerfert via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 11 20:17:12 PDT 2022
Author: Johannes Doerfert
Date: 2022-09-11T20:16:11-07:00
New Revision: 21711039e3ab3fd9ed296cc2c07a011c09dd170b
URL: https://github.com/llvm/llvm-project/commit/21711039e3ab3fd9ed296cc2c07a011c09dd170b
DIFF: https://github.com/llvm/llvm-project/commit/21711039e3ab3fd9ed296cc2c07a011c09dd170b.diff
LOG: [OpenMP] Allow the Attributor to look at functions we also internalized
This is important because the internalized functions contain accesses to
globals, and the Attributor needs to see those accesses to categorize them.
Added:
Modified:
llvm/include/llvm/Transforms/IPO/Attributor.h
llvm/lib/Transforms/IPO/AttributorAttributes.cpp
llvm/lib/Transforms/IPO/OpenMPOpt.cpp
llvm/test/Transforms/OpenMP/remove_globalization.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 794e85f877bea..81459eda4cadb 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1156,7 +1156,7 @@ struct InformationCache {
/// Check whether \p F is part of module slice.
bool isInModuleSlice(const Function &F) {
- return ModuleSlice.count(const_cast<Function *>(&F));
+ return ModuleSlice.empty() || ModuleSlice.count(const_cast<Function *>(&F));
}
/// Return true if the stack (llvm::Alloca) can be accessed by other threads.
@@ -1438,8 +1438,8 @@ struct Attributor {
// We update only AAs associated with functions in the Functions set or
// call sites of them.
- if ((AnchorFn && !Functions.count(const_cast<Function *>(AnchorFn))) &&
- !Functions.count(IRP.getAssociatedFunction())) {
+ if ((AnchorFn && !isRunOn(const_cast<Function *>(AnchorFn))) &&
+ !isRunOn(IRP.getAssociatedFunction())) {
AA.getState().indicatePessimisticFixpoint();
return AA;
}
@@ -1554,8 +1554,9 @@ struct Attributor {
bool isModulePass() const { return Configuration.IsModulePass; }
/// Return true if we derive attributes for \p Fn
- bool isRunOn(Function &Fn) const {
- return Functions.empty() || Functions.count(&Fn);
+ bool isRunOn(Function &Fn) const { return isRunOn(&Fn); }
+ bool isRunOn(Function *Fn) const {
+ return Functions.empty() || Functions.count(Fn);
}
/// Determine opportunities to derive 'default' attributes in \p F and create
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index d94a651a770af..bc19afb041989 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -3501,11 +3501,21 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
bool UsedAssumedInformation = false;
SmallSetVector<Value *, 4> PotentialCopies;
if (!AA::getPotentialCopiesOfStoredValue(A, SI, PotentialCopies, *this,
- UsedAssumedInformation))
+ UsedAssumedInformation)) {
+ LLVM_DEBUG(
+ dbgs()
+ << "[AAIsDead] Could not determine potential copies of store!\n");
return false;
+ }
+ LLVM_DEBUG(dbgs() << "[AAIsDead] Store has " << PotentialCopies.size()
+ << " potential copies.\n");
return llvm::all_of(PotentialCopies, [&](Value *V) {
- return A.isAssumedDead(IRPosition::value(*V), this, nullptr,
- UsedAssumedInformation);
+ if (A.isAssumedDead(IRPosition::value(*V), this, nullptr,
+ UsedAssumedInformation))
+ return true;
+ LLVM_DEBUG(dbgs() << "[AAIsDead] Potential copy " << *V
+ << " is assumed live!\n");
+ return false;
});
}
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 66484c166ec77..17714fb89992b 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -182,13 +182,13 @@ struct AAICVTracker;
/// Attributor runs.
struct OMPInformationCache : public InformationCache {
OMPInformationCache(Module &M, AnalysisGetter &AG,
- BumpPtrAllocator &Allocator, SetVector<Function *> &CGSCC,
+ BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC,
KernelSet &Kernels)
- : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M),
+ : InformationCache(M, AG, Allocator, CGSCC), OMPBuilder(M),
Kernels(Kernels) {
OMPBuilder.initialize();
- initializeRuntimeFunctions();
+ initializeRuntimeFunctions(M);
initializeInternalControlVars();
}
@@ -412,7 +412,7 @@ struct OMPInformationCache : public InformationCache {
// TODO: We directly convert uses into proper calls and unknown uses.
for (Use &U : RFI.Declaration->uses()) {
if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
- if (ModuleSlice.count(UserI->getFunction())) {
+ if (ModuleSlice.empty() || ModuleSlice.count(UserI->getFunction())) {
RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U);
++NumUses;
}
@@ -445,8 +445,7 @@ struct OMPInformationCache : public InformationCache {
/// Helper to initialize all runtime function information for those defined
/// in OpenMPKinds.def.
- void initializeRuntimeFunctions() {
- Module &M = *((*ModuleSlice.begin())->getParent());
+ void initializeRuntimeFunctions(Module &M) {
// Helper macros for handling __VA_ARGS__ in OMP_RTL
#define OMP_TYPE(VarName, ...) \
@@ -855,7 +854,7 @@ struct OpenMPOpt {
InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel,
ICV_proc_bind};
- for (Function *F : OMPInfoCache.ModuleSlice) {
+ for (Function *F : SCC) {
for (auto ICV : ICVs) {
auto ICVInfo = OMPInfoCache.ICVs[ICV];
auto Remark = [&](OptimizationRemarkAnalysis ORA) {
@@ -2148,7 +2147,7 @@ struct OpenMPOpt {
};
Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
- if (!OMPInfoCache.ModuleSlice.count(&F))
+ if (!OMPInfoCache.ModuleSlice.empty() && !OMPInfoCache.ModuleSlice.count(&F))
return nullptr;
// Use a scope to keep the lifetime of the CachedKernel short.
@@ -5050,8 +5049,7 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
BumpPtrAllocator Allocator;
CallGraphUpdater CGUpdater;
- SetVector<Function *> Functions(SCC.begin(), SCC.end());
- OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels);
+ OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ nullptr, Kernels);
unsigned MaxFixpointIterations =
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
@@ -5063,6 +5061,7 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
AC.OREGetter = OREGetter;
AC.PassName = DEBUG_TYPE;
+ SetVector<Function *> Functions;
Attributor A(Functions, InfoCache, AC);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
@@ -5125,7 +5124,7 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
SetVector<Function *> Functions(SCC.begin(), SCC.end());
OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
- /*CGSCC*/ Functions, Kernels);
+ /*CGSCC*/ &Functions, Kernels);
unsigned MaxFixpointIterations =
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
@@ -5204,7 +5203,7 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
BumpPtrAllocator Allocator;
OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG,
Allocator,
- /*CGSCC*/ Functions, Kernels);
+ /*CGSCC*/ &Functions, Kernels);
unsigned MaxFixpointIterations =
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll
index 6bb3681dc5c47..5d1cf6e20586b 100644
--- a/llvm/test/Transforms/OpenMP/remove_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll
@@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -pass-remarks-missed=openmp-opt -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-REMARKS
-; RUN: opt -passes=openmp-opt -pass-remarks-missed=openmp-opt -openmp-opt-max-iterations=1 -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-FIXPOINT
; RUN: opt -openmp-opt-disable-deglobalization -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=CHECK-DISABLED
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64"
@@ -11,7 +10,6 @@ target triple = "nvptx64"
; CHECK-REMARKS: remark: remove_globalization.c:2:2: Moving globalized variable to the stack.
; CHECK-REMARKS: remark: remove_globalization.c:6:2: Moving globalized variable to the stack.
; CHECK-REMARKS: remark: remove_globalization.c:4:2: Found thread data sharing on the GPU. Expect degraded performance due to data globalization.
-; CHECK-FIXPOINT: Attributor did not reach a fixpoint after 1 iterations.
; UTC_ARGS: --enable
@S = external local_unnamed_addr global i8*
More information about the llvm-commits
mailing list