[llvm] 1eb31d6 - [Attributor] Reorganize AAHeapToStack
Johannes Doerfert via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 10 10:33:30 PDT 2021
Author: Johannes Doerfert
Date: 2021-07-10T12:32:50-05:00
New Revision: 1eb31d6de36bc274fa8fb692615515223148ac5e
URL: https://github.com/llvm/llvm-project/commit/1eb31d6de36bc274fa8fb692615515223148ac5e
DIFF: https://github.com/llvm/llvm-project/commit/1eb31d6de36bc274fa8fb692615515223148ac5e.diff
LOG: [Attributor] Reorganize AAHeapToStack
In order to simplify future extensions, e.g., the merge of
AAHeapToShared into AAHeapToStack, we reorganize AAHeapToStack and the
state we keep for each malloc-like call. The result is also less
confusing as we only track malloc-like calls, not all calls. Further, we
only perform the updates necessary for a malloc-like to argue it can go
to the stack, e.g., we won't check all uses if we moved on to the
"must-be-freed" argument.
This patch also uses Attributor helpers to simplify the allocated size,
alignment, and the potentially freed objects.
Overall, this is mostly a reorganization and only the use of the
optimistic helpers should change (=improve) the capabilities a bit.
Differential Revision: https://reviews.llvm.org/D104993
Added:
Modified:
llvm/include/llvm/Transforms/IPO/Attributor.h
llvm/lib/Transforms/IPO/Attributor.cpp
llvm/lib/Transforms/IPO/AttributorAttributes.cpp
llvm/lib/Transforms/IPO/OpenMPOpt.cpp
llvm/test/Transforms/Attributor/depgraph.ll
llvm/test/Transforms/Attributor/heap_to_stack.ll
llvm/test/Transforms/OpenMP/remove_globalization.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 03e1abd665d63..d77d5cabfda59 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1729,17 +1729,21 @@ struct Attributor {
bool checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
const AbstractAttribute &QueryingAA,
const ArrayRef<unsigned> &Opcodes,
- bool CheckBBLivenessOnly = false);
+ bool CheckBBLivenessOnly = false,
+ bool CheckPotentiallyDead = false);
/// Check \p Pred on all call-like instructions (=CallBased derived).
///
/// See checkForAllCallLikeInstructions(...) for more information.
bool checkForAllCallLikeInstructions(function_ref<bool(Instruction &)> Pred,
- const AbstractAttribute &QueryingAA) {
+ const AbstractAttribute &QueryingAA,
+ bool CheckBBLivenessOnly = false,
+ bool CheckPotentiallyDead = false) {
return checkForAllInstructions(Pred, QueryingAA,
{(unsigned)Instruction::Invoke,
(unsigned)Instruction::CallBr,
- (unsigned)Instruction::Call});
+ (unsigned)Instruction::Call},
+ CheckBBLivenessOnly, CheckPotentiallyDead);
}
/// Check \p Pred on all Read/Write instructions.
@@ -3493,9 +3497,6 @@ struct AAHeapToStack : public StateWrapper<BooleanState, AbstractAttribute> {
/// Returns true if HeapToStack conversion is assumed to be possible.
virtual bool isAssumedHeapToStack(CallBase &CB) const = 0;
- /// Returns true if HeapToStack conversion is known to be possible.
- virtual bool isKnownHeapToStack(CallBase &CB) const = 0;
-
/// Create an abstract attribute view for the position \p IRP.
static AAHeapToStack &createForPosition(const IRPosition &IRP, Attributor &A);
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 6f98ec0c59287..153c1be51fa69 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -1023,7 +1023,7 @@ static bool checkForAllInstructionsImpl(
Attributor *A, InformationCache::OpcodeInstMapTy &OpcodeInstMap,
function_ref<bool(Instruction &)> Pred, const AbstractAttribute *QueryingAA,
const AAIsDead *LivenessAA, const ArrayRef<unsigned> &Opcodes,
- bool CheckBBLivenessOnly = false) {
+ bool CheckBBLivenessOnly = false, bool CheckPotentiallyDead = false) {
for (unsigned Opcode : Opcodes) {
// Check if we have instructions with this opcode at all first.
auto *Insts = OpcodeInstMap.lookup(Opcode);
@@ -1032,8 +1032,9 @@ static bool checkForAllInstructionsImpl(
for (Instruction *I : *Insts) {
// Skip dead instructions.
- if (A && A->isAssumedDead(IRPosition::value(*I), QueryingAA, LivenessAA,
- CheckBBLivenessOnly))
+ if (A && !CheckPotentiallyDead &&
+ A->isAssumedDead(IRPosition::value(*I), QueryingAA, LivenessAA,
+ CheckBBLivenessOnly))
continue;
if (!Pred(*I))
@@ -1046,7 +1047,8 @@ static bool checkForAllInstructionsImpl(
bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
const AbstractAttribute &QueryingAA,
const ArrayRef<unsigned> &Opcodes,
- bool CheckBBLivenessOnly) {
+ bool CheckBBLivenessOnly,
+ bool CheckPotentiallyDead) {
const IRPosition &IRP = QueryingAA.getIRPosition();
// Since we need to provide instructions we have to have an exact definition.
@@ -1057,14 +1059,15 @@ bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
// TODO: use the function scope once we have call site AAReturnedValues.
const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
const auto *LivenessAA =
- CheckBBLivenessOnly
+ (CheckBBLivenessOnly || CheckPotentiallyDead)
? nullptr
: &(getAAFor<AAIsDead>(QueryingAA, QueryIRP, DepClassTy::NONE));
auto &OpcodeInstMap =
InfoCache.getOpcodeInstMapForFunction(*AssociatedFunction);
if (!checkForAllInstructionsImpl(this, OpcodeInstMap, Pred, &QueryingAA,
- LivenessAA, Opcodes, CheckBBLivenessOnly))
+ LivenessAA, Opcodes, CheckBBLivenessOnly,
+ CheckPotentiallyDead))
return false;
return true;
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index deb32c72920bd..e367371682034 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -11,8 +11,10 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/IR/Constants.h"
#include "llvm/Transforms/IPO/Attributor.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -27,9 +29,12 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/NoFolder.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
@@ -4837,23 +4842,117 @@ struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating {
};
/// ----------------------- Heap-To-Stack Conversion ---------------------------
-struct AAHeapToStackImpl : public AAHeapToStack {
- AAHeapToStackImpl(const IRPosition &IRP, Attributor &A)
+struct AAHeapToStackFunction final : public AAHeapToStack {
+
+ struct AllocationInfo {
+ /// The call that allocates the memory.
+ CallBase *CB;
+
+ /// The kind of allocation.
+ const enum class AllocationKind {
+ MALLOC,
+ CALLOC,
+ ALIGNED_ALLOC,
+ } Kind;
+
+ /// The library function id for the allocation.
+ LibFunc LibraryFunctionId;
+
+ /// The status wrt. a rewrite.
+ enum {
+ STACK_DUE_TO_USE,
+ STACK_DUE_TO_FREE,
+ INVALID,
+ } Status = STACK_DUE_TO_USE;
+
+ /// Flag to indicate if we encountered a use that might free this allocation
+ /// but which is not in the deallocation infos.
+ bool HasPotentiallyFreeingUnknownUses = false;
+
+ /// The set of free calls that use this allocation.
+ SmallPtrSet<CallBase *, 1> PotentialFreeCalls;
+ };
+
+ struct DeallocationInfo {
+ /// The call that deallocates the memory.
+ CallBase *CB;
+
+ /// Flag to indicate if we don't know all objects this deallocation might
+ /// free..
+ bool MightFreeUnknownObjects = false;
+
+ /// The set of allocation calls that are potentially freed.
+ SmallPtrSet<CallBase *, 1> PotentialAllocationCalls;
+ };
+
+ AAHeapToStackFunction(const IRPosition &IRP, Attributor &A)
: AAHeapToStack(IRP, A) {}
+ void initialize(Attributor &A) override {
+ AAHeapToStack::initialize(A);
+
+ const Function *F = getAnchorScope();
+ const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
+
+ auto AllocationIdentifierCB = [&](Instruction &I) {
+ CallBase *CB = dyn_cast<CallBase>(&I);
+ if (!CB)
+ return true;
+ if (isFreeCall(CB, TLI)) {
+ DeallocationInfos[CB] = new (A.Allocator) DeallocationInfo{CB};
+ return true;
+ }
+ bool IsMalloc = isMallocLikeFn(CB, TLI);
+ bool IsAlignedAllocLike = !IsMalloc && isAlignedAllocLikeFn(CB, TLI);
+ bool IsCalloc =
+ !IsMalloc && !IsAlignedAllocLike && isCallocLikeFn(CB, TLI);
+ if (!IsMalloc && !IsAlignedAllocLike && !IsCalloc)
+ return true;
+ auto Kind =
+ IsMalloc ? AllocationInfo::AllocationKind::MALLOC
+ : (IsCalloc ? AllocationInfo::AllocationKind::CALLOC
+ : AllocationInfo::AllocationKind::ALIGNED_ALLOC);
+
+ AllocationInfo *AI = new (A.Allocator) AllocationInfo{CB, Kind};
+ AllocationInfos[CB] = AI;
+ TLI->getLibFunc(*CB, AI->LibraryFunctionId);
+ return true;
+ };
+
+ bool Success = A.checkForAllCallLikeInstructions(
+ AllocationIdentifierCB, *this, /* CheckBBLivenessOnly */ false,
+ /* CheckPotentiallyDead */ true);
+ (void)Success;
+ assert(Success && "Did not expect the call base visit callback to fail!");
+ }
+
const std::string getAsStr() const override {
- return "[H2S] Mallocs Good/Bad: " + std::to_string(MallocCalls.size()) +
- "/" + std::to_string(BadMallocCalls.size());
+ unsigned NumH2SMallocs = 0, NumInvalidMallocs = 0;
+ for (const auto &It : AllocationInfos) {
+ if (It.second->Status == AllocationInfo::INVALID)
+ ++NumInvalidMallocs;
+ else
+ ++NumH2SMallocs;
+ }
+ return "[H2S] Mallocs Good/Bad: " + std::to_string(NumH2SMallocs) + "/" +
+ std::to_string(NumInvalidMallocs);
}
- bool isAssumedHeapToStack(CallBase &CB) const override {
- return isValidState() && MallocCalls.contains(&CB) &&
- !BadMallocCalls.count(&CB);
+ /// See AbstractAttribute::trackStatistics().
+ void trackStatistics() const override {
+ STATS_DECL(
+ MallocCalls, Function,
+ "Number of malloc/calloc/aligned_alloc calls converted to allocas");
+ for (auto &It : AllocationInfos)
+ if (It.second->Status != AllocationInfo::INVALID)
+ ++BUILD_STAT_NAME(MallocCalls, Function);
}
- bool isKnownHeapToStack(CallBase &CB) const override {
- return isValidState() && MallocCalls.contains(&CB) &&
- !BadMallocCalls.count(&CB);
+ bool isAssumedHeapToStack(CallBase &CB) const override {
+ if (isValidState())
+ if (AllocationInfo *AI = AllocationInfos.lookup(&CB))
+ return AI->Status != AllocationInfo::INVALID;
+ return false;
}
ChangeStatus manifest(Attributor &A) override {
@@ -4864,76 +4963,82 @@ struct AAHeapToStackImpl : public AAHeapToStack {
Function *F = getAnchorScope();
const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
- for (Instruction *MallocCall : MallocCalls) {
- // This malloc cannot be replaced.
- if (BadMallocCalls.count(MallocCall))
+ for (auto &It : AllocationInfos) {
+ AllocationInfo &AI = *It.second;
+ if (AI.Status == AllocationInfo::INVALID)
continue;
- for (Instruction *FreeCall : FreesForMalloc[MallocCall]) {
+ for (CallBase *FreeCall : AI.PotentialFreeCalls) {
LLVM_DEBUG(dbgs() << "H2S: Removing free call: " << *FreeCall << "\n");
A.deleteAfterManifest(*FreeCall);
HasChanged = ChangeStatus::CHANGED;
}
- LLVM_DEBUG(dbgs() << "H2S: Removing malloc call: " << *MallocCall
+ LLVM_DEBUG(dbgs() << "H2S: Removing malloc-like call: " << *AI.CB
<< "\n");
auto Remark = [&](OptimizationRemark OR) {
LibFunc IsAllocShared;
- if (auto *CB = dyn_cast<CallBase>(MallocCall)) {
- TLI->getLibFunc(*CB, IsAllocShared);
+ if (TLI->getLibFunc(*AI.CB, IsAllocShared))
if (IsAllocShared == LibFunc___kmpc_alloc_shared)
return OR << "Moving globalized variable to the stack.";
- }
return OR << "Moving memory allocation from the heap to the stack.";
};
- A.emitRemark<OptimizationRemark>(MallocCall, "HeapToStack", Remark);
+ A.emitRemark<OptimizationRemark>(AI.CB, "HeapToStack", Remark);
- Align Alignment;
Value *Size;
- if (isCallocLikeFn(MallocCall, TLI)) {
- auto *Num = MallocCall->getOperand(0);
- auto *SizeT = MallocCall->getOperand(1);
- IRBuilder<> B(MallocCall);
+ Optional<APInt> SizeAPI = getSize(A, *this, AI);
+ if (SizeAPI.hasValue()) {
+ Size = ConstantInt::get(AI.CB->getContext(), *SizeAPI);
+ } else if (AI.Kind == AllocationInfo::AllocationKind::CALLOC) {
+ auto *Num = AI.CB->getOperand(0);
+ auto *SizeT = AI.CB->getOperand(1);
+ IRBuilder<> B(AI.CB);
Size = B.CreateMul(Num, SizeT, "h2s.calloc.size");
- } else if (isAlignedAllocLikeFn(MallocCall, TLI)) {
- Size = MallocCall->getOperand(1);
- Alignment = MaybeAlign(cast<ConstantInt>(MallocCall->getOperand(0))
- ->getValue()
- .getZExtValue())
- .valueOrOne();
+ } else if (AI.Kind == AllocationInfo::AllocationKind::ALIGNED_ALLOC) {
+ Size = AI.CB->getOperand(1);
} else {
- Size = MallocCall->getOperand(0);
+ Size = AI.CB->getOperand(0);
}
- unsigned AS = cast<PointerType>(MallocCall->getType())->getAddressSpace();
- Instruction *AI =
+ Align Alignment(1);
+ if (AI.Kind == AllocationInfo::AllocationKind::ALIGNED_ALLOC) {
+ Optional<APInt> AlignmentAPI =
+ getAPInt(A, *this, *AI.CB->getArgOperand(0));
+ assert(AlignmentAPI.hasValue() &&
+ "Expected an alignment during manifest!");
+ Alignment =
+ max(Alignment, MaybeAlign(AlignmentAPI.getValue().getZExtValue()));
+ }
+
+ unsigned AS = cast<PointerType>(AI.CB->getType())->getAddressSpace();
+ Instruction *Alloca =
new AllocaInst(Type::getInt8Ty(F->getContext()), AS, Size, Alignment,
- "", MallocCall->getNextNode());
+ "", AI.CB->getNextNode());
- if (AI->getType() != MallocCall->getType())
- AI = new BitCastInst(AI, MallocCall->getType(), "malloc_bc",
- AI->getNextNode());
+ if (Alloca->getType() != AI.CB->getType())
+ Alloca = new BitCastInst(Alloca, AI.CB->getType(), "malloc_bc",
+ Alloca->getNextNode());
- A.changeValueAfterManifest(*MallocCall, *AI);
+ A.changeValueAfterManifest(*AI.CB, *Alloca);
- if (auto *II = dyn_cast<InvokeInst>(MallocCall)) {
+ if (auto *II = dyn_cast<InvokeInst>(AI.CB)) {
auto *NBB = II->getNormalDest();
- BranchInst::Create(NBB, MallocCall->getParent());
- A.deleteAfterManifest(*MallocCall);
+ BranchInst::Create(NBB, AI.CB->getParent());
+ A.deleteAfterManifest(*AI.CB);
} else {
- A.deleteAfterManifest(*MallocCall);
+ A.deleteAfterManifest(*AI.CB);
}
// Zero out the allocated memory if it was a calloc.
- if (isCallocLikeFn(MallocCall, TLI)) {
- auto *BI = new BitCastInst(AI, MallocCall->getType(), "calloc_bc",
- AI->getNextNode());
+ if (AI.Kind == AllocationInfo::AllocationKind::CALLOC) {
+ auto *BI = new BitCastInst(Alloca, AI.CB->getType(), "calloc_bc",
+ Alloca->getNextNode());
Value *Ops[] = {
BI, ConstantInt::get(F->getContext(), APInt(8, 0, false)), Size,
ConstantInt::get(Type::getInt1Ty(F->getContext()), false)};
- Type *Tys[] = {BI->getType(), MallocCall->getOperand(0)->getType()};
+ Type *Tys[] = {BI->getType(), AI.CB->getOperand(0)->getType()};
Module *M = F->getParent();
Function *Fn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
CallInst::Create(Fn, Ops, "", BI->getNextNode());
@@ -4944,21 +5049,58 @@ struct AAHeapToStackImpl : public AAHeapToStack {
return HasChanged;
}
- /// Collection of all malloc calls in a function.
- SmallSetVector<Instruction *, 4> MallocCalls;
+ Optional<APInt> getAPInt(Attributor &A, const AbstractAttribute &AA,
+ Value &V) {
+ bool UsedAssumedInformation = false;
+ Optional<Constant *> SimpleV =
+ A.getAssumedConstant(V, AA, UsedAssumedInformation);
+ if (!SimpleV.hasValue())
+ return APInt(64, 0);
+ if (auto *CI = dyn_cast_or_null<ConstantInt>(SimpleV.getValue()))
+ return CI->getValue();
+ return llvm::None;
+ }
+
+ Optional<APInt> getSize(Attributor &A, const AbstractAttribute &AA,
+ AllocationInfo &AI) {
+
+ if (AI.Kind == AllocationInfo::AllocationKind::MALLOC)
+ return getAPInt(A, AA, *AI.CB->getArgOperand(0));
+
+ if (AI.Kind == AllocationInfo::AllocationKind::ALIGNED_ALLOC)
+ // Only if the alignment is also constant we return a size.
+ return getAPInt(A, AA, *AI.CB->getArgOperand(0)).hasValue()
+ ? getAPInt(A, AA, *AI.CB->getArgOperand(1))
+ : llvm::None;
+
+ assert(AI.Kind == AllocationInfo::AllocationKind::CALLOC &&
+ "Expected only callocs are left");
+ Optional<APInt> Num = getAPInt(A, AA, *AI.CB->getArgOperand(0));
+ Optional<APInt> Size = getAPInt(A, AA, *AI.CB->getArgOperand(1));
+ if (!Num.hasValue() || !Size.hasValue())
+ return llvm::None;
+ bool Overflow = false;
+ Size = Size.getValue().umul_ov(Num.getValue(), Overflow);
+ return Overflow ? llvm::None : Size;
+ }
- /// Collection of malloc calls that cannot be converted.
- DenseSet<const Instruction *> BadMallocCalls;
+ /// Collection of all malloc-like calls in a function with associated
+ /// information.
+ DenseMap<CallBase *, AllocationInfo *> AllocationInfos;
- /// A map for each malloc call to the set of associated free calls.
- DenseMap<Instruction *, SmallPtrSet<Instruction *, 4>> FreesForMalloc;
+ /// Collection of all free-like calls in a function with associated
+ /// information.
+ DenseMap<CallBase *, DeallocationInfo *> DeallocationInfos;
ChangeStatus updateImpl(Attributor &A) override;
};
-ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
+ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
const Function *F = getAnchorScope();
- const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
+
+ const auto &LivenessAA =
+ A.getAAFor<AAIsDead>(*this, IRPosition::function(*F), DepClassTy::NONE);
MustBeExecutedContextExplorer &Explorer =
A.getInfoCache().getMustBeExecutedContextExplorer();
@@ -4966,7 +5108,66 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
bool StackIsAccessibleByOtherThreads =
A.getInfoCache().stackIsAccessibleByOtherThreads();
- auto FreeCheck = [&](Instruction &I) {
+ // Flag to ensure we update our deallocation information at most once per
+ // updateImpl call and only if we use the free check reasoning.
+ bool HasUpdatedFrees = false;
+
+ auto UpdateFrees = [&]() {
+ HasUpdatedFrees = true;
+
+ for (auto &It : DeallocationInfos) {
+ DeallocationInfo &DI = *It.second;
+ // For now we cannot use deallocations that have unknown inputs, skip
+ // them.
+ if (DI.MightFreeUnknownObjects)
+ continue;
+
+ // No need to analyze dead calls, ignore them instead.
+ if (A.isAssumedDead(*DI.CB, this, &LivenessAA,
+ /* CheckBBLivenessOnly */ true))
+ continue;
+
+ // Use the optimistic version to get the freed objects, ignoring dead
+ // branches etc.
+ SmallVector<Value *, 8> Objects;
+ if (!getAssumedUnderlyingObjects(A, *DI.CB->getArgOperand(0), Objects,
+ *this, DI.CB)) {
+ LLVM_DEBUG(
+ dbgs()
+ << "[H2S] Unexpected failure in getAssumedUnderlyingObjects!\n");
+ DI.MightFreeUnknownObjects = true;
+ continue;
+ }
+
+ // Check each object explicitly.
+ for (auto *Obj : Objects) {
+ // Free of null and undef can be ignored as no-ops (or UB in the latter
+ // case).
+ if (isa<ConstantPointerNull>(Obj) || isa<UndefValue>(Obj))
+ continue;
+
+ CallBase *ObjCB = dyn_cast<CallBase>(Obj);
+ if (!ObjCB) {
+ LLVM_DEBUG(dbgs()
+ << "[H2S] Free of a non-call object: " << *Obj << "\n");
+ DI.MightFreeUnknownObjects = true;
+ continue;
+ }
+
+ AllocationInfo *AI = AllocationInfos.lookup(ObjCB);
+ if (!AI) {
+ LLVM_DEBUG(dbgs() << "[H2S] Free of a non-allocation object: " << *Obj
+ << "\n");
+ DI.MightFreeUnknownObjects = true;
+ continue;
+ }
+
+ DI.PotentialAllocationCalls.insert(ObjCB);
+ }
+ }
+ };
+
+ auto FreeCheck = [&](AllocationInfo &AI) {
// If the stack is not accessible by other threads, the "must-free" logic
// doesn't apply as the pointer could be shared and needs to be places in
// "shareable" memory.
@@ -4980,19 +5181,55 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
return false;
}
}
- const auto &Frees = FreesForMalloc.lookup(&I);
- if (Frees.size() != 1) {
+ if (!HasUpdatedFrees)
+ UpdateFrees();
+
+ // TODO: Allow multi exit functions that have
diff erent free calls.
+ if (AI.PotentialFreeCalls.size() != 1) {
LLVM_DEBUG(dbgs() << "[H2S] did not find one free call but "
- << Frees.size() << "\n");
+ << AI.PotentialFreeCalls.size() << "\n");
+ return false;
+ }
+ CallBase *UniqueFree = *AI.PotentialFreeCalls.begin();
+ DeallocationInfo *DI = DeallocationInfos.lookup(UniqueFree);
+ if (!DI) {
+ LLVM_DEBUG(
+ dbgs() << "[H2S] unique free call was not known as deallocation call "
+ << *UniqueFree << "\n");
+ return false;
+ }
+ if (DI->MightFreeUnknownObjects) {
+ LLVM_DEBUG(
+ dbgs() << "[H2S] unique free call might free unkown allocations\n");
return false;
}
- Instruction *UniqueFree = *Frees.begin();
- return Explorer.findInContextOf(UniqueFree, I.getNextNode());
+ if (DI->PotentialAllocationCalls.size() > 1) {
+ LLVM_DEBUG(dbgs() << "[H2S] unique free call might free "
+ << DI->PotentialAllocationCalls.size()
+ << "
diff erent allocations\n");
+ return false;
+ }
+ if (*DI->PotentialAllocationCalls.begin() != AI.CB) {
+ LLVM_DEBUG(
+ dbgs()
+ << "[H2S] unique free call not known to free this allocation but "
+ << **DI->PotentialAllocationCalls.begin() << "\n");
+ return false;
+ }
+ Instruction *CtxI = isa<InvokeInst>(AI.CB) ? AI.CB : AI.CB->getNextNode();
+ if (!Explorer.findInContextOf(UniqueFree, CtxI)) {
+ LLVM_DEBUG(
+ dbgs()
+ << "[H2S] unique free call might not be executed with the allocation "
+ << *UniqueFree << "\n");
+ return false;
+ }
+ return true;
};
- auto UsesCheck = [&](Instruction &I) {
+ auto UsesCheck = [&](AllocationInfo &AI) {
bool ValidUsesOnly = true;
- bool MustUse = true;
+
auto Pred = [&](const Use &U, bool &Follow) -> bool {
Instruction *UserI = cast<Instruction>(U.getUser());
if (isa<LoadInst>(UserI))
@@ -5010,15 +5247,8 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
if (auto *CB = dyn_cast<CallBase>(UserI)) {
if (!CB->isArgOperand(&U) || CB->isLifetimeStartOrEnd())
return true;
- // Record malloc.
- if (isFreeCall(UserI, TLI)) {
- if (MustUse) {
- FreesForMalloc[&I].insert(UserI);
- } else {
- LLVM_DEBUG(dbgs() << "[H2S] free potentially on
diff erent mallocs: "
- << *UserI << "\n");
- ValidUsesOnly = false;
- }
+ if (DeallocationInfos.count(CB)) {
+ AI.PotentialFreeCalls.insert(CB);
return true;
}
@@ -5033,8 +5263,12 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
*this, IRPosition::callsite_argument(*CB, ArgNo),
DepClassTy::OPTIONAL);
- if (!NoCaptureAA.isAssumedNoCapture() ||
- !ArgNoFreeAA.isAssumedNoFree()) {
+ bool MaybeCaptured = !NoCaptureAA.isAssumedNoCapture();
+ bool MaybeFreed = !ArgNoFreeAA.isAssumedNoFree();
+ if (MaybeCaptured ||
+ (AI.LibraryFunctionId != LibFunc___kmpc_alloc_shared &&
+ MaybeFreed)) {
+ AI.HasPotentiallyFreeingUnknownUses |= MaybeFreed;
// Emit a missed remark if this is missed OpenMP globalization.
auto Remark = [&](OptimizationRemarkMissed ORM) {
@@ -5045,13 +5279,9 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
<< "Mark as noescape to override.";
};
- LibFunc IsAllocShared;
- if (auto *AllocShared = dyn_cast<CallBase>(&I)) {
- TLI->getLibFunc(*AllocShared, IsAllocShared);
- if (IsAllocShared == LibFunc___kmpc_alloc_shared)
- A.emitRemark<OptimizationRemarkMissed>(
- AllocShared, "HeapToStackFailed", Remark);
- }
+ if (AI.LibraryFunctionId == LibFunc___kmpc_alloc_shared)
+ A.emitRemark<OptimizationRemarkMissed>(AI.CB, "HeapToStackFailed",
+ Remark);
LLVM_DEBUG(dbgs() << "[H2S] Bad user: " << *UserI << "\n");
ValidUsesOnly = false;
@@ -5061,7 +5291,6 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
if (isa<GetElementPtrInst>(UserI) || isa<BitCastInst>(UserI) ||
isa<PHINode>(UserI) || isa<SelectInst>(UserI)) {
- MustUse &= !(isa<PHINode>(UserI) || isa<SelectInst>(UserI));
Follow = true;
return true;
}
@@ -5071,96 +5300,64 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
ValidUsesOnly = false;
return true;
};
- A.checkForAllUses(Pred, *this, I);
+ A.checkForAllUses(Pred, *this, *AI.CB);
return ValidUsesOnly;
};
- auto MallocCallocCheck = [&](Instruction &I) {
- if (BadMallocCalls.count(&I))
- return true;
-
- bool IsMalloc = isMallocLikeFn(&I, TLI);
- bool IsAlignedAllocLike = isAlignedAllocLikeFn(&I, TLI);
- bool IsCalloc = !IsMalloc && isCallocLikeFn(&I, TLI);
- if (!IsMalloc && !IsAlignedAllocLike && !IsCalloc) {
- BadMallocCalls.insert(&I);
- return true;
- }
+ // The actual update starts here. We look at all allocations and depending on
+ // their status perform the appropriate check(s).
+ for (auto &It : AllocationInfos) {
+ AllocationInfo &AI = *It.second;
+ if (AI.Status == AllocationInfo::INVALID)
+ continue;
- if (IsMalloc) {
- if (MaxHeapToStackSize == -1) {
- if (UsesCheck(I) || FreeCheck(I)) {
- MallocCalls.insert(&I);
- return true;
- }
- }
- if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(0)))
- if (Size->getValue().ule(MaxHeapToStackSize))
- if (UsesCheck(I) || FreeCheck(I)) {
- MallocCalls.insert(&I);
- return true;
- }
- } else if (IsAlignedAllocLike && isa<ConstantInt>(I.getOperand(0))) {
- if (MaxHeapToStackSize == -1) {
- if (UsesCheck(I) || FreeCheck(I)) {
- MallocCalls.insert(&I);
- return true;
- }
- }
- // Only if the alignment and sizes are constant.
- if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(1)))
- if (Size->getValue().ule(MaxHeapToStackSize))
- if (UsesCheck(I) || FreeCheck(I)) {
- MallocCalls.insert(&I);
- return true;
- }
- } else if (IsCalloc) {
- if (MaxHeapToStackSize == -1) {
- if (UsesCheck(I) || FreeCheck(I)) {
- MallocCalls.insert(&I);
- return true;
+ if (MaxHeapToStackSize == -1) {
+ if (AI.Kind == AllocationInfo::AllocationKind::ALIGNED_ALLOC)
+ if (!getAPInt(A, *this, *AI.CB->getArgOperand(0)).hasValue()) {
+ LLVM_DEBUG(dbgs() << "[H2S] Unknown allocation alignment: " << *AI.CB
+ << "\n");
+ AI.Status = AllocationInfo::INVALID;
+ Changed = ChangeStatus::CHANGED;
+ continue;
}
+ } else {
+ Optional<APInt> Size = getSize(A, *this, AI);
+ if (!Size.hasValue() || Size.getValue().ugt(MaxHeapToStackSize)) {
+ LLVM_DEBUG({
+ if (!Size.hasValue())
+ dbgs() << "[H2S] Unknown allocation size (or alignment): " << *AI.CB
+ << "\n";
+ else
+ dbgs() << "[H2S] Allocation size too large: " << *AI.CB << " vs. "
+ << MaxHeapToStackSize << "\n";
+ });
+
+ AI.Status = AllocationInfo::INVALID;
+ Changed = ChangeStatus::CHANGED;
+ continue;
}
- bool Overflow = false;
- if (auto *Num = dyn_cast<ConstantInt>(I.getOperand(0)))
- if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(1)))
- if ((Size->getValue().umul_ov(Num->getValue(), Overflow))
- .ule(MaxHeapToStackSize))
- if (!Overflow && (UsesCheck(I) || FreeCheck(I))) {
- MallocCalls.insert(&I);
- return true;
- }
}
- BadMallocCalls.insert(&I);
- return true;
- };
-
- size_t NumBadMallocs = BadMallocCalls.size();
-
- A.checkForAllCallLikeInstructions(MallocCallocCheck, *this);
-
- if (NumBadMallocs != BadMallocCalls.size())
- return ChangeStatus::CHANGED;
+ switch (AI.Status) {
+ case AllocationInfo::STACK_DUE_TO_USE:
+ if (UsesCheck(AI))
+ continue;
+ AI.Status = AllocationInfo::STACK_DUE_TO_FREE;
+ LLVM_FALLTHROUGH;
+ case AllocationInfo::STACK_DUE_TO_FREE:
+ if (FreeCheck(AI))
+ continue;
+ AI.Status = AllocationInfo::INVALID;
+ Changed = ChangeStatus::CHANGED;
+ continue;
+ case AllocationInfo::INVALID:
+ llvm_unreachable("Invalid allocations should never reach this point!");
+ };
+ }
- return ChangeStatus::UNCHANGED;
+ return Changed;
}
-struct AAHeapToStackFunction final : public AAHeapToStackImpl {
- AAHeapToStackFunction(const IRPosition &IRP, Attributor &A)
- : AAHeapToStackImpl(IRP, A) {}
-
- /// See AbstractAttribute::trackStatistics().
- void trackStatistics() const override {
- STATS_DECL(
- MallocCalls, Function,
- "Number of malloc/calloc/aligned_alloc calls converted to allocas");
- for (auto *C : MallocCalls)
- if (!BadMallocCalls.count(C))
- ++BUILD_STAT_NAME(MallocCalls, Function);
- }
-};
-
/// ----------------------- Privatizable Pointers ------------------------------
struct AAPrivatizablePtrImpl : public AAPrivatizablePtr {
AAPrivatizablePtrImpl(const IRPosition &IRP, Attributor &A)
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 5c4be0b8b9bda..0127f9da43083 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -2472,7 +2472,7 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
for (CallBase *CB : MallocCalls) {
// Skip replacing this if HeapToStack has already claimed it.
- if (HS && HS->isKnownHeapToStack(*CB))
+ if (HS && HS->isAssumedHeapToStack(*CB))
continue;
// Find the unique free call to remove it.
diff --git a/llvm/test/Transforms/Attributor/depgraph.ll b/llvm/test/Transforms/Attributor/depgraph.ll
index 03423d82533d2..fa9fc6c06caa8 100644
--- a/llvm/test/Transforms/Attributor/depgraph.ll
+++ b/llvm/test/Transforms/Attributor/depgraph.ll
@@ -123,7 +123,7 @@ define i32* @checkAndAdvance(i32* align 16 %0) {
; GRAPH-NEXT: updates [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument
; GRAPH-NEXT: updates [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument
; GRAPH-EMPTY:
-; GRAPH-NEXT: [AAHeapToStack] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance at -1]} with state [H2S] Mallocs Good/Bad: 0/1
+; GRAPH-NEXT: [AAHeapToStack] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance at -1]} with state [H2S] Mallocs Good/Bad: 0/0
; GRAPH-EMPTY:
; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance at -1]} with state not-simple
; GRAPH-EMPTY:
diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll
index aa09f1b8b5628..50b8463f8df16 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll
@@ -55,21 +55,21 @@ define void @h2s_value_simplify_interaction(i1 %c, i8* %A) {
; IS________NPM-LABEL: define {{[^@]+}}@h2s_value_simplify_interaction
; IS________NPM-SAME: (i1 [[C:%.*]], i8* nocapture nofree [[A:%.*]]) {
; IS________NPM-NEXT: entry:
-; IS________NPM-NEXT: [[M:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
+; IS________NPM-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1
; IS________NPM-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
; IS________NPM: t:
; IS________NPM-NEXT: br i1 false, label [[DEAD:%.*]], label [[F2:%.*]]
; IS________NPM: f:
; IS________NPM-NEXT: br label [[J:%.*]]
; IS________NPM: f2:
-; IS________NPM-NEXT: [[L:%.*]] = load i8, i8* [[M]], align 1
+; IS________NPM-NEXT: [[L:%.*]] = load i8, i8* [[TMP0]], align 1
; IS________NPM-NEXT: call void @usei8(i8 [[L]])
-; IS________NPM-NEXT: call void @no_sync_func(i8* nocapture nofree noundef [[M]]) #[[ATTR6:[0-9]+]]
+; IS________NPM-NEXT: call void @no_sync_func(i8* nocapture nofree noundef [[TMP0]]) #[[ATTR6:[0-9]+]]
; IS________NPM-NEXT: br label [[J]]
; IS________NPM: dead:
; IS________NPM-NEXT: unreachable
; IS________NPM: j:
-; IS________NPM-NEXT: [[PHI:%.*]] = phi i8* [ [[M]], [[F]] ], [ null, [[F2]] ]
+; IS________NPM-NEXT: [[PHI:%.*]] = phi i8* [ [[TMP0]], [[F]] ], [ null, [[F2]] ]
; IS________NPM-NEXT: tail call void @no_sync_func(i8* nocapture nofree noundef [[PHI]]) #[[ATTR6]]
; IS________NPM-NEXT: ret void
;
diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll
index 0635ec6b7813c..865675cf7797a 100644
--- a/llvm/test/Transforms/OpenMP/remove_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll
@@ -13,8 +13,8 @@ target triple = "nvptx64"
define void @kernel() {
; CHECK-LABEL: define {{[^@]+}}@kernel() {
; CHECK-NEXT: entry:
-; CHECK-NEXT: call void @foo() #[[ATTR0:[0-9]+]]
-; CHECK-NEXT: call void @bar() #[[ATTR0]]
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: call void @bar()
; CHECK-NEXT: ret void
;
entry:
@@ -25,7 +25,7 @@ entry:
define internal void @foo() {
; CHECK-LABEL: define {{[^@]+}}@foo
-; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1
; CHECK-NEXT: ret void
@@ -41,7 +41,7 @@ define internal void @bar() {
; CHECK-LABEL: define {{[^@]+}}@bar
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 noundef 4) #[[ATTR0]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 noundef 4) #[[ATTR0]], !dbg [[DBG6:![0-9]+]]
; CHECK-NEXT: call void @share(i8* nofree writeonly [[TMP0]]) #[[ATTR2:[0-9]+]]
; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[TMP0]]) #[[ATTR0]]
; CHECK-NEXT: ret void
More information about the llvm-commits
mailing list