[llvm] [Attributor] New attribute to identify what byte ranges are alive for an allocation (PR #66148)
Vidhush Singhal via llvm-commits
llvm-commits at lists.llvm.org
Sat Oct 28 20:58:02 PDT 2023
https://github.com/vidsinghal updated https://github.com/llvm/llvm-project/pull/66148
>From 5e92f240f8b48128c38e2a33389f97407294527c Mon Sep 17 00:00:00 2001
From: vidsinghal <vidush.sl at gmail.com>
Date: Fri, 4 Aug 2023 00:06:56 -0400
Subject: [PATCH] [Attributor] New attribute to identify what byte ranges are
alive for an allocation
Changes the size of allocations automatically.
Only implements the case when a single range from start of the allocation is alive.
Differential Revision: https://reviews.llvm.org/D157068
---
llvm/include/llvm/Transforms/IPO/Attributor.h | 43 ++
llvm/lib/Transforms/IPO/Attributor.cpp | 15 +-
.../Transforms/IPO/AttributorAttributes.cpp | 240 +++++++-
.../Attributor/ArgumentPromotion/crash.ll | 4 +-
.../live_called_from_dead.ll | 2 +
.../live_called_from_dead_2.ll | 2 +
.../nonzero-address-spaces.ll | 1 +
.../Attributor/IPConstantProp/pthreads.ll | 8 +-
llvm/test/Transforms/Attributor/allocator.ll | 518 ++++++++++++++++++
.../Transforms/Attributor/heap_to_stack.ll | 1 +
.../Attributor/heap_to_stack_gpu.ll | 1 +
llvm/test/Transforms/Attributor/liveness.ll | 16 +-
llvm/test/Transforms/Attributor/nodelete.ll | 2 +
13 files changed, 830 insertions(+), 23 deletions(-)
create mode 100644 llvm/test/Transforms/Attributor/allocator.ll
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index bd1bd8261123e51..2fc46172c922a8e 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -103,6 +103,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/CFG.h"
@@ -132,6 +133,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ModRef.h"
#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/TypeSize.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
@@ -6117,6 +6119,12 @@ struct AAPointerInfo : public AbstractAttribute {
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
+ using OffsetBinsTy = DenseMap<AA::RangeTy, SmallSet<unsigned, 4>>;
+ using const_bin_iterator = OffsetBinsTy::const_iterator;
+ virtual const_bin_iterator begin() const = 0;
+ virtual const_bin_iterator end() const = 0;
+ virtual int64_t numOffsetBins() const = 0;
+
/// Call \p CB on all accesses that might interfere with \p Range and return
/// true if all such accesses were known and the callback returned true for
/// all of them, false otherwise. An access interferes with an offset-size
@@ -6270,6 +6278,41 @@ struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
static const char ID;
};
+struct AAAllocationInfo : public StateWrapper<BooleanState, AbstractAttribute> {
+ AAAllocationInfo(const IRPosition &IRP, Attributor &A)
+ : StateWrapper<BooleanState, AbstractAttribute>(IRP) {}
+
+ /// See AbstractAttribute::isValidIRPositionForInit
+ static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+ if (!IRP.getAssociatedType()->isPtrOrPtrVectorTy())
+ return false;
+ return AbstractAttribute::isValidIRPositionForInit(A, IRP);
+ }
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAAllocationInfo &createForPosition(const IRPosition &IRP,
+ Attributor &A);
+
+ virtual std::optional<TypeSize> getAllocatedSize() const = 0;
+
+ /// See AbstractAttribute::getName()
+ const std::string getName() const override { return "AAAllocationInfo"; }
+
+ /// See AbstractAttribute::getIdAddr()
+ const char *getIdAddr() const override { return &ID; }
+
+ /// This function should return true if the type of the \p AA is
+ /// AAAllocationInfo
+ static bool classof(const AbstractAttribute *AA) {
+ return (AA->getIdAddr() == &ID);
+ }
+
+ constexpr static const std::optional<TypeSize> HasNoAllocationSize =
+ std::optional<TypeSize>(TypeSize(-1, true));
+
+ static const char ID;
+};
+
/// An abstract interface for llvm::GlobalValue information interference.
struct AAGlobalValueInfo
: public StateWrapper<BooleanState, AbstractAttribute> {
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 49ced893d5c7340..f1a88bc564ced71 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -3611,14 +3611,13 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
};
auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
- bool Success;
+ [[maybe_unused]] bool Success;
bool UsedAssumedInformation = false;
Success = checkForAllInstructionsImpl(
nullptr, OpcodeInstMap, CallSitePred, nullptr, nullptr,
{(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
(unsigned)Instruction::Call},
UsedAssumedInformation);
- (void)Success;
assert(Success && "Expected the check call to be successful!");
auto LoadStorePred = [&](Instruction &I) -> bool {
@@ -3644,7 +3643,17 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
nullptr, OpcodeInstMap, LoadStorePred, nullptr, nullptr,
{(unsigned)Instruction::Load, (unsigned)Instruction::Store},
UsedAssumedInformation);
- (void)Success;
+ assert(Success && "Expected the check call to be successful!");
+
+ // AllocaInstPredicate
+ auto AAAllocationInfoPred = [&](Instruction &I) -> bool {
+ getOrCreateAAFor<AAAllocationInfo>(IRPosition::value(I));
+ return true;
+ };
+
+ Success = checkForAllInstructionsImpl(
+ nullptr, OpcodeInstMap, AAAllocationInfoPred, nullptr, nullptr,
+ {(unsigned)Instruction::Alloca}, UsedAssumedInformation);
assert(Success && "Expected the check call to be successful!");
}
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index bbb0cfa0eb05fe6..b33255db4745717 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -65,6 +65,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TypeSize.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
@@ -192,6 +193,7 @@ PIPE_OPERATOR(AAPointerInfo)
PIPE_OPERATOR(AAAssumptionInfo)
PIPE_OPERATOR(AAUnderlyingObjects)
PIPE_OPERATOR(AAAddressSpace)
+PIPE_OPERATOR(AAAllocationInfo)
PIPE_OPERATOR(AAIndirectCallInfo)
PIPE_OPERATOR(AAGlobalValueInfo)
PIPE_OPERATOR(AADenormalFPMath)
@@ -881,11 +883,9 @@ struct AA::PointerInfo::State : public AbstractState {
AAPointerInfo::AccessKind Kind, Type *Ty,
Instruction *RemoteI = nullptr);
- using OffsetBinsTy = DenseMap<RangeTy, SmallSet<unsigned, 4>>;
-
- using const_bin_iterator = OffsetBinsTy::const_iterator;
- const_bin_iterator begin() const { return OffsetBins.begin(); }
- const_bin_iterator end() const { return OffsetBins.end(); }
+ AAPointerInfo::const_bin_iterator begin() const { return OffsetBins.begin(); }
+ AAPointerInfo::const_bin_iterator end() const { return OffsetBins.end(); }
+ int64_t numOffsetBins() const { return OffsetBins.size(); }
const AAPointerInfo::Access &getAccess(unsigned Index) const {
return AccessList[Index];
@@ -905,7 +905,7 @@ struct AA::PointerInfo::State : public AbstractState {
// are all combined into a single Access object. This may result in loss of
// information in RangeTy in the Access object.
SmallVector<AAPointerInfo::Access> AccessList;
- OffsetBinsTy OffsetBins;
+ AAPointerInfo::OffsetBinsTy OffsetBins;
DenseMap<const Instruction *, SmallVector<unsigned>> RemoteIMap;
/// See AAPointerInfo::forallInterferingAccesses.
@@ -1109,6 +1109,12 @@ struct AAPointerInfoImpl
return AAPointerInfo::manifest(A);
}
+ virtual const_bin_iterator begin() const override { return State::begin(); }
+ virtual const_bin_iterator end() const override { return State::end(); }
+ virtual int64_t numOffsetBins() const override {
+ return State::numOffsetBins();
+ }
+
bool forallInterferingAccesses(
AA::RangeTy Range,
function_ref<bool(const AAPointerInfo::Access &, bool)> CB)
@@ -6521,7 +6527,7 @@ struct AAValueSimplifyCallSiteReturned : AAValueSimplifyImpl {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
- return indicatePessimisticFixpoint();
+ return indicatePessimisticFixpoint();
}
void trackStatistics() const override {
@@ -12688,6 +12694,224 @@ struct AAAddressSpaceCallSiteArgument final : AAAddressSpaceImpl {
};
} // namespace
+/// ----------- Allocation Info ----------
+namespace {
+struct AAAllocationInfoImpl : public AAAllocationInfo {
+ AAAllocationInfoImpl(const IRPosition &IRP, Attributor &A)
+ : AAAllocationInfo(IRP, A) {}
+
+ std::optional<TypeSize> getAllocatedSize() const override {
+ assert(isValidState() && "the AA is invalid");
+ return AssumedAllocatedSize;
+ }
+
+ std::optional<TypeSize> findInitialAllocationSize(Instruction *I,
+ const DataLayout &DL) {
+
+ // TODO: implement case for malloc like instructions
+ switch (I->getOpcode()) {
+ case Instruction::Alloca: {
+ AllocaInst *AI = cast<AllocaInst>(I);
+ return AI->getAllocationSize(DL);
+ }
+ default:
+ return std::nullopt;
+ }
+ }
+
+ ChangeStatus updateImpl(Attributor &A) override {
+
+ const IRPosition &IRP = getIRPosition();
+ Instruction *I = IRP.getCtxI();
+
+ // TODO: update check for malloc like calls
+ if (!isa<AllocaInst>(I))
+ return indicatePessimisticFixpoint();
+
+ bool IsKnownNoCapture;
+ if (!AA::hasAssumedIRAttr<Attribute::NoCapture>(
+ A, this, IRP, DepClassTy::OPTIONAL, IsKnownNoCapture))
+ return indicatePessimisticFixpoint();
+
+ const AAPointerInfo *PI =
+ A.getOrCreateAAFor<AAPointerInfo>(IRP, *this, DepClassTy::REQUIRED);
+
+ if (!PI)
+ return indicatePessimisticFixpoint();
+
+ if (!PI->getState().isValidState())
+ return indicatePessimisticFixpoint();
+
+ const DataLayout &DL = A.getDataLayout();
+ const auto AllocationSize = findInitialAllocationSize(I, DL);
+
+ // If allocation size is nullopt, we give up.
+ if (!AllocationSize)
+ return indicatePessimisticFixpoint();
+
+ // For zero sized allocations, we give up.
+ // Since we can't reduce further
+ if (*AllocationSize == 0)
+ return indicatePessimisticFixpoint();
+
+ int64_t BinSize = PI->numOffsetBins();
+
+ // TODO: implement for multiple bins
+ if (BinSize > 1)
+ return indicatePessimisticFixpoint();
+
+ if (BinSize == 0) {
+ auto NewAllocationSize = std::optional<TypeSize>(TypeSize(0, false));
+ if (!changeAllocationSize(NewAllocationSize))
+ return ChangeStatus::UNCHANGED;
+ return ChangeStatus::CHANGED;
+ }
+
+ // TODO: refactor this to be part of multiple bin case
+ const auto &It = PI->begin();
+
+ // TODO: handle if Offset is not zero
+ if (It->first.Offset != 0)
+ return indicatePessimisticFixpoint();
+
+ uint64_t SizeOfBin = It->first.Offset + It->first.Size;
+
+ if (SizeOfBin >= *AllocationSize)
+ return indicatePessimisticFixpoint();
+
+ auto NewAllocationSize =
+ std::optional<TypeSize>(TypeSize(SizeOfBin * 8, false));
+
+ if (!changeAllocationSize(NewAllocationSize))
+ return ChangeStatus::UNCHANGED;
+
+ return ChangeStatus::CHANGED;
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+
+ assert(isValidState() &&
+ "Manifest should only be called if the state is valid.");
+
+ Instruction *I = getIRPosition().getCtxI();
+
+ auto FixedAllocatedSizeInBits = getAllocatedSize()->getFixedValue();
+
+ unsigned long NumBytesToAllocate = (FixedAllocatedSizeInBits + 7) / 8;
+
+ switch (I->getOpcode()) {
+ // TODO: add case for malloc like calls
+ case Instruction::Alloca: {
+
+ AllocaInst *AI = cast<AllocaInst>(I);
+
+ Type *CharType = Type::getInt8Ty(I->getContext());
+
+ auto *NumBytesToValue = llvm::ConstantInt::get(
+ I->getContext(), llvm::APInt(32, NumBytesToAllocate));
+
+ AllocaInst *NewAllocaInst =
+ new AllocaInst(CharType, AI->getAddressSpace(), NumBytesToValue,
+ AI->getAlign(), AI->getName(), AI->getNextNode());
+
+ if (A.changeAfterManifest(IRPosition::inst(*AI), *NewAllocaInst))
+ return ChangeStatus::CHANGED;
+
+ break;
+ }
+ default:
+ break;
+ }
+
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr(Attributor *A) const override {
+ if (!isValidState())
+ return "allocationinfo(<invalid>)";
+ return "allocationinfo(" +
+ (AssumedAllocatedSize == HasNoAllocationSize
+ ? "none"
+ : std::to_string(AssumedAllocatedSize->getFixedValue())) +
+ ")";
+ }
+
+private:
+ std::optional<TypeSize> AssumedAllocatedSize = HasNoAllocationSize;
+
+ // Maintain the computed allocation size of the object.
+ // Returns (bool) weather the size of the allocation was modified or not.
+ bool changeAllocationSize(std::optional<TypeSize> Size) {
+ if (AssumedAllocatedSize == HasNoAllocationSize ||
+ AssumedAllocatedSize != Size) {
+ AssumedAllocatedSize = Size;
+ return true;
+ }
+ return false;
+ }
+};
+
+struct AAAllocationInfoFloating : AAAllocationInfoImpl {
+ AAAllocationInfoFloating(const IRPosition &IRP, Attributor &A)
+ : AAAllocationInfoImpl(IRP, A) {}
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(allocationinfo);
+ }
+};
+
+struct AAAllocationInfoReturned : AAAllocationInfoImpl {
+ AAAllocationInfoReturned(const IRPosition &IRP, Attributor &A)
+ : AAAllocationInfoImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ // TODO: we don't rewrite function argument for now because it will need to
+ // rewrite the function signature and all call sites
+ (void)indicatePessimisticFixpoint();
+ }
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FNRET_ATTR(allocationinfo);
+ }
+};
+
+struct AAAllocationInfoCallSiteReturned : AAAllocationInfoImpl {
+ AAAllocationInfoCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AAAllocationInfoImpl(IRP, A) {}
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSRET_ATTR(allocationinfo);
+ }
+};
+
+struct AAAllocationInfoArgument : AAAllocationInfoImpl {
+ AAAllocationInfoArgument(const IRPosition &IRP, Attributor &A)
+ : AAAllocationInfoImpl(IRP, A) {}
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_ARG_ATTR(allocationinfo);
+ }
+};
+
+struct AAAllocationInfoCallSiteArgument : AAAllocationInfoImpl {
+ AAAllocationInfoCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AAAllocationInfoImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+
+ (void)indicatePessimisticFixpoint();
+ }
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSARG_ATTR(allocationinfo);
+ }
+};
+} // namespace
+
const char AANoUnwind::ID = 0;
const char AANoSync::ID = 0;
const char AANoFree::ID = 0;
@@ -12721,6 +12945,7 @@ const char AAPointerInfo::ID = 0;
const char AAAssumptionInfo::ID = 0;
const char AAUnderlyingObjects::ID = 0;
const char AAAddressSpace::ID = 0;
+const char AAAllocationInfo::ID = 0;
const char AAIndirectCallInfo::ID = 0;
const char AAGlobalValueInfo::ID = 0;
const char AADenormalFPMath::ID = 0;
@@ -12854,6 +13079,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFPClass)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAddressSpace)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAllocationInfo)
CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify)
CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIsDead)
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll
index a42aedd1da0d5e9..595cb37c6c93ec9 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll
@@ -107,7 +107,9 @@ define i32 @test_inf_promote_caller(i32 %arg) {
; CGSCC-SAME: (i32 [[ARG:%.*]]) #[[ATTR3:[0-9]+]] {
; CGSCC-NEXT: bb:
; CGSCC-NEXT: [[TMP:%.*]] = alloca [[S:%.*]], align 8
-; CGSCC-NEXT: [[TRUETMP1:%.*]] = alloca [[S]], align 8
+; CGSCC-NEXT: [[TMP3:%.*]] = alloca i8, i32 0, align 8
+; CGSCC-NEXT: [[TMP1:%.*]] = alloca [[S]], align 8
+; CGSCC-NEXT: [[TMP14:%.*]] = alloca i8, i32 0, align 8
; CGSCC-NEXT: ret i32 0
;
bb:
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll
index b52e156eb21b9c7..2df81d6cb1832d0 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll
@@ -37,6 +37,8 @@ define internal i32 @caller(ptr %B) {
; CGSCC-LABEL: define {{[^@]+}}@caller
; CGSCC-SAME: () #[[ATTR0]] {
; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4
+; CGSCC-NEXT: [[A2:%.*]] = alloca i8, i32 0, align 4
+; CGSCC-NEXT: [[A1:%.*]] = alloca i8, i32 0, align 4
; CGSCC-NEXT: ret i32 0
;
%A = alloca i32
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
index 732a55101e890c5..7c28de24beea279 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
@@ -54,6 +54,8 @@ define internal i32 @caller(ptr %B) {
; CGSCC-LABEL: define {{[^@]+}}@caller
; CGSCC-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0]] {
; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4
+; CGSCC-NEXT: [[A2:%.*]] = alloca i8, i32 0, align 4
+; CGSCC-NEXT: [[A1:%.*]] = alloca i8, i32 0, align 4
; CGSCC-NEXT: [[C:%.*]] = call i32 @test(ptr noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) #[[ATTR2:[0-9]+]]
; CGSCC-NEXT: ret i32 0
;
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll
index 36347eef39e38f0..38c1d6099042cbc 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll
@@ -30,6 +30,7 @@ define internal i32 @foo(ptr) {
; CHECK-SAME: () addrspace(1) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[RETVAL1:%.*]] = alloca i8, i32 0, align 4
; CHECK-NEXT: call addrspace(0) void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""()
; CHECK-NEXT: unreachable
;
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
index 5a9fdcc2cb64bbd..ac825468a58c17f 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
@@ -34,13 +34,13 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define dso_local i32 @main() {
; TUNIT-LABEL: define {{[^@]+}}@main() {
; TUNIT-NEXT: entry:
-; TUNIT-NEXT: [[ALLOC1:%.*]] = alloca i8, align 8
-; TUNIT-NEXT: [[ALLOC2:%.*]] = alloca i8, align 8
+; TUNIT-NEXT: [[ALLOC11:%.*]] = alloca i8, i32 0, align 8
+; TUNIT-NEXT: [[ALLOC22:%.*]] = alloca i8, i32 0, align 8
; TUNIT-NEXT: [[THREAD:%.*]] = alloca i64, align 8
; TUNIT-NEXT: [[CALL:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @foo, ptr nofree readnone align 4294967296 undef)
; TUNIT-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @bar, ptr noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) undef)
-; TUNIT-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @baz, ptr noalias nocapture nofree noundef nonnull readnone align 8 dereferenceable(1) [[ALLOC1]])
-; TUNIT-NEXT: [[CALL3:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @buz, ptr noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[ALLOC2]])
+; TUNIT-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @baz, ptr noalias nocapture nofree noundef nonnull readnone align 8 dereferenceable(1) [[ALLOC11]])
+; TUNIT-NEXT: [[CALL3:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @buz, ptr noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[ALLOC22]])
; TUNIT-NEXT: ret i32 0
;
; CGSCC-LABEL: define {{[^@]+}}@main() {
diff --git a/llvm/test/Transforms/Attributor/allocator.ll b/llvm/test/Transforms/Attributor/allocator.ll
new file mode 100644
index 000000000000000..7072fd5b9e78d3d
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/allocator.ll
@@ -0,0 +1,518 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals --version 2
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
+
+%struct.Foo = type { i32, i32, i8 }
+
+ at .str = private unnamed_addr constant [17 x i8] c"The value is %d\0A\00", align 1
+
+;.
+; CHECK: @[[_STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr constant [17 x i8] c"The value is %d\0A\00", align 1
+;.
+define dso_local void @positive_alloca_1(i32 noundef %val) #0 {
+; CHECK-LABEL: define dso_local void @positive_alloca_1
+; CHECK-SAME: (i32 noundef [[VAL:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[VAL_ADDR1:%.*]] = alloca i8, i32 4, align 4
+; CHECK-NEXT: [[F2:%.*]] = alloca i8, i32 4, align 4
+; CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR1]], align 4
+; CHECK-NEXT: store i32 10, ptr [[F2]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[F2]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
+; CHECK-NEXT: store i32 [[ADD]], ptr [[F2]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4
+; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP1]], [[VAL]]
+; CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[ADD3]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %val.addr = alloca i64, align 4
+ %f = alloca %struct.Foo, align 4
+ store i32 %val, ptr %val.addr, align 4
+ %field1 = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 0
+ store i32 10, ptr %field1, align 4
+ %field11 = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 0
+ %0 = load i32, ptr %field11, align 4
+ %add = add nsw i32 %0, 1
+ store i32 %add, ptr %field11, align 4
+ %field12 = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 0
+ %1 = load i32, ptr %field12, align 4
+ %2 = load i32, ptr %val.addr, align 4
+ %add3 = add nsw i32 %1, %2
+ %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %add3)
+ ret void
+}
+
+; TODO: change malloc like call
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @positive_malloc_1(ptr noundef %val) #0 {
+; CHECK-LABEL: define dso_local void @positive_malloc_1
+; CHECK-SAME: (ptr nocapture nofree noundef readonly [[VAL:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca ptr, align 8
+; CHECK-NEXT: [[F:%.*]] = alloca ptr, align 8
+; CHECK-NEXT: store ptr [[VAL]], ptr [[VAL_ADDR]], align 8
+; CHECK-NEXT: [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 12)
+; CHECK-NEXT: store ptr [[CALL]], ptr [[F]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 10
+; CHECK-NEXT: store i32 [[ADD]], ptr [[CALL]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4
+; CHECK-NEXT: [[CALL2:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP1]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %val.addr = alloca ptr, align 8
+ %f = alloca ptr, align 8
+ store ptr %val, ptr %val.addr, align 8
+ %call = call noalias ptr @malloc(i64 noundef 12) #3
+ store ptr %call, ptr %f, align 8
+ %0 = load ptr, ptr %val.addr, align 8
+ %1 = load i32, ptr %0, align 4
+ %add = add nsw i32 %1, 10
+ %2 = load ptr, ptr %f, align 8
+ %a = getelementptr inbounds %struct.Foo, ptr %2, i32 0, i32 0
+ store i32 %add, ptr %a, align 4
+ %3 = load ptr, ptr %f, align 8
+ %a1 = getelementptr inbounds %struct.Foo, ptr %3, i32 0, i32 0
+ %4 = load i32, ptr %a1, align 4
+ %call2 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %4)
+ ret void
+}
+
+; TODO: change malloc like call
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @positive_malloc_2(ptr noundef %val) #0 {
+; CHECK-LABEL: define dso_local void @positive_malloc_2
+; CHECK-SAME: (ptr nocapture nofree noundef readonly [[VAL:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca ptr, align 8
+; CHECK-NEXT: [[F:%.*]] = alloca ptr, align 8
+; CHECK-NEXT: store ptr [[VAL]], ptr [[VAL_ADDR]], align 8
+; CHECK-NEXT: [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 60)
+; CHECK-NEXT: store ptr [[CALL]], ptr [[F]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], ptr [[CALL]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4
+; CHECK-NEXT: [[CALL2:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP1]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %val.addr = alloca ptr, align 8
+ %x = alloca i32, align 4
+ %f = alloca ptr, align 8
+ store ptr %val, ptr %val.addr, align 8
+ store i32 15, ptr %x, align 4
+ %0 = load i32, ptr %x, align 4
+ %conv = sext i32 %0 to i64
+ %mul = mul i64 4, %conv
+ %call = call noalias ptr @malloc(i64 noundef %mul)
+ store ptr %call, ptr %f, align 8
+ %1 = load ptr, ptr %val.addr, align 8
+ %2 = load i32, ptr %1, align 4
+ %3 = load ptr, ptr %f, align 8
+ %arrayidx = getelementptr inbounds i32, ptr %3, i64 0
+ store i32 %2, ptr %arrayidx, align 4
+ %4 = load ptr, ptr %f, align 8
+ %arrayidx1 = getelementptr inbounds i32, ptr %4, i64 0
+ %5 = load i32, ptr %arrayidx1, align 4
+ %call2 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %5)
+ ret void
+}
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local ptr @negative_test_escaping_pointer(i32 noundef %val) #0 {
+; CHECK-LABEL: define dso_local ptr @negative_test_escaping_pointer
+; CHECK-SAME: (i32 noundef [[VAL:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[F:%.*]] = alloca ptr, align 8
+; CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
+; CHECK-NEXT: [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 16)
+; CHECK-NEXT: store ptr [[CALL]], ptr [[F]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[F]], align 8
+; CHECK-NEXT: store i32 2, ptr [[TMP0]], align 8
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 10, [[VAL]]
+; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[F]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8
+; CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
+; CHECK-NEXT: store i32 [[ADD2]], ptr [[TMP1]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[F]], align 8
+; CHECK-NEXT: ret ptr [[TMP3]]
+;
+entry:
+ %val.addr = alloca i32, align 4
+ %f = alloca ptr, align 8
+ store i32 %val, ptr %val.addr, align 4
+ %call = call noalias ptr @malloc(i64 noundef 16) #2
+ store ptr %call, ptr %f, align 8
+ %0 = load ptr, ptr %f, align 8
+ %field1 = getelementptr inbounds %struct.Foo, ptr %0, i32 0, i32 0
+ store i32 2, ptr %field1, align 8
+ %1 = load i32, ptr %val.addr, align 4
+ %add = add nsw i32 10, %1
+ %2 = load ptr, ptr %f, align 8
+ %field11 = getelementptr inbounds %struct.Foo, ptr %2, i32 0, i32 0
+ %3 = load i32, ptr %field11, align 8
+ %add2 = add nsw i32 %3, %add
+ store i32 %add2, ptr %field11, align 8
+ %4 = load ptr, ptr %f, align 8
+ ret ptr %4
+}
+
+
+;TODO: The allocation can be reduced here.
+;However, the offsets (load/store etc.) Need to be changed.
+; Function Attrs: noinline nounwind uwtable
+define dso_local { i64, ptr } @positive_test_not_a_single_start_offset(i32 noundef %val) #0 {
+; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define dso_local { i64, ptr } @positive_test_not_a_single_start_offset
+; CHECK-SAME: (i32 noundef [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8
+; CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
+; CHECK-NEXT: store i32 2, ptr [[RETVAL]], align 8
+; CHECK-NEXT: [[FIELD3:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr [[RETVAL]], i32 0, i32 2
+; CHECK-NEXT: store ptr [[VAL_ADDR]], ptr [[FIELD3]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = load { i64, ptr }, ptr [[RETVAL]], align 8
+; CHECK-NEXT: ret { i64, ptr } [[TMP0]]
+;
+entry:
+ %retval = alloca %struct.Foo, align 8
+ %val.addr = alloca i32, align 4
+ store i32 %val, ptr %val.addr, align 4
+ %field1 = getelementptr inbounds %struct.Foo, ptr %retval, i32 0, i32 0
+ store i32 2, ptr %field1, align 8
+ %field3 = getelementptr inbounds %struct.Foo, ptr %retval, i32 0, i32 2
+ store ptr %val.addr, ptr %field3, align 8
+ %0 = load { i64, ptr }, ptr %retval, align 8
+ ret { i64, ptr } %0
+}
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @positive_test_reduce_array_allocation_1() {
+; CHECK-LABEL: define dso_local void @positive_test_reduce_array_allocation_1() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ARRAY1:%.*]] = alloca i8, i32 4, align 8
+; CHECK-NEXT: store i32 0, ptr [[ARRAY1]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAY1]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 2
+; CHECK-NEXT: store i32 [[TMP1]], ptr [[ARRAY1]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = add i32 1, 2
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAY1]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: store i32 [[TMP4]], ptr [[ARRAY1]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAY1]], align 8
+; CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP5]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %array = alloca ptr, i32 10
+ store i32 0, ptr %array
+ %0 = load i32, ptr %array
+ %1 = add i32 %0, 2
+ store i32 %1, ptr %array
+ %2 = add i32 1, 2
+ %3 = load i32, ptr %array
+ %4 = add i32 %2, %3
+ store i32 %4, ptr %array
+ %5 = load i32, ptr %array
+ %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %5)
+ ret void
+}
+
+
+; Function Attrs: noinline nounwind uwtable
+; TODO: Here the array size is not known at compile time.
+; However the array does not escape and is only partially used.
+; Should the optimization reduce the allocation size regardless? Based on AAPointerInfo.
+define dso_local void @baz(ptr noundef %val, i32 noundef %arrayLength) #0 {
+; CHECK-LABEL: define dso_local void @baz
+; CHECK-SAME: (ptr nocapture nofree noundef readonly [[VAL:%.*]], i32 noundef [[ARRAYLENGTH:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca ptr, align 8
+; CHECK-NEXT: [[ARRAYLENGTH_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[F:%.*]] = alloca ptr, align 8
+; CHECK-NEXT: store ptr [[VAL]], ptr [[VAL_ADDR]], align 8
+; CHECK-NEXT: store i32 [[ARRAYLENGTH]], ptr [[ARRAYLENGTH_ADDR]], align 4
+; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[ARRAYLENGTH]] to i64
+; CHECK-NEXT: [[MUL:%.*]] = mul i64 4, [[CONV]]
+; CHECK-NEXT: [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef [[MUL]])
+; CHECK-NEXT: store ptr [[CALL]], ptr [[F]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], ptr [[CALL]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4
+; CHECK-NEXT: [[CALL2:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP1]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %val.addr = alloca ptr, align 8
+ %arrayLength.addr = alloca i32, align 4
+ %f = alloca ptr, align 8
+ store ptr %val, ptr %val.addr, align 8
+ store i32 %arrayLength, ptr %arrayLength.addr, align 4
+ %0 = load i32, ptr %arrayLength.addr, align 4
+ %conv = sext i32 %0 to i64
+ %mul = mul i64 4, %conv
+ %call = call noalias ptr @malloc(i64 noundef %mul) #3
+ store ptr %call, ptr %f, align 8
+ %1 = load ptr, ptr %val.addr, align 8
+ %2 = load i32, ptr %1, align 4
+ %3 = load ptr, ptr %f, align 8
+ %arrayidx = getelementptr inbounds i32, ptr %3, i64 0
+ store i32 %2, ptr %arrayidx, align 4
+ %4 = load ptr, ptr %f, align 8
+ %arrayidx1 = getelementptr inbounds i32, ptr %4, i64 0
+ %5 = load i32, ptr %arrayidx1, align 4
+ %call2 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %5)
+ ret void
+}
+
+;TODO: Here since only even indexes of the array are part of the output
+;We can reduce the allocation by half and make an array that's accessed contiguously
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @positive_test_reduce_array_allocation_2() #0 {
+; CHECK-LABEL: define dso_local void @positive_test_reduce_array_allocation_2() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ARRAY:%.*]] = alloca ptr, align 8
+; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 40000)
+; CHECK-NEXT: store ptr [[CALL]], ptr [[ARRAY]], align 8
+; CHECK-NEXT: store i32 0, ptr [[I]], align 4
+; CHECK-NEXT: br label [[FOR_COND:%.*]]
+; CHECK: for.cond:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10000
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM]]
+; CHECK-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: br label [[FOR_INC:%.*]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 2
+; CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+; CHECK-NEXT: br label [[FOR_COND]]
+; CHECK: for.end:
+; CHECK-NEXT: store i32 0, ptr [[I]], align 4
+; CHECK-NEXT: br label [[FOR_COND1:%.*]]
+; CHECK: for.cond1:
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP4]], 10000
+; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END9:%.*]]
+; CHECK: for.body3:
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP5]] to i64
+; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM4]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP6]], 1
+; CHECK-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT: br label [[FOR_INC7:%.*]]
+; CHECK: for.inc7:
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP7]], 2
+; CHECK-NEXT: store i32 [[ADD8]], ptr [[I]], align 4
+; CHECK-NEXT: br label [[FOR_COND1]]
+; CHECK: for.end9:
+; CHECK-NEXT: store i32 0, ptr [[I]], align 4
+; CHECK-NEXT: br label [[FOR_COND10:%.*]]
+; CHECK: for.cond10:
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT: [[CMP11:%.*]] = icmp slt i32 [[TMP8]], 10000
+; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY12:%.*]], label [[FOR_END18:%.*]]
+; CHECK: for.body12:
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP9]] to i64
+; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM13]]
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX14]], align 4
+; CHECK-NEXT: [[CALL15:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP10]])
+; CHECK-NEXT: br label [[FOR_INC16:%.*]]
+; CHECK: for.inc16:
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP11]], 2
+; CHECK-NEXT: store i32 [[ADD17]], ptr [[I]], align 4
+; CHECK-NEXT: br label [[FOR_COND10]]
+; CHECK: for.end18:
+; CHECK-NEXT: ret void
+;
+entry:
+ %array = alloca ptr, align 8
+ %i = alloca i32, align 4
+ %call = call noalias ptr @malloc(i64 noundef 40000) #3
+ store ptr %call, ptr %array, align 8
+ store i32 0, ptr %i, align 4
+ br label %for.cond
+
+for.cond:
+ %0 = load i32, ptr %i, align 4
+ %cmp = icmp slt i32 %0, 10000
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %1 = load i32, ptr %i, align 4
+ %2 = load ptr, ptr %array, align 8
+ %3 = load i32, ptr %i, align 4
+ %idxprom = sext i32 %3 to i64
+ %arrayidx = getelementptr inbounds i32, ptr %2, i64 %idxprom
+ store i32 %1, ptr %arrayidx, align 4
+ br label %for.inc
+
+for.inc:
+ %4 = load i32, ptr %i, align 4
+ %add = add nsw i32 %4, 2
+ store i32 %add, ptr %i, align 4
+ br label %for.cond
+
+for.end:
+ store i32 0, ptr %i, align 4
+ br label %for.cond1
+
+for.cond1:
+ %5 = load i32, ptr %i, align 4
+ %cmp2 = icmp slt i32 %5, 10000
+ br i1 %cmp2, label %for.body3, label %for.end9
+
+for.body3:
+ %6 = load ptr, ptr %array, align 8
+ %7 = load i32, ptr %i, align 4
+ %idxprom4 = sext i32 %7 to i64
+ %arrayidx5 = getelementptr inbounds i32, ptr %6, i64 %idxprom4
+ %8 = load i32, ptr %arrayidx5, align 4
+ %add6 = add nsw i32 %8, 1
+ store i32 %add6, ptr %arrayidx5, align 4
+ br label %for.inc7
+
+for.inc7:
+ %9 = load i32, ptr %i, align 4
+ %add8 = add nsw i32 %9, 2
+ store i32 %add8, ptr %i, align 4
+ br label %for.cond1
+
+for.end9:
+ store i32 0, ptr %i, align 4
+ br label %for.cond10
+
+for.cond10:
+ %10 = load i32, ptr %i, align 4
+ %cmp11 = icmp slt i32 %10, 10000
+ br i1 %cmp11, label %for.body12, label %for.end18
+
+for.body12:
+ %11 = load ptr, ptr %array, align 8
+ %12 = load i32, ptr %i, align 4
+ %idxprom13 = sext i32 %12 to i64
+ %arrayidx14 = getelementptr inbounds i32, ptr %11, i64 %idxprom13
+ %13 = load i32, ptr %arrayidx14, align 4
+ %call15 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %13)
+ br label %for.inc16
+
+for.inc16:
+ %14 = load i32, ptr %i, align 4
+ %add17 = add nsw i32 %14, 2
+ store i32 %add17, ptr %i, align 4
+ br label %for.cond10
+
+for.end18:
+ ret void
+}
+
+
+define dso_local void @pthread_test(){
+; TUNIT-LABEL: define dso_local void @pthread_test() {
+; TUNIT-NEXT: [[ARG1:%.*]] = alloca i8, align 8
+; TUNIT-NEXT: [[THREAD:%.*]] = alloca i64, align 8
+; TUNIT-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_allocation_should_remain_same, ptr noundef nonnull align 8 dereferenceable(1) [[ARG1]])
+; TUNIT-NEXT: [[F1:%.*]] = alloca i8, i32 4, align 4
+; TUNIT-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_allocation_should_be_reduced, ptr noalias nocapture nofree nonnull readnone align 4 dereferenceable(12) undef)
+; TUNIT-NEXT: [[F2:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; TUNIT-NEXT: [[CALL3:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_check_captured_pointer, ptr noundef nonnull align 4 dereferenceable(12) [[F2]])
+; TUNIT-NEXT: ret void
+;
+; CGSCC-LABEL: define dso_local void @pthread_test() {
+; CGSCC-NEXT: [[ARG1:%.*]] = alloca i8, align 8
+; CGSCC-NEXT: [[THREAD:%.*]] = alloca i64, align 8
+; CGSCC-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_allocation_should_remain_same, ptr noundef nonnull align 8 dereferenceable(1) [[ARG1]])
+; CGSCC-NEXT: [[F:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CGSCC-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_allocation_should_be_reduced, ptr noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(12) [[F]])
+; CGSCC-NEXT: [[F2:%.*]] = alloca [[STRUCT_FOO]], align 4
+; CGSCC-NEXT: [[CALL3:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_check_captured_pointer, ptr noundef nonnull align 4 dereferenceable(12) [[F2]])
+; CGSCC-NEXT: ret void
+;
+ %arg1 = alloca i8, align 8
+ %thread = alloca i64, align 8
+ %call1 = call i32 @pthread_create(ptr nonnull %thread, ptr null, ptr nonnull @pthread_allocation_should_remain_same, ptr %arg1)
+ %f = alloca %struct.Foo, align 4
+ %call2 = call i32 @pthread_create(ptr nonnull %thread, ptr null, ptr nonnull @pthread_allocation_should_be_reduced, ptr %f)
+ %f2 = alloca %struct.Foo, align 4
+ %call3 = call i32 @pthread_create(ptr nonnull %thread, ptr null, ptr nonnull @pthread_check_captured_pointer, ptr %f2)
+ ret void
+}
+
+define internal ptr @pthread_allocation_should_remain_same(ptr %arg) {
+; CHECK-LABEL: define internal noundef nonnull align 8 dereferenceable(1) ptr @pthread_allocation_should_remain_same
+; CHECK-SAME: (ptr noundef nonnull returned align 8 dereferenceable(1) [[ARG:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, ptr noundef nonnull align 8 dereferenceable(1) [[ARG]])
+; CHECK-NEXT: ret ptr [[ARG]]
+;
+entry:
+ %call = call i32 (ptr, ...) @printf(ptr noundef @.str, ptr noundef %arg)
+ ret ptr %arg
+}
+
+define internal void @pthread_allocation_should_be_reduced(ptr %arg) {
+;
+; TUNIT-LABEL: define internal void @pthread_allocation_should_be_reduced
+; TUNIT-SAME: (ptr noalias nocapture nofree nonnull readnone align 4 dereferenceable(12) [[ARG:%.*]]) {
+; TUNIT-NEXT: entry:
+; TUNIT-NEXT: [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 undef)
+; TUNIT-NEXT: ret void
+;
+; CGSCC-LABEL: define internal void @pthread_allocation_should_be_reduced
+; CGSCC-SAME: (ptr noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(12) [[ARG:%.*]]) {
+; CGSCC-NEXT: entry:
+; CGSCC-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG]], align 4
+; CGSCC-NEXT: [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP0]])
+; CGSCC-NEXT: ret void
+;
+entry:
+ %field1 = getelementptr inbounds %struct.Foo, ptr %arg, i32 0, i32 0
+ %0 = load i32, ptr %field1, align 4
+ %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0)
+ ret void
+}
+
+define internal void @pthread_check_captured_pointer(ptr %arg){
+; CHECK-LABEL: define internal void @pthread_check_captured_pointer
+; CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(12) [[ARG:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @external_call(ptr noundef nonnull align 4 dereferenceable(12) [[ARG]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %field1 = getelementptr inbounds %struct.Foo, ptr %arg, i32 0, i32 0
+ call void @external_call(ptr %field1)
+ ret void
+}
+
+
+declare external void @external_call(ptr)
+
+declare !callback !0 dso_local i32 @pthread_create(ptr, ptr, ptr, ptr)
+!1 = !{i64 2, i64 3, i1 false}
+!0 = !{!1}
+
+declare i32 @printf(ptr noundef, ...) #1
+
+; Function Attrs: nounwind allocsize(0)
+declare noalias ptr @malloc(i64 noundef) #1
+;.
+; CHECK: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
+;.
+; CHECK: [[META0:![0-9]+]] = !{!1}
+; CHECK: [[META1:![0-9]+]] = !{i64 2, i64 3, i1 false}
+;.
diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll
index 370d72d99e736f3..4f267a7abe305ee 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll
@@ -503,6 +503,7 @@ define i32 @malloc_in_loop(i32 %arg) {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[I1:%.*]] = alloca ptr, align 8
+; CHECK-NEXT: [[I11:%.*]] = alloca i8, i32 0, align 8
; CHECK-NEXT: store i32 [[ARG]], ptr [[I]], align 4
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb2:
diff --git a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
index e6c8491070c2073..476e65b4e465382 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
@@ -453,6 +453,7 @@ define i32 @malloc_in_loop(i32 %arg) {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[I1:%.*]] = alloca ptr, align 8
+; CHECK-NEXT: [[I11:%.*]] = alloca i8, i32 0, align 8
; CHECK-NEXT: store i32 [[ARG]], ptr [[I]], align 4
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb2:
diff --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll
index 06195b97ac5fd99..d6718884ea957de 100644
--- a/llvm/test/Transforms/Attributor/liveness.ll
+++ b/llvm/test/Transforms/Attributor/liveness.ll
@@ -2587,9 +2587,9 @@ define void @bad_gep() {
; TUNIT-LABEL: define {{[^@]+}}@bad_gep
; TUNIT-SAME: () #[[ATTR13]] {
; TUNIT-NEXT: entry:
-; TUNIT-NEXT: [[N:%.*]] = alloca i8, align 1
-; TUNIT-NEXT: [[M:%.*]] = alloca i8, align 1
-; TUNIT-NEXT: call void @llvm.lifetime.start.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N]]) #[[ATTR18:[0-9]+]]
+; TUNIT-NEXT: [[N1:%.*]] = alloca i8, i32 0, align 1
+; TUNIT-NEXT: [[M2:%.*]] = alloca i8, i32 0, align 1
+; TUNIT-NEXT: call void @llvm.lifetime.start.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N1]]) #[[ATTR18:[0-9]+]]
; TUNIT-NEXT: br label [[EXIT:%.*]]
; TUNIT: while.body:
; TUNIT-NEXT: unreachable
@@ -2598,16 +2598,16 @@ define void @bad_gep() {
; TUNIT: if.end:
; TUNIT-NEXT: unreachable
; TUNIT: exit:
-; TUNIT-NEXT: call void @llvm.lifetime.end.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N]]) #[[ATTR18]]
+; TUNIT-NEXT: call void @llvm.lifetime.end.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N1]]) #[[ATTR18]]
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; CGSCC-LABEL: define {{[^@]+}}@bad_gep
; CGSCC-SAME: () #[[ATTR6]] {
; CGSCC-NEXT: entry:
-; CGSCC-NEXT: [[N:%.*]] = alloca i8, align 1
-; CGSCC-NEXT: [[M:%.*]] = alloca i8, align 1
-; CGSCC-NEXT: call void @llvm.lifetime.start.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N]]) #[[ATTR21:[0-9]+]]
+; CGSCC-NEXT: [[N1:%.*]] = alloca i8, i32 0, align 1
+; CGSCC-NEXT: [[M2:%.*]] = alloca i8, i32 0, align 1
+; CGSCC-NEXT: call void @llvm.lifetime.start.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N1]]) #[[ATTR21:[0-9]+]]
; CGSCC-NEXT: br label [[EXIT:%.*]]
; CGSCC: while.body:
; CGSCC-NEXT: unreachable
@@ -2616,7 +2616,7 @@ define void @bad_gep() {
; CGSCC: if.end:
; CGSCC-NEXT: unreachable
; CGSCC: exit:
-; CGSCC-NEXT: call void @llvm.lifetime.end.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N]]) #[[ATTR21]]
+; CGSCC-NEXT: call void @llvm.lifetime.end.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N1]]) #[[ATTR21]]
; CGSCC-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/Attributor/nodelete.ll b/llvm/test/Transforms/Attributor/nodelete.ll
index 9d754506c5c9d7c..c28cb2837934815 100644
--- a/llvm/test/Transforms/Attributor/nodelete.ll
+++ b/llvm/test/Transforms/Attributor/nodelete.ll
@@ -10,6 +10,7 @@ define hidden i64 @f1() align 2 {
; TUNIT-LABEL: define {{[^@]+}}@f1
; TUNIT-SAME: () #[[ATTR0:[0-9]+]] align 2 {
; TUNIT-NEXT: entry:
+; TUNIT-NEXT: [[REF_TMP1:%.*]] = alloca i8, i32 0, align 8
; TUNIT-NEXT: ret i64 undef
;
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
@@ -17,6 +18,7 @@ define hidden i64 @f1() align 2 {
; CGSCC-SAME: () #[[ATTR0:[0-9]+]] align 2 {
; CGSCC-NEXT: entry:
; CGSCC-NEXT: [[REF_TMP:%.*]] = alloca [[A:%.*]], align 8
+; CGSCC-NEXT: [[REF_TMP1:%.*]] = alloca i8, i32 0, align 8
; CGSCC-NEXT: [[CALL2:%.*]] = call i64 @f2() #[[ATTR2:[0-9]+]]
; CGSCC-NEXT: ret i64 [[CALL2]]
;
More information about the llvm-commits
mailing list