[llvm] [Attributor] New attribute to identify what byte ranges are alive for an allocation (PR #66148)

Vidhush Singhal via llvm-commits llvm-commits at lists.llvm.org
Sat Oct 28 20:58:02 PDT 2023


https://github.com/vidsinghal updated https://github.com/llvm/llvm-project/pull/66148

>From 5e92f240f8b48128c38e2a33389f97407294527c Mon Sep 17 00:00:00 2001
From: vidsinghal <vidush.sl at gmail.com>
Date: Fri, 4 Aug 2023 00:06:56 -0400
Subject: [PATCH] [Attributor] New attribute to identify what byte ranges are
 alive for an allocation

Automatically reduces the size of allocations to the byte range that is alive.
Only implements the case where a single range, starting at the beginning of the allocation, is alive.

Differential Revision: https://reviews.llvm.org/D157068
---
 llvm/include/llvm/Transforms/IPO/Attributor.h |  43 ++
 llvm/lib/Transforms/IPO/Attributor.cpp        |  15 +-
 .../Transforms/IPO/AttributorAttributes.cpp   | 240 +++++++-
 .../Attributor/ArgumentPromotion/crash.ll     |   4 +-
 .../live_called_from_dead.ll                  |   2 +
 .../live_called_from_dead_2.ll                |   2 +
 .../nonzero-address-spaces.ll                 |   1 +
 .../Attributor/IPConstantProp/pthreads.ll     |   8 +-
 llvm/test/Transforms/Attributor/allocator.ll  | 518 ++++++++++++++++++
 .../Transforms/Attributor/heap_to_stack.ll    |   1 +
 .../Attributor/heap_to_stack_gpu.ll           |   1 +
 llvm/test/Transforms/Attributor/liveness.ll   |  16 +-
 llvm/test/Transforms/Attributor/nodelete.ll   |   2 +
 13 files changed, 830 insertions(+), 23 deletions(-)
 create mode 100644 llvm/test/Transforms/Attributor/allocator.ll

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index bd1bd8261123e51..2fc46172c922a8e 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -103,6 +103,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/Analysis/AssumeBundleQueries.h"
 #include "llvm/Analysis/CFG.h"
@@ -132,6 +133,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ModRef.h"
 #include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/TypeSize.h"
 #include "llvm/TargetParser/Triple.h"
 #include "llvm/Transforms/Utils/CallGraphUpdater.h"
 
@@ -6117,6 +6119,12 @@ struct AAPointerInfo : public AbstractAttribute {
   /// See AbstractAttribute::getIdAddr()
   const char *getIdAddr() const override { return &ID; }
 
+  using OffsetBinsTy = DenseMap<AA::RangeTy, SmallSet<unsigned, 4>>;
+  using const_bin_iterator = OffsetBinsTy::const_iterator;
+  virtual const_bin_iterator begin() const = 0;
+  virtual const_bin_iterator end() const = 0;
+  virtual int64_t numOffsetBins() const = 0;
+
   /// Call \p CB on all accesses that might interfere with \p Range and return
   /// true if all such accesses were known and the callback returned true for
   /// all of them, false otherwise. An access interferes with an offset-size
@@ -6270,6 +6278,41 @@ struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
   static const char ID;
 };
 
+/// An abstract interface for the allocated size of an allocation.
+struct AAAllocationInfo : public StateWrapper<BooleanState, AbstractAttribute> {
+  AAAllocationInfo(const IRPosition &IRP, Attributor &A)
+      : StateWrapper<BooleanState, AbstractAttribute>(IRP) {}
+
+  /// See AbstractAttribute::isValidIRPositionForInit
+  static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+    if (!IRP.getAssociatedType()->isPtrOrPtrVectorTy())
+      return false;
+    return AbstractAttribute::isValidIRPositionForInit(A, IRP);
+  }
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AAAllocationInfo &createForPosition(const IRPosition &IRP,
+                                             Attributor &A);
+
+  /// Return the assumed allocated size, or HasNoAllocationSize if none is set.
+  virtual std::optional<TypeSize> getAllocatedSize() const = 0;
+
+  /// See AbstractAttribute::getName()
+  const std::string getName() const override { return "AAAllocationInfo"; }
+
+  /// See AbstractAttribute::getIdAddr()
+  const char *getIdAddr() const override { return &ID; }
+
+  /// Return true if \p AA is an AAAllocationInfo.
+  static bool classof(const AbstractAttribute *AA) {
+    return (AA->getIdAddr() == &ID);
+  }
+
+  constexpr static const std::optional<TypeSize> HasNoAllocationSize =
+      std::optional<TypeSize>(TypeSize(-1, true));
+  static const char ID;
+};
+
 /// An abstract interface for llvm::GlobalValue information interference.
 struct AAGlobalValueInfo
     : public StateWrapper<BooleanState, AbstractAttribute> {
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 49ced893d5c7340..f1a88bc564ced71 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -3611,14 +3611,13 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
   };
 
   auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
-  bool Success;
+  [[maybe_unused]] bool Success;
   bool UsedAssumedInformation = false;
   Success = checkForAllInstructionsImpl(
       nullptr, OpcodeInstMap, CallSitePred, nullptr, nullptr,
       {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
        (unsigned)Instruction::Call},
       UsedAssumedInformation);
-  (void)Success;
   assert(Success && "Expected the check call to be successful!");
 
   auto LoadStorePred = [&](Instruction &I) -> bool {
@@ -3644,7 +3643,17 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
       nullptr, OpcodeInstMap, LoadStorePred, nullptr, nullptr,
       {(unsigned)Instruction::Load, (unsigned)Instruction::Store},
       UsedAssumedInformation);
-  (void)Success;
+  assert(Success && "Expected the check call to be successful!");
+
+  // Seed an AAAllocationInfo for every alloca instruction.
+  auto AAAllocationInfoPred = [&](Instruction &I) -> bool {
+    getOrCreateAAFor<AAAllocationInfo>(IRPosition::value(I));
+    return true;
+  };
+
+  Success = checkForAllInstructionsImpl(
+      nullptr, OpcodeInstMap, AAAllocationInfoPred, nullptr, nullptr,
+      {(unsigned)Instruction::Alloca}, UsedAssumedInformation);
   assert(Success && "Expected the check call to be successful!");
 }
 
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index bbb0cfa0eb05fe6..b33255db4745717 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -65,6 +65,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TypeSize.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/CallPromotionUtils.h"
@@ -192,6 +193,7 @@ PIPE_OPERATOR(AAPointerInfo)
 PIPE_OPERATOR(AAAssumptionInfo)
 PIPE_OPERATOR(AAUnderlyingObjects)
 PIPE_OPERATOR(AAAddressSpace)
+PIPE_OPERATOR(AAAllocationInfo)
 PIPE_OPERATOR(AAIndirectCallInfo)
 PIPE_OPERATOR(AAGlobalValueInfo)
 PIPE_OPERATOR(AADenormalFPMath)
@@ -881,11 +883,9 @@ struct AA::PointerInfo::State : public AbstractState {
                          AAPointerInfo::AccessKind Kind, Type *Ty,
                          Instruction *RemoteI = nullptr);
 
-  using OffsetBinsTy = DenseMap<RangeTy, SmallSet<unsigned, 4>>;
-
-  using const_bin_iterator = OffsetBinsTy::const_iterator;
-  const_bin_iterator begin() const { return OffsetBins.begin(); }
-  const_bin_iterator end() const { return OffsetBins.end(); }
+  AAPointerInfo::const_bin_iterator begin() const { return OffsetBins.begin(); }
+  AAPointerInfo::const_bin_iterator end() const { return OffsetBins.end(); }
+  int64_t numOffsetBins() const { return OffsetBins.size(); }
 
   const AAPointerInfo::Access &getAccess(unsigned Index) const {
     return AccessList[Index];
@@ -905,7 +905,7 @@ struct AA::PointerInfo::State : public AbstractState {
   // are all combined into a single Access object. This may result in loss of
   // information in RangeTy in the Access object.
   SmallVector<AAPointerInfo::Access> AccessList;
-  OffsetBinsTy OffsetBins;
+  AAPointerInfo::OffsetBinsTy OffsetBins;
   DenseMap<const Instruction *, SmallVector<unsigned>> RemoteIMap;
 
   /// See AAPointerInfo::forallInterferingAccesses.
@@ -1109,6 +1109,12 @@ struct AAPointerInfoImpl
     return AAPointerInfo::manifest(A);
   }
 
+  virtual const_bin_iterator begin() const override { return State::begin(); }
+  virtual const_bin_iterator end() const override { return State::end(); }
+  virtual int64_t numOffsetBins() const override {
+    return State::numOffsetBins();
+  }
+
   bool forallInterferingAccesses(
       AA::RangeTy Range,
       function_ref<bool(const AAPointerInfo::Access &, bool)> CB)
@@ -6521,7 +6527,7 @@ struct AAValueSimplifyCallSiteReturned : AAValueSimplifyImpl {
 
   /// See AbstractAttribute::updateImpl(...).
   ChangeStatus updateImpl(Attributor &A) override {
-        return indicatePessimisticFixpoint();
+    return indicatePessimisticFixpoint();
   }
 
   void trackStatistics() const override {
@@ -12688,6 +12694,224 @@ struct AAAddressSpaceCallSiteArgument final : AAAddressSpaceImpl {
 };
 } // namespace
 
+/// ----------- Allocation Info ----------
+namespace {
+struct AAAllocationInfoImpl : public AAAllocationInfo {
+  AAAllocationInfoImpl(const IRPosition &IRP, Attributor &A)
+      : AAAllocationInfo(IRP, A) {}
+
+  /// See AAAllocationInfo::getAllocatedSize(). The size is kept in bits.
+  std::optional<TypeSize> getAllocatedSize() const override {
+    assert(isValidState() && "the AA is invalid");
+    return AssumedAllocatedSize;
+  }
+
+  /// Return the size of the allocation performed by \p I as reported by the
+  /// IR, or std::nullopt if \p I is not a supported allocation.
+  std::optional<TypeSize> findInitialAllocationSize(Instruction *I,
+                                                    const DataLayout &DL) {
+    // TODO: implement case for malloc like instructions
+    switch (I->getOpcode()) {
+    case Instruction::Alloca: {
+      AllocaInst *AI = cast<AllocaInst>(I);
+      return AI->getAllocationSize(DL);
+    }
+    default:
+      return std::nullopt;
+    }
+  }
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    const IRPosition &IRP = getIRPosition();
+    Instruction *I = IRP.getCtxI();
+
+    // TODO: update check for malloc like calls
+    if (!isa_and_nonnull<AllocaInst>(I))
+      return indicatePessimisticFixpoint();
+
+    // Only allocations whose address is not captured are rewritten.
+    bool IsKnownNoCapture;
+    if (!AA::hasAssumedIRAttr<Attribute::NoCapture>(
+            A, this, IRP, DepClassTy::OPTIONAL, IsKnownNoCapture))
+      return indicatePessimisticFixpoint();
+
+    const AAPointerInfo *PI =
+        A.getOrCreateAAFor<AAPointerInfo>(IRP, *this, DepClassTy::REQUIRED);
+    if (!PI || !PI->getState().isValidState())
+      return indicatePessimisticFixpoint();
+
+    const DataLayout &DL = A.getDataLayout();
+    const auto AllocationSize = findInitialAllocationSize(I, DL);
+
+    // If allocation size is nullopt, we give up.
+    if (!AllocationSize)
+      return indicatePessimisticFixpoint();
+
+    // For zero sized allocations, we give up.
+    // Since we can't reduce further
+    if (*AllocationSize == 0)
+      return indicatePessimisticFixpoint();
+
+    int64_t NumBins = PI->numOffsetBins();
+    // TODO: implement for multiple bins
+    if (NumBins > 1)
+      return indicatePessimisticFixpoint();
+
+    // No access bin at all: assume none of the allocation is needed.
+    if (NumBins == 0) {
+      auto NewAllocationSize = std::optional<TypeSize>(TypeSize(0, false));
+      if (!changeAllocationSize(NewAllocationSize))
+        return ChangeStatus::UNCHANGED;
+      return ChangeStatus::CHANGED;
+    }
+
+    // TODO: refactor this to be part of multiple bin case
+    const auto It = PI->begin();
+    // TODO: handle if Offset is not zero
+    if (It->first.Offset != 0)
+      return indicatePessimisticFixpoint();
+
+    // End of the single accessed range; multiplied by 8 below to get bits.
+    uint64_t SizeOfBin = It->first.Offset + It->first.Size;
+
+    // Nothing to gain if the accessed range covers the whole allocation.
+    if (SizeOfBin >= *AllocationSize)
+      return indicatePessimisticFixpoint();
+
+    // The assumed size is stored in bits; manifest() converts it back.
+    auto NewAllocationSize =
+        std::optional<TypeSize>(TypeSize(SizeOfBin * 8, false));
+    if (!changeAllocationSize(NewAllocationSize))
+      return ChangeStatus::UNCHANGED;
+
+    return ChangeStatus::CHANGED;
+  }
+
+  /// See AbstractAttribute::manifest(...).
+  ChangeStatus manifest(Attributor &A) override {
+    assert(isValidState() &&
+           "Manifest should only be called if the state is valid.");
+
+    // Defensive: never turn the "no size" sentinel into an allocation.
+    if (AssumedAllocatedSize == HasNoAllocationSize)
+      return ChangeStatus::UNCHANGED;
+
+    Instruction *I = getIRPosition().getCtxI();
+    // Bits -> bytes, rounded up; uint64_t is 64 bits wide on every host.
+    auto FixedAllocatedSizeInBits = getAllocatedSize()->getFixedValue();
+    uint64_t NumBytesToAllocate = (FixedAllocatedSizeInBits + 7) / 8;
+
+    switch (I->getOpcode()) {
+    // TODO: add case for malloc like calls
+    case Instruction::Alloca: {
+      AllocaInst *AI = cast<AllocaInst>(I);
+      Type *CharType = Type::getInt8Ty(I->getContext());
+      auto *NumBytesToValue = llvm::ConstantInt::get(
+          I->getContext(), llvm::APInt(32, NumBytesToAllocate));
+
+      // Build an i8 array alloca of the reduced size right after the original.
+      AllocaInst *NewAllocaInst =
+          new AllocaInst(CharType, AI->getAddressSpace(), NumBytesToValue,
+                         AI->getAlign(), AI->getName(), AI->getNextNode());
+
+      if (A.changeAfterManifest(IRPosition::inst(*AI), *NewAllocaInst))
+        return ChangeStatus::CHANGED;
+      break;
+    }
+    default:
+      break;
+    }
+
+    return ChangeStatus::UNCHANGED;
+  }
+
+  /// See AbstractAttribute::getAsStr().
+  const std::string getAsStr(Attributor *A) const override {
+    if (!isValidState())
+      return "allocationinfo(<invalid>)";
+    return "allocationinfo(" +
+           (AssumedAllocatedSize == HasNoAllocationSize
+                ? "none"
+                : std::to_string(AssumedAllocatedSize->getFixedValue())) +
+           ")";
+  }
+
+private:
+  /// Assumed allocation size in bits; HasNoAllocationSize until one is set.
+  std::optional<TypeSize> AssumedAllocatedSize = HasNoAllocationSize;
+
+  /// Record \p Size as the assumed allocation size.
+  /// Returns whether the assumed size was modified.
+  bool changeAllocationSize(std::optional<TypeSize> Size) {
+    if (AssumedAllocatedSize == HasNoAllocationSize ||
+        AssumedAllocatedSize != Size) {
+      AssumedAllocatedSize = Size;
+      return true;
+    }
+    return false;
+  }
+};
+
+/// AAAllocationInfo for a floating value position.
+struct AAAllocationInfoFloating : AAAllocationInfoImpl {
+  AAAllocationInfoFloating(const IRPosition &IRP, Attributor &A)
+      : AAAllocationInfoImpl(IRP, A) {}
+  void trackStatistics() const override {
+    STATS_DECLTRACK_FLOATING_ATTR(allocationinfo);
+  }
+};
+
+/// AAAllocationInfo for a function return position.
+struct AAAllocationInfoReturned : AAAllocationInfoImpl {
+  AAAllocationInfoReturned(const IRPosition &IRP, Attributor &A)
+      : AAAllocationInfoImpl(IRP, A) {}
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    // TODO: allocations are not rewritten at this position for now because it
+    // would need to rewrite the function signature and all call sites
+    (void)indicatePessimisticFixpoint();
+  }
+
+  void trackStatistics() const override {
+    STATS_DECLTRACK_FNRET_ATTR(allocationinfo);
+  }
+};
+
+/// AAAllocationInfo for a call site return position.
+struct AAAllocationInfoCallSiteReturned : AAAllocationInfoImpl {
+  AAAllocationInfoCallSiteReturned(const IRPosition &IRP, Attributor &A)
+      : AAAllocationInfoImpl(IRP, A) {}
+  void trackStatistics() const override {
+    STATS_DECLTRACK_CSRET_ATTR(allocationinfo);
+  }
+};
+
+/// AAAllocationInfo for a function argument position.
+struct AAAllocationInfoArgument : AAAllocationInfoImpl {
+  AAAllocationInfoArgument(const IRPosition &IRP, Attributor &A)
+      : AAAllocationInfoImpl(IRP, A) {}
+  void trackStatistics() const override {
+    STATS_DECLTRACK_ARG_ATTR(allocationinfo);
+  }
+};
+
+/// AAAllocationInfo for a call site argument position.
+struct AAAllocationInfoCallSiteArgument : AAAllocationInfoImpl {
+  AAAllocationInfoCallSiteArgument(const IRPosition &IRP, Attributor &A)
+      : AAAllocationInfoImpl(IRP, A) {}
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    // Nothing is deduced for call site arguments; give up immediately.
+    (void)indicatePessimisticFixpoint();
+  }
+
+  void trackStatistics() const override {
+    STATS_DECLTRACK_CSARG_ATTR(allocationinfo);
+  }
+};
+} // namespace
+
 const char AANoUnwind::ID = 0;
 const char AANoSync::ID = 0;
 const char AANoFree::ID = 0;
@@ -12721,6 +12945,7 @@ const char AAPointerInfo::ID = 0;
 const char AAAssumptionInfo::ID = 0;
 const char AAUnderlyingObjects::ID = 0;
 const char AAAddressSpace::ID = 0;
+const char AAAllocationInfo::ID = 0;
 const char AAIndirectCallInfo::ID = 0;
 const char AAGlobalValueInfo::ID = 0;
 const char AADenormalFPMath::ID = 0;
@@ -12854,6 +13079,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFPClass)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAddressSpace)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAllocationInfo)
 
 CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify)
 CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIsDead)
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll
index a42aedd1da0d5e9..595cb37c6c93ec9 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll
@@ -107,7 +107,9 @@ define i32 @test_inf_promote_caller(i32 %arg) {
 ; CGSCC-SAME: (i32 [[ARG:%.*]]) #[[ATTR3:[0-9]+]] {
 ; CGSCC-NEXT:  bb:
 ; CGSCC-NEXT:    [[TMP:%.*]] = alloca [[S:%.*]], align 8
-; CGSCC-NEXT:    [[TRUETMP1:%.*]] = alloca [[S]], align 8
+; CGSCC-NEXT:    [[TMP3:%.*]] = alloca i8, i32 0, align 8
+; CGSCC-NEXT:    [[TMP1:%.*]] = alloca [[S]], align 8
+; CGSCC-NEXT:    [[TMP14:%.*]] = alloca i8, i32 0, align 8
 ; CGSCC-NEXT:    ret i32 0
 ;
 bb:
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll
index b52e156eb21b9c7..2df81d6cb1832d0 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll
@@ -37,6 +37,8 @@ define internal i32 @caller(ptr %B) {
 ; CGSCC-LABEL: define {{[^@]+}}@caller
 ; CGSCC-SAME: () #[[ATTR0]] {
 ; CGSCC-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CGSCC-NEXT:    [[A2:%.*]] = alloca i8, i32 0, align 4
+; CGSCC-NEXT:    [[A1:%.*]] = alloca i8, i32 0, align 4
 ; CGSCC-NEXT:    ret i32 0
 ;
   %A = alloca i32
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
index 732a55101e890c5..7c28de24beea279 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
@@ -54,6 +54,8 @@ define internal i32 @caller(ptr %B) {
 ; CGSCC-LABEL: define {{[^@]+}}@caller
 ; CGSCC-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0]] {
 ; CGSCC-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CGSCC-NEXT:    [[A2:%.*]] = alloca i8, i32 0, align 4
+; CGSCC-NEXT:    [[A1:%.*]] = alloca i8, i32 0, align 4
 ; CGSCC-NEXT:    [[C:%.*]] = call i32 @test(ptr noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) #[[ATTR2:[0-9]+]]
 ; CGSCC-NEXT:    ret i32 0
 ;
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll
index 36347eef39e38f0..38c1d6099042cbc 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll
@@ -30,6 +30,7 @@ define internal i32 @foo(ptr) {
 ; CHECK-SAME: () addrspace(1) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[RETVAL1:%.*]] = alloca i8, i32 0, align 4
 ; CHECK-NEXT:    call addrspace(0) void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""()
 ; CHECK-NEXT:    unreachable
 ;
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
index 5a9fdcc2cb64bbd..ac825468a58c17f 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
@@ -34,13 +34,13 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 define dso_local i32 @main() {
 ; TUNIT-LABEL: define {{[^@]+}}@main() {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[ALLOC1:%.*]] = alloca i8, align 8
-; TUNIT-NEXT:    [[ALLOC2:%.*]] = alloca i8, align 8
+; TUNIT-NEXT:    [[ALLOC11:%.*]] = alloca i8, i32 0, align 8
+; TUNIT-NEXT:    [[ALLOC22:%.*]] = alloca i8, i32 0, align 8
 ; TUNIT-NEXT:    [[THREAD:%.*]] = alloca i64, align 8
 ; TUNIT-NEXT:    [[CALL:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @foo, ptr nofree readnone align 4294967296 undef)
 ; TUNIT-NEXT:    [[CALL1:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @bar, ptr noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) undef)
-; TUNIT-NEXT:    [[CALL2:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @baz, ptr noalias nocapture nofree noundef nonnull readnone align 8 dereferenceable(1) [[ALLOC1]])
-; TUNIT-NEXT:    [[CALL3:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @buz, ptr noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[ALLOC2]])
+; TUNIT-NEXT:    [[CALL2:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @baz, ptr noalias nocapture nofree noundef nonnull readnone align 8 dereferenceable(1) [[ALLOC11]])
+; TUNIT-NEXT:    [[CALL3:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @buz, ptr noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[ALLOC22]])
 ; TUNIT-NEXT:    ret i32 0
 ;
 ; CGSCC-LABEL: define {{[^@]+}}@main() {
diff --git a/llvm/test/Transforms/Attributor/allocator.ll b/llvm/test/Transforms/Attributor/allocator.ll
new file mode 100644
index 000000000000000..7072fd5b9e78d3d
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/allocator.ll
@@ -0,0 +1,518 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals --version 2
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs  -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
+
+%struct.Foo = type { i32, i32, i8 }
+
+ at .str = private unnamed_addr constant [17 x i8] c"The value is %d\0A\00", align 1
+
+;.
+; CHECK: @[[_STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr constant [17 x i8] c"The value is %d\0A\00", align 1
+;.
+define dso_local void @positive_alloca_1(i32 noundef %val) #0 {
+; CHECK-LABEL: define dso_local void @positive_alloca_1
+; CHECK-SAME: (i32 noundef [[VAL:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL_ADDR1:%.*]] = alloca i8, i32 4, align 4
+; CHECK-NEXT:    [[F2:%.*]] = alloca i8, i32 4, align 4
+; CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR1]], align 4
+; CHECK-NEXT:    store i32 10, ptr [[F2]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[F2]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[F2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[F2]], align 4
+; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP1]], [[VAL]]
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[ADD3]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %val.addr = alloca i64, align 4
+  %f = alloca %struct.Foo, align 4
+  store i32 %val, ptr %val.addr, align 4
+  %field1 = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 0
+  store i32 10, ptr %field1, align 4
+  %field11 = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 0
+  %0 = load i32, ptr %field11, align 4
+  %add = add nsw i32 %0, 1
+  store i32 %add, ptr %field11, align 4
+  %field12 = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 0
+  %1 = load i32, ptr %field12, align 4
+  %2 = load i32, ptr %val.addr, align 4
+  %add3 = add nsw i32 %1, %2
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %add3)
+  ret void
+}
+
+; TODO: change malloc like call
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @positive_malloc_1(ptr noundef %val) #0 {
+; CHECK-LABEL: define dso_local void @positive_malloc_1
+; CHECK-SAME: (ptr nocapture nofree noundef readonly [[VAL:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[F:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    store ptr [[VAL]], ptr [[VAL_ADDR]], align 8
+; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 12)
+; CHECK-NEXT:    store ptr [[CALL]], ptr [[F]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 10
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[CALL]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4
+; CHECK-NEXT:    [[CALL2:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %val.addr = alloca ptr, align 8
+  %f = alloca ptr, align 8
+  store ptr %val, ptr %val.addr, align 8
+  %call = call noalias ptr @malloc(i64 noundef 12) #3
+  store ptr %call, ptr %f, align 8
+  %0 = load ptr, ptr %val.addr, align 8
+  %1 = load i32, ptr %0, align 4
+  %add = add nsw i32 %1, 10
+  %2 = load ptr, ptr %f, align 8
+  %a = getelementptr inbounds %struct.Foo, ptr %2, i32 0, i32 0
+  store i32 %add, ptr %a, align 4
+  %3 = load ptr, ptr %f, align 8
+  %a1 = getelementptr inbounds %struct.Foo, ptr %3, i32 0, i32 0
+  %4 = load i32, ptr %a1, align 4
+  %call2 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %4)
+  ret void
+}
+
+; TODO: change malloc like call
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @positive_malloc_2(ptr noundef %val) #0 {
+; CHECK-LABEL: define dso_local void @positive_malloc_2
+; CHECK-SAME: (ptr nocapture nofree noundef readonly [[VAL:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[F:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    store ptr [[VAL]], ptr [[VAL_ADDR]], align 8
+; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 60)
+; CHECK-NEXT:    store ptr [[CALL]], ptr [[F]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[CALL]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4
+; CHECK-NEXT:    [[CALL2:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %val.addr = alloca ptr, align 8
+  %x = alloca i32, align 4
+  %f = alloca ptr, align 8
+  store ptr %val, ptr %val.addr, align 8
+  store i32 15, ptr %x, align 4
+  %0 = load i32, ptr %x, align 4
+  %conv = sext i32 %0 to i64
+  %mul = mul i64 4, %conv
+  %call = call noalias ptr @malloc(i64 noundef %mul)
+  store ptr %call, ptr %f, align 8
+  %1 = load ptr, ptr %val.addr, align 8
+  %2 = load i32, ptr %1, align 4
+  %3 = load ptr, ptr %f, align 8
+  %arrayidx = getelementptr inbounds i32, ptr %3, i64 0
+  store i32 %2, ptr %arrayidx, align 4
+  %4 = load ptr, ptr %f, align 8
+  %arrayidx1 = getelementptr inbounds i32, ptr %4, i64 0
+  %5 = load i32, ptr %arrayidx1, align 4
+  %call2 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %5)
+  ret void
+}
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local ptr @negative_test_escaping_pointer(i32 noundef %val) #0 {
+; CHECK-LABEL: define dso_local ptr @negative_test_escaping_pointer
+; CHECK-SAME: (i32 noundef [[VAL:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[F:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
+; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 16)
+; CHECK-NEXT:    store ptr [[CALL]], ptr [[F]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[F]], align 8
+; CHECK-NEXT:    store i32 2, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 10, [[VAL]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[F]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
+; CHECK-NEXT:    store i32 [[ADD2]], ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[F]], align 8
+; CHECK-NEXT:    ret ptr [[TMP3]]
+;
+entry:
+  %val.addr = alloca i32, align 4
+  %f = alloca ptr, align 8
+  store i32 %val, ptr %val.addr, align 4
+  %call = call noalias ptr @malloc(i64 noundef 16) #2
+  store ptr %call, ptr %f, align 8
+  %0 = load ptr, ptr %f, align 8
+  %field1 = getelementptr inbounds %struct.Foo, ptr %0, i32 0, i32 0
+  store i32 2, ptr %field1, align 8
+  %1 = load i32, ptr %val.addr, align 4
+  %add = add nsw i32 10, %1
+  %2 = load ptr, ptr %f, align 8
+  %field11 = getelementptr inbounds %struct.Foo, ptr %2, i32 0, i32 0
+  %3 = load i32, ptr %field11, align 8
+  %add2 = add nsw i32 %3, %add
+  store i32 %add2, ptr %field11, align 8
+  %4 = load ptr, ptr %f, align 8
+  ret ptr %4
+}
+
+
+; TODO: The allocation can be reduced here.
+; However, the offsets (load/store etc.) need to be changed.
+; Function Attrs: noinline nounwind uwtable
+define dso_local { i64, ptr } @positive_test_not_a_single_start_offset(i32 noundef %val) #0 {
+; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define dso_local { i64, ptr } @positive_test_not_a_single_start_offset
+; CHECK-SAME: (i32 noundef [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8
+; CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
+; CHECK-NEXT:    store i32 2, ptr [[RETVAL]], align 8
+; CHECK-NEXT:    [[FIELD3:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr [[RETVAL]], i32 0, i32 2
+; CHECK-NEXT:    store ptr [[VAL_ADDR]], ptr [[FIELD3]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load { i64, ptr }, ptr [[RETVAL]], align 8
+; CHECK-NEXT:    ret { i64, ptr } [[TMP0]]
+;
+entry:
+  %retval = alloca %struct.Foo, align 8
+  %val.addr = alloca i32, align 4
+  store i32 %val, ptr %val.addr, align 4
+  %field1 = getelementptr inbounds %struct.Foo, ptr %retval, i32 0, i32 0
+  store i32 2, ptr %field1, align 8
+  %field3 = getelementptr inbounds %struct.Foo, ptr %retval, i32 0, i32 2
+  store ptr %val.addr, ptr %field3, align 8
+  %0 = load { i64, ptr }, ptr %retval, align 8
+  ret { i64, ptr } %0
+}
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @positive_test_reduce_array_allocation_1() {
+; CHECK-LABEL: define dso_local void @positive_test_reduce_array_allocation_1() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAY1:%.*]] = alloca i8, i32 4, align 8
+; CHECK-NEXT:    store i32 0, ptr [[ARRAY1]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAY1]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], 2
+; CHECK-NEXT:    store i32 [[TMP1]], ptr [[ARRAY1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 1, 2
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAY1]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    store i32 [[TMP4]], ptr [[ARRAY1]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAY1]], align 8
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP5]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %array = alloca ptr, i32 10
+  store i32 0, ptr %array
+  %0 = load i32, ptr %array
+  %1 = add i32 %0, 2
+  store i32 %1, ptr %array
+  %2 = add i32 1, 2
+  %3 = load i32, ptr %array
+  %4 = add i32 %2, %3
+  store i32 %4, ptr %array
+  %5 = load i32, ptr %array
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %5)
+  ret void
+}
+
+
+; Function Attrs: noinline nounwind uwtable
+; TODO: Here the array size is not known at compile time.
+; However, the array does not escape and is only partially used.
+; Should the optimization reduce the allocation size regardless, based on AAPointerInfo?
+define dso_local void @baz(ptr noundef %val, i32 noundef %arrayLength) #0 {
+; CHECK-LABEL: define dso_local void @baz
+; CHECK-SAME: (ptr nocapture nofree noundef readonly [[VAL:%.*]], i32 noundef [[ARRAYLENGTH:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[ARRAYLENGTH_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[F:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    store ptr [[VAL]], ptr [[VAL_ADDR]], align 8
+; CHECK-NEXT:    store i32 [[ARRAYLENGTH]], ptr [[ARRAYLENGTH_ADDR]], align 4
+; CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[ARRAYLENGTH]] to i64
+; CHECK-NEXT:    [[MUL:%.*]] = mul i64 4, [[CONV]]
+; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef [[MUL]])
+; CHECK-NEXT:    store ptr [[CALL]], ptr [[F]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[CALL]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4
+; CHECK-NEXT:    [[CALL2:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %val.addr = alloca ptr, align 8
+  %arrayLength.addr = alloca i32, align 4
+  %f = alloca ptr, align 8
+  store ptr %val, ptr %val.addr, align 8
+  store i32 %arrayLength, ptr %arrayLength.addr, align 4
+  %0 = load i32, ptr %arrayLength.addr, align 4
+  %conv = sext i32 %0 to i64
+  %mul = mul i64 4, %conv
+  %call = call noalias ptr @malloc(i64 noundef %mul) #3
+  store ptr %call, ptr %f, align 8
+  %1 = load ptr, ptr %val.addr, align 8
+  %2 = load i32, ptr %1, align 4
+  %3 = load ptr, ptr %f, align 8
+  %arrayidx = getelementptr inbounds i32, ptr %3, i64 0
+  store i32 %2, ptr %arrayidx, align 4
+  %4 = load ptr, ptr %f, align 8
+  %arrayidx1 = getelementptr inbounds i32, ptr %4, i64 0
+  %5 = load i32, ptr %arrayidx1, align 4
+  %call2 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %5)
+  ret void
+}
+
+; TODO: Here, since only the even indices of the array are part of the output,
+; we can reduce the allocation by half and make an array that is accessed contiguously.
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @positive_test_reduce_array_allocation_2() #0 {
+; CHECK-LABEL: define dso_local void @positive_test_reduce_array_allocation_2() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAY:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 40000)
+; CHECK-NEXT:    store ptr [[CALL]], ptr [[ARRAY]], align 8
+; CHECK-NEXT:    store i32 0, ptr [[I]], align 4
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10000
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM]]
+; CHECK-NEXT:    store i32 [[TMP1]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    br label [[FOR_INC:%.*]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 2
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
+; CHECK-NEXT:    br label [[FOR_COND]]
+; CHECK:       for.end:
+; CHECK-NEXT:    store i32 0, ptr [[I]], align 4
+; CHECK-NEXT:    br label [[FOR_COND1:%.*]]
+; CHECK:       for.cond1:
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP4]], 10000
+; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END9:%.*]]
+; CHECK:       for.body3:
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[TMP5]] to i64
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP6]], 1
+; CHECK-NEXT:    store i32 [[ADD6]], ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    br label [[FOR_INC7:%.*]]
+; CHECK:       for.inc7:
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP7]], 2
+; CHECK-NEXT:    store i32 [[ADD8]], ptr [[I]], align 4
+; CHECK-NEXT:    br label [[FOR_COND1]]
+; CHECK:       for.end9:
+; CHECK-NEXT:    store i32 0, ptr [[I]], align 4
+; CHECK-NEXT:    br label [[FOR_COND10:%.*]]
+; CHECK:       for.cond10:
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[CMP11:%.*]] = icmp slt i32 [[TMP8]], 10000
+; CHECK-NEXT:    br i1 [[CMP11]], label [[FOR_BODY12:%.*]], label [[FOR_END18:%.*]]
+; CHECK:       for.body12:
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[IDXPROM13:%.*]] = sext i32 [[TMP9]] to i64
+; CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM13]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX14]], align 4
+; CHECK-NEXT:    [[CALL15:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP10]])
+; CHECK-NEXT:    br label [[FOR_INC16:%.*]]
+; CHECK:       for.inc16:
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[ADD17:%.*]] = add nsw i32 [[TMP11]], 2
+; CHECK-NEXT:    store i32 [[ADD17]], ptr [[I]], align 4
+; CHECK-NEXT:    br label [[FOR_COND10]]
+; CHECK:       for.end18:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %array = alloca ptr, align 8
+  %i = alloca i32, align 4
+  %call = call noalias ptr @malloc(i64 noundef 40000) #3
+  store ptr %call, ptr %array, align 8
+  store i32 0, ptr %i, align 4
+  br label %for.cond
+
+for.cond:
+  %0 = load i32, ptr %i, align 4
+  %cmp = icmp slt i32 %0, 10000
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  %1 = load i32, ptr %i, align 4
+  %2 = load ptr, ptr %array, align 8
+  %3 = load i32, ptr %i, align 4
+  %idxprom = sext i32 %3 to i64
+  %arrayidx = getelementptr inbounds i32, ptr %2, i64 %idxprom
+  store i32 %1, ptr %arrayidx, align 4
+  br label %for.inc
+
+for.inc:
+  %4 = load i32, ptr %i, align 4
+  %add = add nsw i32 %4, 2
+  store i32 %add, ptr %i, align 4
+  br label %for.cond
+
+for.end:
+  store i32 0, ptr %i, align 4
+  br label %for.cond1
+
+for.cond1:
+  %5 = load i32, ptr %i, align 4
+  %cmp2 = icmp slt i32 %5, 10000
+  br i1 %cmp2, label %for.body3, label %for.end9
+
+for.body3:
+  %6 = load ptr, ptr %array, align 8
+  %7 = load i32, ptr %i, align 4
+  %idxprom4 = sext i32 %7 to i64
+  %arrayidx5 = getelementptr inbounds i32, ptr %6, i64 %idxprom4
+  %8 = load i32, ptr %arrayidx5, align 4
+  %add6 = add nsw i32 %8, 1
+  store i32 %add6, ptr %arrayidx5, align 4
+  br label %for.inc7
+
+for.inc7:
+  %9 = load i32, ptr %i, align 4
+  %add8 = add nsw i32 %9, 2
+  store i32 %add8, ptr %i, align 4
+  br label %for.cond1
+
+for.end9:
+  store i32 0, ptr %i, align 4
+  br label %for.cond10
+
+for.cond10:
+  %10 = load i32, ptr %i, align 4
+  %cmp11 = icmp slt i32 %10, 10000
+  br i1 %cmp11, label %for.body12, label %for.end18
+
+for.body12:
+  %11 = load ptr, ptr %array, align 8
+  %12 = load i32, ptr %i, align 4
+  %idxprom13 = sext i32 %12 to i64
+  %arrayidx14 = getelementptr inbounds i32, ptr %11, i64 %idxprom13
+  %13 = load i32, ptr %arrayidx14, align 4
+  %call15 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %13)
+  br label %for.inc16
+
+for.inc16:
+  %14 = load i32, ptr %i, align 4
+  %add17 = add nsw i32 %14, 2
+  store i32 %add17, ptr %i, align 4
+  br label %for.cond10
+
+for.end18:
+  ret void
+}
+
+
+define dso_local void @pthread_test(){
+; TUNIT-LABEL: define dso_local void @pthread_test() {
+; TUNIT-NEXT:    [[ARG1:%.*]] = alloca i8, align 8
+; TUNIT-NEXT:    [[THREAD:%.*]] = alloca i64, align 8
+; TUNIT-NEXT:    [[CALL1:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_allocation_should_remain_same, ptr noundef nonnull align 8 dereferenceable(1) [[ARG1]])
+; TUNIT-NEXT:    [[F1:%.*]] = alloca i8, i32 4, align 4
+; TUNIT-NEXT:    [[CALL2:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_allocation_should_be_reduced, ptr noalias nocapture nofree nonnull readnone align 4 dereferenceable(12) undef)
+; TUNIT-NEXT:    [[F2:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; TUNIT-NEXT:    [[CALL3:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_check_captured_pointer, ptr noundef nonnull align 4 dereferenceable(12) [[F2]])
+; TUNIT-NEXT:    ret void
+;
+; CGSCC-LABEL: define dso_local void @pthread_test() {
+; CGSCC-NEXT:    [[ARG1:%.*]] = alloca i8, align 8
+; CGSCC-NEXT:    [[THREAD:%.*]] = alloca i64, align 8
+; CGSCC-NEXT:    [[CALL1:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_allocation_should_remain_same, ptr noundef nonnull align 8 dereferenceable(1) [[ARG1]])
+; CGSCC-NEXT:    [[F:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CGSCC-NEXT:    [[CALL2:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_allocation_should_be_reduced, ptr noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(12) [[F]])
+; CGSCC-NEXT:    [[F2:%.*]] = alloca [[STRUCT_FOO]], align 4
+; CGSCC-NEXT:    [[CALL3:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_check_captured_pointer, ptr noundef nonnull align 4 dereferenceable(12) [[F2]])
+; CGSCC-NEXT:    ret void
+;
+  %arg1 = alloca i8, align 8
+  %thread = alloca i64, align 8
+  %call1 = call i32 @pthread_create(ptr nonnull %thread, ptr null, ptr nonnull @pthread_allocation_should_remain_same, ptr %arg1)
+  %f = alloca %struct.Foo, align 4
+  %call2 = call i32 @pthread_create(ptr nonnull %thread, ptr null, ptr nonnull @pthread_allocation_should_be_reduced, ptr %f)
+  %f2 = alloca %struct.Foo, align 4
+  %call3 = call i32 @pthread_create(ptr nonnull %thread, ptr null, ptr nonnull @pthread_check_captured_pointer, ptr %f2)
+  ret void
+}
+
+define internal ptr @pthread_allocation_should_remain_same(ptr %arg) {
+; CHECK-LABEL: define internal noundef nonnull align 8 dereferenceable(1) ptr @pthread_allocation_should_remain_same
+; CHECK-SAME: (ptr noundef nonnull returned align 8 dereferenceable(1) [[ARG:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, ptr noundef nonnull align 8 dereferenceable(1) [[ARG]])
+; CHECK-NEXT:    ret ptr [[ARG]]
+;
+entry:
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, ptr noundef %arg)
+  ret ptr %arg
+}
+
+define internal void @pthread_allocation_should_be_reduced(ptr %arg) {
+;
+; TUNIT-LABEL: define internal void @pthread_allocation_should_be_reduced
+; TUNIT-SAME: (ptr noalias nocapture nofree nonnull readnone align 4 dereferenceable(12) [[ARG:%.*]]) {
+; TUNIT-NEXT:  entry:
+; TUNIT-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 undef)
+; TUNIT-NEXT:    ret void
+;
+; CGSCC-LABEL: define internal void @pthread_allocation_should_be_reduced
+; CGSCC-SAME: (ptr noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(12) [[ARG:%.*]]) {
+; CGSCC-NEXT:  entry:
+; CGSCC-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARG]], align 4
+; CGSCC-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP0]])
+; CGSCC-NEXT:    ret void
+;
+entry:
+  %field1 = getelementptr inbounds %struct.Foo, ptr %arg, i32 0, i32 0
+  %0 = load i32, ptr %field1, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0)
+  ret void
+}
+
+define internal void @pthread_check_captured_pointer(ptr %arg){
+; CHECK-LABEL: define internal void @pthread_check_captured_pointer
+; CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(12) [[ARG:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @external_call(ptr noundef nonnull align 4 dereferenceable(12) [[ARG]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %field1 = getelementptr inbounds %struct.Foo, ptr %arg, i32 0, i32 0
+  call void @external_call(ptr %field1)
+  ret void
+}
+
+
+declare external void @external_call(ptr)
+
+declare !callback !0 dso_local i32 @pthread_create(ptr, ptr, ptr, ptr)
+!1 = !{i64 2, i64 3, i1 false}
+!0 = !{!1}
+
+declare i32 @printf(ptr noundef, ...) #1
+
+; Function Attrs: nounwind allocsize(0)
+declare noalias ptr @malloc(i64 noundef) #1
+;.
+; CHECK: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
+;.
+; CHECK: [[META0:![0-9]+]] = !{!1}
+; CHECK: [[META1:![0-9]+]] = !{i64 2, i64 3, i1 false}
+;.
diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll
index 370d72d99e736f3..4f267a7abe305ee 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll
@@ -503,6 +503,7 @@ define i32 @malloc_in_loop(i32 %arg) {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[I1:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[I11:%.*]] = alloca i8, i32 0, align 8
 ; CHECK-NEXT:    store i32 [[ARG]], ptr [[I]], align 4
 ; CHECK-NEXT:    br label [[BB2:%.*]]
 ; CHECK:       bb2:
diff --git a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
index e6c8491070c2073..476e65b4e465382 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
@@ -453,6 +453,7 @@ define i32 @malloc_in_loop(i32 %arg) {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[I1:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[I11:%.*]] = alloca i8, i32 0, align 8
 ; CHECK-NEXT:    store i32 [[ARG]], ptr [[I]], align 4
 ; CHECK-NEXT:    br label [[BB2:%.*]]
 ; CHECK:       bb2:
diff --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll
index 06195b97ac5fd99..d6718884ea957de 100644
--- a/llvm/test/Transforms/Attributor/liveness.ll
+++ b/llvm/test/Transforms/Attributor/liveness.ll
@@ -2587,9 +2587,9 @@ define void @bad_gep() {
 ; TUNIT-LABEL: define {{[^@]+}}@bad_gep
 ; TUNIT-SAME: () #[[ATTR13]] {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[N:%.*]] = alloca i8, align 1
-; TUNIT-NEXT:    [[M:%.*]] = alloca i8, align 1
-; TUNIT-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N]]) #[[ATTR18:[0-9]+]]
+; TUNIT-NEXT:    [[N1:%.*]] = alloca i8, i32 0, align 1
+; TUNIT-NEXT:    [[M2:%.*]] = alloca i8, i32 0, align 1
+; TUNIT-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N1]]) #[[ATTR18:[0-9]+]]
 ; TUNIT-NEXT:    br label [[EXIT:%.*]]
 ; TUNIT:       while.body:
 ; TUNIT-NEXT:    unreachable
@@ -2598,16 +2598,16 @@ define void @bad_gep() {
 ; TUNIT:       if.end:
 ; TUNIT-NEXT:    unreachable
 ; TUNIT:       exit:
-; TUNIT-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N]]) #[[ATTR18]]
+; TUNIT-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N1]]) #[[ATTR18]]
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
 ; CGSCC-LABEL: define {{[^@]+}}@bad_gep
 ; CGSCC-SAME: () #[[ATTR6]] {
 ; CGSCC-NEXT:  entry:
-; CGSCC-NEXT:    [[N:%.*]] = alloca i8, align 1
-; CGSCC-NEXT:    [[M:%.*]] = alloca i8, align 1
-; CGSCC-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N]]) #[[ATTR21:[0-9]+]]
+; CGSCC-NEXT:    [[N1:%.*]] = alloca i8, i32 0, align 1
+; CGSCC-NEXT:    [[M2:%.*]] = alloca i8, i32 0, align 1
+; CGSCC-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N1]]) #[[ATTR21:[0-9]+]]
 ; CGSCC-NEXT:    br label [[EXIT:%.*]]
 ; CGSCC:       while.body:
 ; CGSCC-NEXT:    unreachable
@@ -2616,7 +2616,7 @@ define void @bad_gep() {
 ; CGSCC:       if.end:
 ; CGSCC-NEXT:    unreachable
 ; CGSCC:       exit:
-; CGSCC-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N]]) #[[ATTR21]]
+; CGSCC-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N1]]) #[[ATTR21]]
 ; CGSCC-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/Attributor/nodelete.ll b/llvm/test/Transforms/Attributor/nodelete.ll
index 9d754506c5c9d7c..c28cb2837934815 100644
--- a/llvm/test/Transforms/Attributor/nodelete.ll
+++ b/llvm/test/Transforms/Attributor/nodelete.ll
@@ -10,6 +10,7 @@ define hidden i64 @f1() align 2 {
 ; TUNIT-LABEL: define {{[^@]+}}@f1
 ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] align 2 {
 ; TUNIT-NEXT:  entry:
+; TUNIT-NEXT:    [[REF_TMP1:%.*]] = alloca i8, i32 0, align 8
 ; TUNIT-NEXT:    ret i64 undef
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
@@ -17,6 +18,7 @@ define hidden i64 @f1() align 2 {
 ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] align 2 {
 ; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[REF_TMP:%.*]] = alloca [[A:%.*]], align 8
+; CGSCC-NEXT:    [[REF_TMP1:%.*]] = alloca i8, i32 0, align 8
 ; CGSCC-NEXT:    [[CALL2:%.*]] = call i64 @f2() #[[ATTR2:[0-9]+]]
 ; CGSCC-NEXT:    ret i64 [[CALL2]]
 ;



More information about the llvm-commits mailing list