Working an an updated patch incorporating this and other feedback, but wanted to reply to a few things directly....<div><br><br><div class="gmail_quote">On Wed, Aug 22, 2012 at 3:04 PM, Duncan Sands <span dir="ltr"><<a href="mailto:baldrick@free.fr" target="_blank">baldrick@free.fr</a>></span> wrote:<br>


<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Hi Chandler,<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

Index: test/Transforms/SROA/<u></u>basictest.ll<br>

==============================<u></u>==============================<u></u>=======<br>

--- test/Transforms/SROA/<u></u>basictest.ll   (revision 0)<br>

+++ test/Transforms/SROA/<u></u>basictest.ll   (working copy)<br>

</blockquote>

<br>

...<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+define %S2 @test9(%S2 %s2) {<br>

+entry:<br>

+  %new = alloca %S2<br>

+  %s2.next.ptr = extractvalue %S2 %s2, 1<br>

+  %s2.next = load %S2* %s2.next.ptr<br>

+  %s2.next.s1 = extractvalue %S2 %s2.next, 0<br>

+  %s2.next.next = extractvalue %S2 %s2.next, 1<br>

+<br>

+  %new.s1.ptr = getelementptr %S2* %new, i64 0, i32 0<br>

+  store %S1* %s2.next.s1, %S1** %new.s1.ptr<br>

+  %new.next.ptr = getelementptr %S2* %new, i64 0, i32 1<br>

+  store %S2* %s2.next.next, %S2** %new.next.ptr<br>

+<br>

+  %result = load %S2* %new<br>

+  ret %S2 %result<br>

+}<br>

</blockquote>

<br>

you forgot to actually check something in this test.<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

--- lib/Transforms/Scalar/SROA.cpp      (revision 0)<br>

+++ lib/Transforms/Scalar/SROA.cpp      (working copy)<br>

@@ -0,0 +1,2245 @@<br>

+//===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===//<br>

+//<br>

+//                     The LLVM Compiler Infrastructure<br>

+//<br>

+// This file is distributed under the University of Illinois Open Source<br>

+// License. See LICENSE.TXT for details.<br>

+//<br>

+//===------------------------<u></u>------------------------------<u></u>----------------===//<br>

+/// \file<br>

</blockquote>

<br>

\file ?<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+/// This transformation implements the well known scalar replacement of<br>

+/// aggregates transformation. It tries to identify promotable elements of an<br>

+/// aggregate alloca, and promote them to registers. It will also try to<br>

+/// convert uses of an element (or set of elements) of an alloca into a vector<br>

+/// or bitfield-style integer scalar if appropriate.<br>

+///<br>

+/// It works to do this with minimal slicing of the alloca so that regions<div><br>

+/// which are merely transfered in and out of external memory remain unchanged<br></div>

+/// and are not decomposed to scalar code.<br>

+///<div><br>

+/// Because this also performs alloca promotion, it can be thought of as a also<br>

</div></blockquote>

<br>

as a also -> as also<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+/// serving the purpose of SSA formation. The algorithm iterates on the<br>

+/// function until all opportunities for promotion have been realized.<br>

+///<br>

+//===------------------------<u></u>------------------------------<u></u>----------------===//<br>

+<br>

+#define DEBUG_TYPE "sroa"<br>

+#include "llvm/Transforms/Scalar.h"<br>

+#include "llvm/Constants.h"<br>

+#include "llvm/DIBuilder.h"<br>

+#include "llvm/DebugInfo.h"<br>

+#include "llvm/DerivedTypes.h"<br>

+#include "llvm/Function.h"<br>

+#include "llvm/GlobalVariable.h"<br>

+#include "llvm/IRBuilder.h"<br>

+#include "llvm/Instructions.h"<br>

+#include "llvm/IntrinsicInst.h"<br>

+#include "llvm/LLVMContext.h"<br>

+#include "llvm/Module.h"<br>

+#include "llvm/Operator.h"<br>

+#include "llvm/Pass.h"<br>

+#include "llvm/ADT/SetVector.h"<br>

+#include "llvm/ADT/SmallVector.h"<br>

+#include "llvm/ADT/Statistic.h"<br>

+#include "llvm/ADT/STLExtras.h"<br>

+#include "llvm/ADT/TinyPtrVector.h"<br>

+#include "llvm/Analysis/Dominators.h"<br>

+#include "llvm/Analysis/Loads.h"<br>

+#include "llvm/Analysis/ValueTracking.<u></u>h"<br>

+#include "llvm/Support/CallSite.h"<br>

+#include "llvm/Support/Debug.h"<br>

+#include "llvm/Support/ErrorHandling.h"<br>

+#include "llvm/Support/<u></u>GetElementPtrTypeIterator.h"<br>

+#include "llvm/Support/InstVisitor.h"<br>

+#include "llvm/Support/MathExtras.h"<br>

+#include "llvm/Support/ValueHandle.h"<br>

+#include "llvm/Support/raw_ostream.h"<br>

+#include "llvm/Target/TargetData.h"<br>

+#include "llvm/Transforms/Utils/Local.<u></u>h"<br>

+#include "llvm/Transforms/Utils/<u></u>PromoteMemToReg.h"<br>

+#include "llvm/Transforms/Utils/<u></u>SSAUpdater.h"<br>

+using namespace llvm;<br>

+<br>

+STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");<br>

+STATISTIC(NumNewAllocas,      "Number of new, smaller allocas introduced");<br>

+STATISTIC(NumPromoted,        "Number of allocas promoted to SSA values");<br>

+STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");<br>

+STATISTIC(NumDeleted,         "Number of instructions deleted");<br>

+STATISTIC(NumVectorized,      "Number of vectorized aggregates");<br>

+<br>

+namespace {<br>

+/// \brief Alloca partitioning representation.<br>

+///<br>

+/// This class represents a partitioning of an alloca into slices,<br>

</blockquote>

<br>

It looks like it isn't a partition since the partition elements can overlap.<br>

Or am I just confused by comments like this:<br>

<br>

  /// We store a vector of the partitions over the alloca here. This vector is<br>

  /// sorted by increasing begin offset, and then by decreasing end offset. See<br>

  /// the Partition inner class for more details.<br>

<br>

which suggests that partition elements are not disjoint (unless this is to allow<br>

for zero length partition elements).  Perhaps it is good to document at which<br>

stages of the logic the partition elements can overlap, and from which point on<br>

they have been made disjoint.<br>

<br>

 and<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+/// information about the nature of uses of each slice of the alloca. The goal<br>

+/// is that this information is sufficient to decide if and how to split the<br>

+/// alloca apart and replace slices with scalars. It is also intended that this<br>

+/// structure can capture the relevant information needed both due decide about<br>

</blockquote>

<br>

both due -> both to<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+/// and to enact these transformations.<br>

+class AllocaPartitioning {<br>

+public:<br>

+  struct Partition {<br>

</blockquote>

<br>

Is this an element of a partition, or the partition itself?  Maybe it should be<br>

called Slice or Segment or Interval rather than Partition.<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div>

+    uint64_t BeginOffset, EndOffset;<br>

+<br></div>

+    bool IsSplittable;<br>

</blockquote>

<br>

It would be nice to document the fields and methods of struct Partition, in<br>

particular this one.  Also, whether offsets are in bits or bytes etc.<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+<br>

+    bool operator<(const Partition &RHS) const {<br>

+      if (BeginOffset < RHS.BeginOffset) return true;<br>

+      if (BeginOffset > RHS.BeginOffset) return false;<br>

+      if (EndOffset > RHS.EndOffset) return true;<br>

+      return false;<br>

+    }<br>

</blockquote>

<br>

This whether this partition is a superset of RHS, so I would have expected ">"<br>

rather than "<".<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+    bool operator<(uint64_t RHSOffset) const {<br>

+      return BeginOffset < RHSOffset;<br>

+    }<br>

+    bool operator==(const Partition &RHS) const {<br>

+      return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;<br>

+    }<br>

</blockquote>

<br>

Equality ignores IsSplittable, is this a good idea?  Maybe it is a sign that<br>

IsSplittable shouldn't live in this struct.<br></blockquote><div><br></div><div>That's more because the equality is on the range, not on the particulars of the partition. I've factored the range logic into a base class, lemme know if this is still confusing.</div>


<div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+    bool operator!=(const Partition &RHS) const { return !operator==(RHS); }<br>

+  };<br>

+<br>

+  struct PartitionUse {<br>

+    uint64_t BeginOffset, EndOffset;<br>

+    AssertingVH<Instruction> User;<br>

+    AssertingVH<Instruction> Ptr;<br>

+<br>

+    bool operator<(const PartitionUse &RHS) const {<br>

+      if (BeginOffset < RHS.BeginOffset) return true;<br>

+      if (BeginOffset > RHS.BeginOffset) return false;<br>

+      if (EndOffset > RHS.EndOffset) return true;<br>

+      return false;<br>

+    }<br>

+    bool operator<(uint64_t RHSOffset) const {<br>

+      return BeginOffset < RHSOffset;<br>

+    }<br>

+    bool operator==(const PartitionUse &RHS) const {<br>

+      return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;<br>

+    }<br>

+    bool operator!=(const PartitionUse &RHS) const { return !operator==(RHS); }<br>

+  };<br>

</blockquote>

<br>

This is exactly the same as struct Partition, only the extra data (User/Ptr) is<br>

different.  So maybe they should both be represented as a common class plus some<br>

extra data.  Some documentation of the data fields would be great.<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+<br>

+  AllocaPartitioning(const TargetData &TD, AllocaInst &AI);<br>

+<br>

+  bool isEscaped() const { return PointerEscapingInstr; }<br>

+<br>

+  typedef SmallVectorImpl<Partition>::<u></u>iterator iterator;<br>

+  iterator begin() { return Partitions.begin(); }<br>

+  iterator end() { return Partitions.end(); }<br>

+<br>

+  typedef SmallVectorImpl<Partition>::<u></u>const_iterator const_iterator;<br>

+  const_iterator begin() const { return Partitions.begin(); }<br>

+  const_iterator end() const { return Partitions.end(); }<br>

+<br>

+  typedef SmallVectorImpl<PartitionUse>:<u></u>:iterator use_iterator;<br>

+  use_iterator use_begin(unsigned Idx) { return Uses[Idx].begin(); }<br>

+  use_iterator use_begin(const_iterator I) { return Uses[I - begin()].begin(); }<br>

+  use_iterator use_end(unsigned Idx) { return Uses[Idx].end(); }<br>

+  use_iterator use_end(const_iterator I) { return Uses[I - begin()].end(); }<br>

+  void use_insert(unsigned Idx, use_iterator UI, const PartitionUse &U) {<br>

+    Uses[Idx].insert(UI, U);<br>

+  }<br>

+  void use_insert(const_iterator I, use_iterator UI, const PartitionUse &U) {<br>

+    Uses[I - begin()].insert(UI, U);<br>

+  }<br>

+  void use_erase(unsigned Idx, use_iterator UI) { Uses[Idx].erase(UI); }<br>

+  void use_erase(const_iterator I, use_iterator UI) {<br>

+    Uses[I - begin()].erase(UI);<br>

+  }<br>

+<br>

+  typedef SmallVectorImpl<PartitionUse>:<u></u>:const_iterator const_use_iterator;<br>

+  const_use_iterator use_begin(unsigned Idx) const { return Uses[Idx].begin(); }<br>

+  const_use_iterator use_begin(const_iterator I) const {<br>

+    return Uses[I - begin()].begin();<br>

+  }<br>

+  const_use_iterator use_end(unsigned Idx) const { return Uses[Idx].end(); }<br>

+  const_use_iterator use_end(const_iterator I) const {<br>

+    return Uses[I - begin()].end();<br>

+  }<div><br>

+<br>

+  /// \brief MemTransferInst auxilliary data.<br></div>

+  /// This struct provides some auxilliary data about memory transfer<br>

+  /// intrinsics such as memcpy and memmove. These intrinsics can use two<br>

+  /// different ranges within the same alloca, and provide other challenges to<br>

+  /// correctly representing. We stash extra data to help us untangle this<br>

</blockquote>

<br>

correctly representing -> correctly represent.<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+  /// after the partitioning is complete.<br>

+  struct MemTransferOffsets {<br>

+    uint64_t DestBegin, DestEnd;<br>

+    uint64_t SourceBegin, SourceEnd;<br>

+    bool IsSplittable;<br>

+  };<br>

+  MemTransferOffsets getMemTransferOffsets(<u></u>MemTransferInst &II) const {<br>

+    return MemTransferInstData.lookup(&<u></u>II);<br>

+  }<br>

+<br>

+  bool hasMultiplePHIOrSelectUses(<u></u>Instruction &I) const {<br>

+    assert(isa<PHINode>(I) || isa<SelectInst>(I));<br>

+    return PHIOrSelectSizes.lookup(&I).<u></u>second;<br>

+  }<br>

+<br>

+  iterator findPartitionForPHIOrSelectOpe<u></u>rand(Instruction &I, Value *Op) {<br>

+    SmallDenseMap<std::pair<<u></u>Instruction *, Value *>,<br>

+                  std::pair<unsigned, unsigned> >::const_iterator MapIt<br>

+      = PHIOrSelectOpMap.find(std::<u></u>make_pair(&I, Op));<br>

+    if (MapIt == PHIOrSelectOpMap.end())<br>

+      return end();<br>

+<br>

+    return begin() + MapIt->second.first;<br>

+  }<br>

+  use_iterator findPartitionUseForPHIOrSelect<u></u>Operand(Instruction &I,<br>

+                                                     Value *Op) {<br>

+    SmallDenseMap<std::pair<<u></u>Instruction *, Value *>,<br>

+                  std::pair<unsigned, unsigned> >::const_iterator MapIt<br>

+      = PHIOrSelectOpMap.find(std::<u></u>make_pair(&I, Op));<br>

+    assert(MapIt != PHIOrSelectOpMap.end());<br>

+    return Uses[MapIt->second.first].<u></u>begin() + MapIt->second.second;<br>

+  }<br>

+<br>

+  Type *getCommonType(iterator I) const;<br>

</blockquote>

<br>

It would be nice to have some documentation for this method (some of the others<br>

above aren't too self-explanatory either...).<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+<br>

+  void print(raw_ostream &OS, const_iterator I, StringRef Indent = "  ") const;<br>

+  void printUsers(raw_ostream &OS, const_iterator I,<br>

+                  StringRef Indent = "  ") const;<br>

+  void print(raw_ostream &OS) const;<br>

+  void dump(const_iterator I) const LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED;<br>

+  void dump() const LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED;<br>

+<br>

+private:<br>

+  template <typename DerivedT, typename RetT = void> class BuilderBase;<br>

+  class PartitionBuilder;<br>

+  friend class AllocaPartitioning::<u></u>PartitionBuilder;<br>

+  class UseBuilder;<br>

+  friend class AllocaPartitioning::<u></u>UseBuilder;<br>

+<br>

+  /// \brief Handle to alloca instruction to simplify method interfaces.<br>

+  AllocaInst &AI;<br>

+<br>

+  /// \brief The instruction responsible for this alloca having no partitioning.<br>

+  /// When an instruction (potentially) escapes the pointer to the alloca, we<br>

+  /// store a pointer to that here and abort trying to partition the alloca.<br>

+  /// This will be null if the alloca is partitioned successfully.<br>

+  Instruction *PointerEscapingInstr;<br>

+<br>

+  /// \brief The partitions of the alloca.<br>

+  ///<br>

+  /// We store a vector of the partitions over the alloca here. This vector is<br>

+  /// sorted by increasing begin offset, and then by decreasing end offset. See<br>

+  /// the Partition inner class for more details.<br>

+  SmallVector<Partition, 8> Partitions;<br>

+<br>

+  /// \brief The uses of the partitions.<br>

+  SmallVector<SmallVector<<u></u>PartitionUse, 2>, 8> Uses;<br>

+<br>

+  SmallDenseMap<MemTransferInst *, MemTransferOffsets, 4> MemTransferInstData;<br>

+<br>

+  SmallDenseMap<Instruction *, std::pair<uint64_t, bool> > PHIOrSelectSizes;<br>

+  SmallDenseMap<std::pair<<u></u>Instruction *, Value *>,<br>

+                std::pair<unsigned, unsigned>, 4> PHIOrSelectOpMap;<br>

+<br>

+  void splitAndMergePartitions();<br>

+};<br>

+}<br>

+<br>

+template <typename DerivedT, typename RetT><br>

+class AllocaPartitioning::<u></u>BuilderBase<br>

+    : public InstVisitor<DerivedT, RetT> {<br>

+public:<br>

+  BuilderBase(const TargetData &TD, AllocaInst &AI, AllocaPartitioning &P)<br>

+      : TD(TD),<br>

+        AllocSize(TD.getTypeAllocSize(<u></u>AI.getAllocatedType())),<br>

</blockquote>

<br>

Presumably you only create one of these for alloca's with a sized type?  (Are<br>

alloca's with an unsized type even valid?).  OK, I just saw below that you bail<br>

out early for alloca's with an unsized type.<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+        P(P) {<br>

+    enqueueUsers(AI, 0);<br>

+  }<br>

+<br>

+protected:<br>

+  const TargetData &TD;<br>

+  const uint64_t AllocSize;<br>

+  AllocaPartitioning &P;<br>

+<br>

+  struct OffsetUse {<br>

+    Instruction *Use, *Ptr;<br>

+    uint64_t Offset;<br>

+  };<br>

+  SmallVector<OffsetUse, 8> Queue;<br>

+<br>

+  // The active pointer value and offset while visiting.<br>

+  Instruction *Ptr;<br>

+  uint64_t Offset;<br>

+<br>

+  void enqueueUsers(Instruction &I, uint64_t UserOffset) {<br>

+    SmallPtrSet<User *, 8> UserSet;<br>

+    for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();<div><br>

+         UI != UE; ++UI) {<br></div>

+      if (!UserSet.insert(*UI))<br>

+        continue;<br>

+<br>

+      OffsetUse OU = { cast<Instruction>(*UI), &I, UserOffset };<br>

+      Queue.push_back(OU);<br>

+    }<br>

+  }<br>

+<br>

+  bool computeConstantGEPOffset(<u></u>GetElementPtrInst &GEPI, uint64_t &GEPOffset) {<br>

+    GEPOffset = Offset;<br>

+    for (gep_type_iterator GTI = gep_type_begin(GEPI), GTE = gep_type_end(GEPI);<br>

+         GTI != GTE; ++GTI) {<br>

+      ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.<u></u>getOperand());<br>

+      if (!OpC)<br>

+        return false;<br>

+      if (OpC->isZero())<br>

+        continue;<br>

+<br>

+      // Handle a struct index, which adds its field offset to the pointer.<br>

+      if (StructType *STy = dyn_cast<StructType>(*GTI)) {<br>

+        unsigned ElementIdx = OpC->getZExtValue();<br>

+        const StructLayout *SL = TD.getStructLayout(STy);<br>

+        GEPOffset += SL->getElementOffset(<u></u>ElementIdx);<br>

+        continue;<br>

+      }<br>

+<br>

+      GEPOffset<br>

+        += OpC->getZExtValue() * TD.getTypeAllocSize(GTI.<u></u>getIndexedType());<div><br>

+    }<br>

+    return true;<br>

+  }<br>

+<br></div>

+  Value *foldSelectInst(SelectInst &SI) {<br>

+    // If the condition being selected on is a constant or the same value is<br>

+    // being selected between, fold the select. Yes this does (rarely) happen<br>

+    // early on.<br>

</blockquote>

<br>

Indeed!  Scheduling instsimplify before SROA would zap this kind of sillyness.<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+    if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.<u></u>getCondition()))<br>

+      return SI.getOperand(1+CI->isZero());<br>

+    if (SI.getOperand(1) == SI.getOperand(2)) {<br>

+      assert(Ptr == SI.getOperand(1));<br>

+      return SI.getOperand(1);<br>

+    }<br>

+    return 0;<br>

+  }<br>

+};<br>

+<br>

+/// \brief Builder for the alloca partitioning.<br>

+///<br>

+/// This class builds an alloca partitioning by recursively visiting the uses<br>

+/// of an alloca and splitting the partitions for each load and store at each<br>

+/// offset.<br>

+class AllocaPartitioning::<u></u>PartitionBuilder<br>

+    : public BuilderBase<PartitionBuilder, bool> {<br>

+  friend class InstVisitor<PartitionBuilder, bool>;<br>

+<br>

+  SmallDenseMap<Instruction *, unsigned> MemTransferPartitionMap;<br>

+<br>

+public:<br>

+  PartitionBuilder(const TargetData &TD, AllocaInst &AI, AllocaPartitioning &P)<br>

+      : BuilderBase(TD, AI, P) {}<br>

+<br>

+  /// \brief Run the builder over the allocation.<br>

+  bool operator()() {<br>

+    // Note that we have to re-evaluate size on each trip through the loop as<br>

+    // the queue grows at the tail.<br>

+    for (unsigned Idx = 0; Idx < Queue.size(); ++Idx) {<br>

+      Ptr = Queue[Idx].Ptr;<br>

+      Offset = Queue[Idx].Offset;<br>

+      if (!visit(Queue[Idx].Use))<br>

+        return false;<br>

+    }<br>

+    return true;<br>

+  }<br>

+<br>

+private:<br>

+  bool markAsEscaping(Instruction &I) {<br>

+    P.PointerEscapingInstr = &I;<div><br>

+    return false;<br>

+  }<br>

+<br></div>

+  void insertUse(uint64_t Size, bool IsSplittable = false) {<br>

+    uint64_t BeginOffset = Offset, EndOffset = Offset + Size;<br>

+    // See if we can just add a user onto the last slot currently occupied.<br>

+    if (!P.Partitions.empty() &&<br>

+        P.Partitions.back().<u></u>BeginOffset == BeginOffset &&<br>

+        P.Partitions.back().EndOffset == EndOffset) {<br>

+      P.Partitions.back().<u></u>IsSplittable &= IsSplittable;<br>

+      return;<br>

+    }<br>

+<br>

+    Partition New = { BeginOffset, EndOffset, IsSplittable };<br>

+    P.Partitions.push_back(New);<br>

+  }<br>

+<br>

+  bool handleLoadOrStore(Type *Ty, Instruction &I) {<br>

+    uint64_t Size = TD.getTypeAllocSize(Ty);<br>

+<br>

+    // If this memory access can be shown to *statically* extend outside the<br>

+    // bounds of of the allocation, it's behavior is undefined, so simply<br>

+    // ignore the load.<br>

</blockquote>

<br>

This is pretty harsh.  For example if I have a 4 byte aligned i8 alloca and I<br>

load an i32 out of it you are just going to discard the entire load?  I mean,<br>

I can't really complain too much if you do since technically you are right,<br>

but maybe it is better to be nice to users if this harshness doesn't gain you<br>

anything much in practice.<br></blockquote><div><br></div><div>I expect these to only show up in dead code paths where we have shrunk the alloca but not done enough DCE to zap the access. So it seems nice to not block the SROA due to this dead code.</div>


<div><br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

<br>

 We even play some games to handle cases which might run<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+    // the risk of overflow.<br>

+    // FIXME: We should instead consider tha pointer to have escaped if this<br>

+    // function is being instrumented for addressing bugs or race conditions.<br>

+    if (Offset > AllocSize || Size > AllocSize || Offset + Size > AllocSize)<br>

</blockquote>

<br>

The last check covers the other two (unless you are worried that the addition<br>

may overflow)?<br></blockquote><div><br></div><div>See the comment about risk of overflow above?</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+      return true;<br>

+<br>

+    insertUse(Size);<br>

</blockquote>

<br>

(IsSplittable -> false)  I guess a FCA load/store is partly splittable, but is<br>

not splittable in the sense you want...  You didn't document what splittable<br>

means :)<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div>

+    return true;<br>

+  }<br>

+<br></div>

+  bool visitBitCastInst(BitCastInst &BC) {<br>

+    enqueueUsers(BC, Offset);<div><br>

+    return true;<br>

+  }<br>

+<br></div>

+  bool visitGetElementPtrInst(<u></u>GetElementPtrInst &GEPI) {<br>

+    //unsigned IntPtrWidth = TD->getPointerSizeInBits();<br>

+    //assert(IntPtrWidth == Offset.getBitWidth());<br>

+    uint64_t GEPOffset;<br>

+    if (!computeConstantGEPOffset(<u></u>GEPI, GEPOffset))<br>

+      return markAsEscaping(GEPI);<br>

+<br>

+    enqueueUsers(GEPI, GEPOffset);<div><br>

+    return true;<br>

+  }<br>

+<br>

+  bool visitLoadInst(LoadInst &LI) {<br></div>

+    return handleLoadOrStore(LI.getType()<u></u>, LI);<div><br>

+  }<br>

+<br>

+  bool visitStoreInst(StoreInst &SI) {<br></div>

+    if (SI.getOperand(0) == Ptr)<br>

+      return markAsEscaping(SI);<br>

+<br>

+    return handleLoadOrStore(SI.<u></u>getOperand(0)->getType(), SI);<br>

+  }<div><br>

+<br>

+<br>

+  bool visitMemSetInst(MemSetInst &II) {<br></div>

+    ConstantInt *Length = dyn_cast<ConstantInt>(II.<u></u>getLength());<br>

+    insertUse(Length ? Length->getZExtValue() : AllocSize - Offset, true);<div><br>

+    return true;<br>

+  }<br>

+<br></div>

+  bool visitMemTransferInst(<u></u>MemTransferInst &II) {<br>

+    ConstantInt *Length = dyn_cast<ConstantInt>(II.<u></u>getLength());<br>

+    uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset;<br>

+    if (!Size)<br>

+      // Zero-length mem transfer intrinsics can be ignored entirely.<br>

+      return true;<br>

+<br>

+    insertUse(Size, true);<br>

+    unsigned NewIdx = P.Partitions.size() - 1;<br>

+<br>

+    MemTransferOffsets &Offsets = P.MemTransferInstData[&II];<br>

+    Offsets.IsSplittable = true;<br>

+    if (Ptr != II.getRawDest()) {<br>

+      assert(Ptr == II.getRawSource());<br>

+      Offsets.SourceBegin = Offset;<br>

+      Offsets.SourceEnd = Offset + Size;<br>

+    } else {<br>

+      Offsets.DestBegin = Offset;<br>

+      Offsets.DestEnd = Offset + Size;<br>

+    }<br>

+<br>

+    SmallDenseMap<Instruction *, unsigned>::const_iterator PMI;<br>

+    bool Inserted = false;<br>

+    llvm::tie(PMI, Inserted)<br>

+      = MemTransferPartitionMap.<u></u>insert(std::make_pair(&II, NewIdx));<br>

+    if (!Inserted) {<br>

+      // We've found a memory transfer intrinsic which refers to the alloca as<br>

+      // both a source and dest. We refuse to split these to simplify splitting<br>

+      // logic. If possible, SROA will still split them into separate allocas<br>

+      // and then re-analyze.<br>

+      Offsets.IsSplittable = false;<br>

+      P.Partitions[PMI->second].<u></u>IsSplittable = false;<br>

+      P.Partitions[NewIdx].<u></u>IsSplittable = false;<div><br>

+    }<br>

+<br>

+    return true;<br>

+  }<br>

+<br></div>

+  // Disable SRoA for any intrinsics except for lifetime invariants.<br>

+  bool visitIntrinsicInst(<u></u>IntrinsicInst &II) {<br>

+    if (II.getIntrinsicID() == Intrinsic::lifetime_start ||<div><br>

+        II.getIntrinsicID() == Intrinsic::lifetime_end) {<br></div>

+      ConstantInt *Length = cast<ConstantInt>(II.<u></u>getArgOperand(0));<br>

+      uint64_t Size = std::min(AllocSize - Offset, Length->getLimitedValue());<br>

+      insertUse(Size, true);<div><br>

+      return true;<br>

+    }<br>

+<br></div>

+    return markAsEscaping(II);<br>

+  }<br>

+<br>

+  Instruction *hasUnsafePHIOrSelectUse(<u></u>Instruction *Root, uint64_t &Size) {<br>

+    // We consider any PHI or select that results in a direct load or store of<br>

+    // the same offset to be a viable use for partitioning purposes. These uses<br>

+    // are considered unsplittable and the size is the maximum loaded or stored<br>

+    // size.<br>

</blockquote>

<br>

Are these restrictions because otherwise splitting is too hard?<br></blockquote><div><br></div><div>Not really, its a propagation of the fact that loads and stores are not considered splittable.</div><div><br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">


<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+    SmallVector<std::pair<<u></u>Instruction *, Instruction *>, 4> Uses;<br>

+    Uses.push_back(std::make_pair(<u></u>Ptr, Root));<br>

+    do {<br>

+      Instruction *I, *UsedI;<br>

+      llvm::tie(UsedI, I) = Uses.pop_back_val();<br>

+<br>

+      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {<br>

+        Size = std::max(Size, TD.getTypeAllocSize(LI-><u></u>getType()));<br>

+        continue;<br>

+      }<br>

+      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {<br>

+        Value *Op = SI->getOperand(0);<br>

+        if (Op == UsedI)<br>

+          return SI;<br>

+        Size = std::max(Size, TD.getTypeAllocSize(Op-><u></u>getType()));<br>

+        continue;<br>

+      }<br>

+<br>

+      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)<u></u>) {<br>

+        if (!GEP->hasAllZeroIndices())<br>

+          return GEP;<br>

+      } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&<br>

+                 !isa<SelectInst>(I)) {<br>

+        return I;<br>

+      }<br>

+<br>

+      for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE;<br>

+           ++UI)<br>

+        Uses.push_back(std::make_pair(<u></u>I, cast<Instruction>(*UI)));<br>

+    } while (!Uses.empty());<br>

+<br>

+    return 0;<div><br>

+  }<br>

+<br>

+  bool visitPHINode(PHINode &PN) {<br></div>

+    // See if we already have computed info on this node.<br>

+    std::pair<uint64_t, bool> &PHIInfo = P.PHIOrSelectSizes[&PN];<br>

+    if (PHIInfo.first) {<br>

+      PHIInfo.second = true;<br>

+      insertUse(PHIInfo.first);<div><br>

+      return true;<br>

+    }<br>

+<br></div>

+    // Check for an unsafe use of the PHI node.<br>

+    if (Instruction *EscapingI = hasUnsafePHIOrSelectUse(&PN, PHIInfo.first))<br>

+      return markAsEscaping(*EscapingI);<br>

+<br>

+    insertUse(PHIInfo.first);<div><br>

+    return true;<br>

+  }<br>

+<br>

+  bool visitSelectInst(SelectInst &SI) {<br></div>

+    if (Value *Result = foldSelectInst(SI)) {<br>

+      if (Result == Ptr)<br>

+        // If the result of the constant fold will be the pointer, recurse<br>

+        // through the select as if we had RAUW'ed it.<br>

+        enqueueUsers(SI, Offset);<div><br>

+<br>

+      return true;<br>

+    }<br>

+<br></div>

+    // See if we already have computed info on this node.<br>

+    std::pair<uint64_t, bool> &SelectInfo = P.PHIOrSelectSizes[&SI];<br>

+    if (SelectInfo.first) {<br>

+      SelectInfo.second = true;<br>

+      insertUse(SelectInfo.first);<div><br>

+      return true;<br>

+    }<br>

+<br></div>

+    // Check for an unsafe use of the PHI node.<br>

+    if (Instruction *EscapingI = hasUnsafePHIOrSelectUse(&SI, SelectInfo.first))<br>

+      return markAsEscaping(*EscapingI);<br>

+<br>

+    insertUse(SelectInfo.first);<div><br>

+    return true;<br>

+  }<br>

+<br></div>

+  /// \brief Disable SROA entirely if there are unhandled users of the alloca.<br>

+  bool visitInstruction(Instruction &I) { return markAsEscaping(I); }<br>

+};<br>

+<br>

+<br>

+/// \brief Use adder for the alloca partitioning.<br>

+///<br>

+/// This class adds the uses of an alloca to all of the partitions which it<br>

+/// uses. For splittable partitions, this can end up doing essentially a linear<br>

+/// walk of the partitions, but the number of steps remains bounded by the<br>

+/// total result instruction size:<br>

+/// - The number of partitions is a result of the number unsplittable<br>

+///   instructions using the alloca.<br>

+/// - The number of users of each partition is at worst the total number of<br>

+///   splittable instructions using the alloca.<br>

</blockquote>

<br>

Isn't it the number of splittable instructions plus one (the unsplittable<br>

instruction that caused the partition to exist in the first place)?<br></blockquote><div><br></div><div>Well, yes, but O(N+1) is equivalent to O(N).... I'm establishing the bounds that grow.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">


<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+/// Thus we will produce N * M instructions in the end, where N are the number<br>

+/// of unsplittable uses and M are the number of splittable. This visitor does<br>

+/// the exact same number of updates to the partitioning.<br>

+///<br>

+/// In the more common case, this visitor will leverage the fact that the<br>

+/// partition space is pre-sorted, and do a logarithmic search for the<br>

+/// partition needed, making the total visit a classical ((N + M) * log(N))<br>

+/// complexity operation.<br>

</blockquote>

<br>

<br>

... chopped bits I didn't get round to commenting on yet ...<br>

<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+/// \brief Try to find a partition of the aggregate type pasesd in for a given<br>

</blockquote>

<br>

pasesd -> passed<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+/// offset and size.<br>

+///<br>

+/// This recurses through the aggregate type and tries to compute a subtype<br>

+/// based on the offset and size. When the offset and size span a sub-section<br>

+/// of an array, it will even compute a new array type for that sub-section.<br>

+static Type *getTypePartition(const TargetData &TD, Type *Ty,<br>

+                              uint64_t Offset, uint64_t Size) {<br>

+  if (Offset == 0 && TD.getTypeAllocSize(Ty) == Size)<br>

+    return Ty;<br>

+<br>

+  if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {<br>

+    Type *ElementTy = SeqTy->getElementType();<br>

+    uint64_t ElementSize = TD.getTypeAllocSize(ElementTy)<u></u>;<br>

+    uint64_t NumSkippedElements = Offset / ElementSize;<br>

+    if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy))<br>

+      if (NumSkippedElements > ArrTy->getNumElements())<br>

+        return 0;<br>

</blockquote>

<br>

Shouldn't this be NumSkippedElements >= ArrTy->getNumElements() ?<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+    if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy))<br>

+      if (NumSkippedElements > VecTy->getNumElements())<br>

+        return 0;<br>

</blockquote>

<br>

Likewise.<br></blockquote><div><br></div><div>Good catch.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

<br>

If this sequential type is a pointer type, the following logic is going to come<br>

to a strange conclusion.  For example you have a [1024xi8]* as the type.  That's<br>

only 32/64 bits wide no matter how big the element type is...  Yet here you are<br>

subtracting off multiples of the element type size.  Pointer types should be<br>

handled like integers and other scalar types as far as I can see.<br></blockquote><div><br></div><div>Yea, I never intended this code to fire on pointers. Fixed.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">


<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+    Offset -= NumSkippedElements * ElementSize;<br>

+<br>

+    // First check if we need to recurse.<br>

+    if (Offset > 0 || Size < ElementSize) {<br>

+      // Bail if the partition ends in a different array element.<br>

+      if ((Offset + Size) > ElementSize)<br>

+        return 0;<br>

+      // Bail if this is a poniter element, we can't recurse through them.<br>

+      if (ElementTy->isPointerTy())<br>

+        return 0;<br>

</blockquote>

<br>

This seems to be confusing SeqTy being a pointer type and ElementTy being a<br>

pointer type.  I don't see the need for this pointer type check.<br></blockquote><div><br></div><div>Yea, same bug as above. I was checking at the wrong place.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">


<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+      // Recurse through the element type trying to peel off offset bytes.<br>

+      return getTypePartition(TD, ElementTy, Offset, Size);<br>

+    }<br>

+    assert(Offset == 0);<br>

</blockquote>

<br>

Local analysis shows that this assertion can't fail.  I guess it can be<br>

considered documentation.<br></blockquote><div><br></div><div>Yes, I use assertions in several places as local documentation of invariants.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">


<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+<br>

+    if (Size == ElementSize)<br>

+      return ElementTy;<br>

+    assert(Size > ElementSize);<br>

</blockquote>

<br>

Likewise for this assert.<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+    uint64_t NumElements = Size / ElementSize;<br>

+    if (NumElements * ElementSize != Size)<br>

+      return 0;<br>

+    return ArrayType::get(ElementTy, NumElements);<br>

+  }<br>

+<br>

+  StructType *STy = dyn_cast<StructType>(Ty);<br>

+  if (!STy)<br>

+    return 0;<br>

+<br>

+  for (StructType::element_iterator EI = STy->element_begin(),<br>

+                                    EE = STy->element_end();<br>

+       EI != EE; ++EI) {<br>

</blockquote>

<br>

There's no need to loop.  Using StructLayout you can leap straight to the right<br>

field, both for the start and the end.<br></blockquote><div><br></div><div>All this code was bogus and has been replaced with proper code.</div><div><br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">


<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">+    // Try to build up a sub-structure.<br>

</blockquote>

<br>

The following logic is only correct for packed structs.  I suggest instead you<br>

use StructLayout to work out the fields you start and end in, and also to check<br>

that you don't start partially inside the start or end partially inside the end,<br>

then grab that range of fields to make the new struct, which should be packed<br>

iff the original one was.  I suggest you also add an assertion that the size of<br>

the new struct is OK, and some testcases for structs with alignment padding etc.<br></blockquote><div><br></div><div>While I had already switched to StructLayout, I agree this is more tricky. =] I've adjusted the code. I'm still really behind working up test cases... lemme know if you have any ideas handy.</div>


<div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+    SmallVector<Type *, 4> ElementTys;<br>

+    ElementTys.push_back(<u></u>ElementTy);<br>

+    Size -= ElementSize;<br>

+    while (++EI != EE) {<br>

+      if (Size == 0)<br>

+        return StructType::get(ElementTy-><u></u>getContext(), ElementTys);<br>

+      ElementSize = TD.getTypeAllocSize(*EI);<br>

+      if (Size < ElementSize)<br>

+        return 0;<br>

+      Size -= ElementSize;<br>

+    }<br>

+    return 0;<br>

+  }<br>

+  return 0;<br>

+}<br>

+<br>

+bool SROA::rewriteAllocaPartition(<u></u>AllocaInst &AI,<br>

+                                  AllocaPartitioning &P,<br>

+                                  AllocaPartitioning::iterator PI) {<br>

+  uint64_t AllocaSize = PI->EndOffset - PI->BeginOffset;<br>

+  assert(P.use_begin(PI) != P.use_end(PI));<br>

+<br>

+  // Try to compute a friendly type for this partition of the alloca. This<br>

+  // won't always succeed, in which case we fall back to a legal integer type<br>

+  // or an i8 array of an appropriate size.<br>

+  Type *AllocaTy = 0;<br>

+  if (Type *PartitionTy = P.getCommonType(PI))<br>

+    if (TD->getTypeAllocSize(<u></u>PartitionTy) == AllocaSize)<br>

+      AllocaTy = PartitionTy;<br>

+  if (!AllocaTy && AI.getAllocatedType()-><u></u>isSized())<br>

</blockquote>

<br>

At this point you already know that the the alloca type is sized, right?<br><br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+    if (Type *PartitionTy = getTypePartition(*TD, AI.getAllocatedType(),<br>

+                                             PI->BeginOffset, AllocaSize))<br>

+      AllocaTy = PartitionTy;<br>

+  if ((!AllocaTy ||<br>

+       (AllocaTy->isArrayTy() &&<br>

+        AllocaTy->getArrayElementType(<u></u>)->isIntegerTy())) &&<br>

+      TD->isLegalInteger(AllocaSize * 8))<br>

+    AllocaTy = Type::getIntNTy(*C, AllocaSize * 8);<br>

+  if (!AllocaTy)<br>

+    AllocaTy = ArrayType::get(Type::<u></u>getInt8Ty(*C), AllocaSize);<br>

+<br>

+  // Check for the case where we're going to rewrite to a new alloca of the<br>

+  // exact same type as the original, and with the same access offsets. In that<br>

+  // case, re-use the existing alloca, but still run through the rewriter to<br>

+  // performe phi and select speculation.<br>

+  AllocaInst *NewAI;<br>

+  if (AllocaTy == AI.getAllocatedType()) {<br>

</blockquote>

<br>

Is this case actually possible?  How can it occur?</blockquote><div><br></div><div>It happens for every scalar (or vector) alloca which has a phi or select blocking trivial SROA?</div><div><br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">


<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+    assert(PI->BeginOffset == 0 &&<br>

+           "Non-zero begin offset but same alloca type");<br>

+    assert(PI == P.begin() && "Begin offset is zero on later partition");<br>

+    NewAI = &AI;<br>

+  } else {<br>

+    // FIXME: The alignment here is almost certainly wrong, but it matches what<br>

+    // the old SROA pass used.<br>

</blockquote>

<br>

It isn't wrong as far as I can see.  The new memory may be more aligned than the<br>

old memory slice inside the original alloca, but that's OK.  It would be wrong<br>

if it was less aligned than the old.<br></blockquote><div><br></div><div>Sorry, I really meant "suboptimal". We often know that much less alignment is required. Maybe this doesn't matter that much in practice. Made comment more clear.</div>


<div><br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+    NewAI = new AllocaInst(AllocaTy, 0, AI.getAlignment(),<br>

+                           AI.getName() + ".sroa." + Twine(PI - P.begin()),<br>

+                           &AI);<br>

+    ++NumNewAllocas;<br>

+  }<br>

+<br>

+  DEBUG(dbgs() << "Rewriting alloca partition "<br>

+               << "[" << PI->BeginOffset << "," << PI->EndOffset << ") to: "<br>

+               << *NewAI << "\n");<br>

+<br>

+  AllocaPartitionRewriter Rewriter(*TD, P, PI, *this, AI, *NewAI,<br>

+                                   PI->BeginOffset, PI->EndOffset);<br>

+  DEBUG(dbgs() << "  rewriting ");<br>

+  DEBUG(P.print(dbgs(), PI, ""));<br>

+  if (Rewriter.visitUsers(P.use_<u></u>begin(PI), P.use_end(PI))) {<br>

+    DEBUG(dbgs() << "  and queuing for promotion\n");<br>

+    PromotableAllocas.push_back(<u></u>NewAI);<br>

+  } else if (NewAI != &AI) {<br>

+    // If we can't promote the alloca, iterate on it to check for new<br>

+    // refinements exposed by splitting the current alloca. Don't iterate on an<br>

+    // alloca which didn't actually change and didn't get promoted.<br>

+    Worklist.insert(NewAI);<div><br>

+  }<br>

+  return true;<br>

+}<br>

+<br></div>

+bool SROA::splitAlloca(AllocaInst &AI, AllocaPartitioning &P) {<br>

+  bool Changed = false;<br>

+  for (AllocaPartitioning::iterator PI = P.begin(), PE = P.end(); PI != PE;<br>

+       ++PI)<br>

+    Changed |= rewriteAllocaPartition(AI, P, PI);<br>

+<br>

+  return Changed;<br>

+}<br>

+<br>

+bool SROA::runOnAlloca(AllocaInst &AI) {<br>

+  DEBUG(dbgs() << "SROA alloca: " << AI << "\n");<br>

+  ++NumAllocasAnalyzed;<br>

+<br>

+  // Special case dead allocas, as they're trivial.<br>

+  if (AI.use_empty()) {<br>

+    AI.eraseFromParent();<div><br>

+    return true;<br>

+  }<br>

+<br></div>

+  // Skip alloca forms that this analysis can't handle.<br>

+  if (AI.isArrayAllocation() || !AI.getAllocatedType()-><u></u>isSized() ||<br>

+      TD->getTypeAllocSize(AI.<u></u>getAllocatedType()) == 0)<br>

+    return false;<br>

+<br>

+  // First check if this is a non-aggregate type that we should simply promote.<br>

+  if (!AI.getAllocatedType()-><u></u>isAggregateType() && isAllocaPromotable(&AI)) {<br>

+    DEBUG(dbgs() << "  Trivially scalar type, queuing for promotion...\n");<br>

+    PromotableAllocas.push_back(&<u></u>AI);<div><br>

+    return false;<br>

+  }<br>

+<br></div>

+  // Build the partition set using a recursive instruction-visiting builder.<br>

+  AllocaPartitioning P(*TD, AI);<br>

+  DEBUG(P.print(dbgs()));<br>

+  if (P.isEscaped())<br>

+    return false;<br>

+<div><br>

+  // No partitions to split. Leave the dead alloca for a later pass to clean up.<br>

</div></blockquote>

<br>

Couldn't it be added to DeadInsts?<br></blockquote><div><br></div><div>It could, but only if it is trivially dead. It didn't seem likely enough to be worth adding code to delete the users (which don't form a partition) as well as the alloca....</div>


<div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div>

+  if (P.begin() == P.end())<br></div>

+    return false;<br>

+<br>

+  return splitAlloca(AI, P);<br>

+}<br>

+<br>

+void SROA::deleteDeadInstructions() {<br>

+  DeadSplitInsts.clear();<br>

</blockquote>

<br>

This seems like a strange place to clear DeadSplitInsts.<br></blockquote><div><br></div><div>We use DeadSplitInsts as a set to guard multiple insertions to DeadInsts. Once deleted, they're no longer possible to be multiply inserted. And worse, it could cause bugs because of ABA-like issues</div>


<div><br></div><div>Clearly the field needs better comments. =]</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+  while (!DeadInsts.empty()) {<br>

+    Instruction *I = DeadInsts.pop_back_val();<br>

+    DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");<br>

+<br>

+    for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)<br>

+      if (Instruction *U = dyn_cast<Instruction>(*OI)) {<br>

+        // Zero out the operand and see if it becomes trivially dead.<br>

+        *OI = 0;<br>

+        if (isInstructionTriviallyDead(U)<u></u>)<br>

+          DeadInsts.push_back(U);<br>

+      }<br>

+<br>

+    ++NumDeleted;<br>

+    I->eraseFromParent();<br>

+  }<br>

+}<br>

+<br>

</blockquote>

<br>

Ciao, Duncan.<div><div><br>

______________________________<u></u>_________________<br>

llvm-commits mailing list<br>

<a href="mailto:llvm-commits@cs.uiuc.edu" target="_blank">llvm-commits@cs.uiuc.edu</a><br>

<a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits" target="_blank">http://lists.cs.uiuc.edu/<u></u>mailman/listinfo/llvm-commits</a><br>

</div></div></blockquote></div><br></div>