<div class="gmail_quote">On Thu, Sep 13, 2012 at 1:32 PM, Duncan Sands <span dir="ltr"><<a href="mailto:baldrick@free.fr" target="_blank">baldrick@free.fr</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

Hi Chandler,<div class="im"><br>

<br>

On 13/09/12 12:15, Chandler Carruth wrote:<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

Hello all,<br>

<br>

Here is an updated patch. It contains all of the fixes I discussed here, and it<br>

also contains some significant updates:<br>

<br>

- Now has additionally survived intensive testing by Duncan. All crashers found<br>

with all the dragonegg test suites (including many ada tests etc) are fixed. The<br>

initial and immediate miscompile issues fixed. It's looking pretty good testing<br>

wise.<br>

<br>

- *Many* test cases added. Lots of bugs fixed. Generally, quite a bit of<br>

hardening here. Duncan found several soft areas that I then was able to more<br>

thoroughly exercise.<br>

<br>

<br>

I think this is getting *really* close to being ready for an initial commit. It<br>

is still behind a default-off flag. It will stay there for a bit. In particular<br>

I want to get more testing, I want to implement SSAUpdater-logic and tie it into<br>

the rest of the pass manager and collect final performance results before<br>

enabling it. But I don't want to keep hacking on this out of tree. =]<br>

<br>

Anyways, review greatly appreciated.<br>

</blockquote>

<br></div>

looks great to me, I think you should apply it.</blockquote><div><br></div><div>Thanks so much for the reviews and testing.</div><div><br></div><div>I've committed this version in r163883 as discussed on IRC, and will be addressing your comments below with follow-up patches. Any further comments, I'll also try to provide follow-up patches promptly to address.</div>

<div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Some small comments:<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

--- /dev/null<br>

+++ b/lib/Transforms/Scalar/SROA.<u></u>cpp<br>

</blockquote>

...<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="im">

+namespace {<br>

+/// \brief Alloca partitioning representation.<br>

+///<br></div>

+/// This class represents a partitioning of an alloca into slices, and<div class="im"><br>

+/// information about the nature of uses of each slice of the alloca. The goal<br>

+/// is that this information is sufficient to decide if and how to split the<br>

+/// alloca apart and replace slices with scalars. It is also intended that this<br>

+/// structure can capture the relevant information needed both due decide about<br>

</div></blockquote>

<br>

due decide -> to decide<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="im">

+/// and to enact these transformations.<br>

+class AllocaPartitioning {<br>

+public:<br></div>

+  /// \brief A common base class for representing a half-open byte range.<br>

+  struct ByteRange {<br>

+    /// \brief The beginning offset of the range.<br>

+    uint64_t BeginOffset;<br>

+<br>

+    /// \brief The ending offset, not included in the range.<br>

+    uint64_t EndOffset;<br>

+<br>

+    ByteRange() : BeginOffset(), EndOffset() {}<br>

+    ByteRange(uint64_t BeginOffset, uint64_t EndOffset)<br>

+        : BeginOffset(BeginOffset), EndOffset(EndOffset) {}<br>

+<br>

+    /// \brief Support for ordering ranges.<br>

+    ///<br>

+    /// This provides an ordering over ranges such that start offsets are<br>

+    /// always increasing, and within equal start offsets, the end offsets are<br>

+    /// decreasing. Thus the spanning range comes first in in cluster with the<br>

</blockquote>

<br>

in in -> in a<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+    /// same start position.<br>

</blockquote>

...<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+  /// \brief A partition of an alloca.<br>

+  ///<br>

+  /// This structure represents a contiguous partition of the alloca. These are<br>

+  /// formed by examining the uses of the alloca. During formation, they may<br>

+  /// overlap but once an AllocaPartitioning is built, the Partitions within it<br>

+  /// are all disjoint.<br>

+  struct Partition : public ByteRange {<br>

+    /// \brief Whether this partition is splittable into smaller partitions.<br>

+    ///<br>

+    /// We flag partitions as splittable when they are formed entirely due to<br>

+    /// accesses by trivially split operations such as memset and memcpy.<br>

</blockquote>

<br>

trivially split -> trivially splittable<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+    ///<br>

+    /// FIXME: At some point we should consider loads and stores of FCAs to be<br>

+    /// splittable and eagerly split them into scalar values.<br>

+    bool IsSplittable;<br>

+<br>

+    Partition() : ByteRange(), IsSplittable() {}<br>

+    Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)<br>

+        : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}<br>

+  };<br>

+<br>

+  /// \brief A particular use of a partition of the alloca.<br>

+  ///<br>

+  /// This structure is used to associate uses of a partition with it. They<br>

+  /// mark the range of bytes which are referenced by a particular instruction,<br>

+  /// and includes a handle to the user itself and the pointer value in use.<br>

+  /// The bounds of these uses are determined by intersecting the bounds of the<br>

+  /// memory use itself with a particular partition. As a consequence there is<br>

+  /// intentionally overlap between various usues of the same partition.<br>

</blockquote>

<br>

usues -> uses<br>

<br>

...<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+  /// \brief Allow iterating the dead users for this alloca.<br>

+  ///<br>

+  /// These are instructions which will never actually use the alloca as they<br>

+  /// are outside the allocated range. They are safe to replace with undef and<br>

+  /// delete.<br>

</blockquote>

<br>

If you were feeling bold you could replace them by "unreachable" (in this<br>

context probably a store to undef would be more appropriate).<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+  /// @{<br>

+  typedef SmallVectorImpl<Instruction *>::const_iterator dead_user_iterator;<br>

+  dead_user_iterator dead_user_begin() const { return DeadUsers.begin(); }<br>

+  dead_user_iterator dead_user_end() const { return DeadUsers.end(); }<br>

+  /// @}<br>

+<br>

+  /// \brief Allow iterating the dead operands referring to this alloca.<br>

</blockquote>

<br>

Maybe "dead expressions" would be clearer.  These are just some addresses<br>

computed from the alloca address, right?<br>

<br>

...<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+  /// \brief Compute a common type among the uses of a particular partition.<br>

+  ///<br>

+  /// This routines walks all of the uses of a particular partition and tries<br>

+  /// to find a common type between them. Untyped operations such as memset and<br>

+  /// memcpy are ignored.<div class="im"><br>

+  Type *getCommonType(iterator I) const;<br></div><div class="im">

+<br>

+  void print(raw_ostream &OS, const_iterator I, StringRef Indent = "  ") const;<br>

+  void printUsers(raw_ostream &OS, const_iterator I,<br>

+                  StringRef Indent = "  ") const;<br>

+  void print(raw_ostream &OS) const;<br>

+  void dump(const_iterator I) const LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED;<br>

+  void dump() const LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED;<br>

</div></blockquote>

<br>

A patch just went in to wrap "dump" functions in ifndef NDEBUG (plus some other<br>

condition), maybe you should do the same here.<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="im">

+  /// \brief The partitions of the alloca.<br></div>

+  ///<div class="im"><br>

+  /// We store a vector of the partitions over the alloca here. This vector is<br>

+  /// sorted by increasing begin offset, and then by decreasing end offset. See<br></div>

+  /// the Partition inner class for more details. Initially there are overlaps,<br>

+  /// be during construction we form a disjoint sequence toward the end.<br>

</blockquote>

<br>

be during -> ?<br>

I couldn't make much sense of this sentence.<br>

<br>

...<div class="im"><br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+  bool computeConstantGEPOffset(<u></u>GetElementPtrInst &GEPI, uint64_t &GEPOffset) {<br>

</blockquote>

<br></div>

Is this routine really not available from some other part of LLVM?<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="im">

+  Value *foldSelectInst(SelectInst &SI) {<br>

+    // If the condition being selected on is a constant or the same value is<br>

+    // being selected between, fold the select. Yes this does (rarely) happen<br>

+    // early on.<br></div><div class="im">

+    if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.<u></u>getCondition()))<br>

+      return SI.getOperand(1+CI->isZero());<br>

+    if (SI.getOperand(1) == SI.getOperand(2)) {<br></div>

+      assert(*U == SI.getOperand(1));<br>

</blockquote>

<br>

Urgh, what is the *U that turns up here?  Seems like breaking the abstraction.<div class="im"><br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+      return SI.getOperand(1);<br>

+    }<br>

+    return 0;<br>

+  }<br>

+};<br>

</blockquote>

<br></div>

...<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="im">

+  bool handleLoadOrStore(Type *Ty, Instruction &I) {<br></div>

+    uint64_t Size = TD.getTypeStoreSize(Ty);<br>

+<div class="im"><br>

+    // If this memory access can be shown to *statically* extend outside the<br>

+    // bounds of of the allocation, it's behavior is undefined, so simply<br></div>

+    // ignore it. Note that this is more strict than the generic clamping<br>

+    // behavior of insertUse. We also try to handle cases which might run the<br>

+    // risk of overflow.<br>

+    // FIXME: We should instead consider the pointer to have escaped if this<div class="im"><br>

+    // function is being instrumented for addressing bugs or race conditions.<br></div>

+    if (Offset >= AllocSize || Size > AllocSize || Offset + Size > AllocSize) {<br>

</blockquote>

<br>

Such accesses could be replaced by "unreachable".<div class="im"><br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+  bool visitGetElementPtrInst(<u></u>GetElementPtrInst &GEPI) {<br>

+    //unsigned IntPtrWidth = TD->getPointerSizeInBits();<br>

+    //assert(IntPtrWidth == Offset.getBitWidth());<br>

</blockquote>

<br></div>

Commented out code above...<div class="im"><br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+    uint64_t GEPOffset;<br>

+    if (!computeConstantGEPOffset(<u></u>GEPI, GEPOffset))<br>

+      return markAsEscaping(GEPI);<br>

+<br>

+    enqueueUsers(GEPI, GEPOffset);<br>

+    return true;<br>

+  }<br>

</blockquote>

<br></div>

...<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+  bool visitMemSetInst(MemSetInst &II) {<br>

</blockquote>

<br>

It is impossible for the use to be the size or the byte argument, right, because<br>

you only deal with expressions of pointer type?  But if someone adds ptrtoint<br>

support one day then you could be in trouble here, so better to check...<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="im">

+    ConstantInt *Length = dyn_cast<ConstantInt>(II.<u></u>getLength());<br></div>

+    insertUse(II, Length ? Length->getZExtValue() : AllocSize - Offset, Length);<br>

+    return true;<br>

+  }<br>

</blockquote>

<br>

...<div class="im"><br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+  // Disable SRoA for any intrinsics except for lifetime invariants.<br>

</blockquote>

<br></div>

What about debug intrinsics?<br>

<br>

...<div class="im"><br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+/// \brief Use adder for the alloca partitioning.<br>

+///<br>

+/// This class adds the uses of an alloca to all of the partitions which it<br>

+/// uses.<br>

</blockquote>

<br></div>

Maybe should read: This class adds the uses of an alloca to all of the<br>

partitions which they use.<div class="im"><br>

<br>

 For splittable partitions, this can end up doing essentially a linear<br>

</div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="im">

+/// walk of the partitions, but the number of steps remains bounded by the<br>

+/// total result instruction size:<br>

+/// - The number of partitions is a result of the number unsplittable<br>

+///   instructions using the alloca.<br>

+/// - The number of users of each partition is at worst the total number of<br>

+///   splittable instructions using the alloca.<br></div><div class="im">

+/// Thus we will produce N * M instructions in the end, where N are the number<br>

+/// of unsplittable uses and M are the number of splittable. This visitor does<br>

+/// the exact same number of updates to the partitioning.<br>

+///<br>

+/// In the more common case, this visitor will leverage the fact that the<br>

+/// partition space is pre-sorted, and do a logarithmic search for the<br>

+/// partition needed, making the total visit a classical ((N + M) * log(N))<br>

+/// complexity operation.<br>

</div></blockquote>

<br>

...<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+  void visitGetElementPtrInst(<u></u>GetElementPtrInst &GEPI) {<br>

+    if (GEPI.use_empty())<br>

+      return markAsDead(GEPI);<div class="im"><br>

+<br>

+    //unsigned IntPtrWidth = TD->getPointerSizeInBits();<br>

+    //assert(IntPtrWidth == Offset.getBitWidth());<br>

</div></blockquote>

<br>

Commented out code...<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="im">

+    uint64_t GEPOffset;<br>

+    if (!computeConstantGEPOffset(<u></u>GEPI, GEPOffset))<br></div>

+      llvm_unreachable("Unable to compute constant offset for use");<div class="im"><br>

+<br>

+    enqueueUsers(GEPI, GEPOffset);<br>

+  }<br>

</div></blockquote>

<br>

...<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+  /// \brief A set of deleted alloca instructions.<br>

+  ///<br>

+  /// These pointers are *no longer valid* as they have been deleted. They are<br>

+  /// used to remove deleted allocas from the list of promotable allocas.<br>

+  SmallPtrSet<AllocaInst *, 4> DeletedAllocas;<br>

</blockquote>

<br>

Does this have to live as a member variable?  It is only used in a very<br>

localized way: it could be declared in runOnFunction and passed to<br>

deleteDeadInstructions to fill in.  What is dangerous about this kind of set<br>

is that you mustn't allocate any new alloca's after adding something to the<br>

set (and before clearing the set) since it might be allocated with the address<br>

of the dead alloca, causing confusion and non-determinism.  Keeping the set<br>

highly local makes it obvious that this can't happen.<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+<br>

+  /// \brief A collection of alloca instructions we can directly promote.<br>

+  std::vector<AllocaInst *> PromotableAllocas;<br>

</blockquote>

<br>

...<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+/// \brief Accumulate the constant offsets in a GEP into a single APInt offset.<br>

+///<br>

+/// If the provided GEP is all-constant, the total byte offset formed by the<br>

+/// GEP is computed and Offset is set to it. If the GEP has any non-constant<br>

+/// operands, the function returns false and the value of Offset is unmodified.<br>

+static bool accumulateGEPOffsets(const TargetData &TD, GEPOperator &GEP,<br>

+                                 APInt &Offset) {<br>

</blockquote>

<br>

This also sounds like the kind of routine that maybe exists already (or should<br>

exist already).<br>

<br>

...<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+/// \brief Recursively compute indices for a natural GEP.<br>

+///<br>

+/// This is the recursive step for getNaturalGEPWithOffset that walks down the<br>

+/// element types adding appropriate indices for the GEP.<br>

+static Value *getNaturalGEPRecursively(<u></u>IRBuilder<> &IRB, const TargetData &TD,<br>

+                                       Value *Ptr, Type *Ty, APInt &Offset,<div class="im"><br>

+                                       Type *TargetTy,<br>

+                                       SmallVectorImpl<Value *> &Indices,<br>

+                                       const Twine &Prefix) {<br></div>

+  if (Offset == 0)<br>

+    return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices, Prefix);<br>

+<br>

+  // We can't recurse through pointer types.<br>

+  if (Ty->isPointerTy())<br>

+    return 0;<br>

+<br>

+  if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {<br>

+    unsigned ElementSizeInBits = VecTy->getScalarSizeInBits();<br>

+    if (ElementSizeInBits % 8)<br>

+      return 0; // GEPs over multiple of 8 size vector elements are invalid.<br>

</blockquote>

<br>

over multiple -> over non-multiple<br>

You invented a new rule here, maybe add a comment that this whole area is a can<br>

of worms and not properly defined right now.<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+    APInt ElementSize(Offset.<u></u>getBitWidth(), ElementSizeInBits / 8);<br>

+    APInt NumSkippedElements = Offset.udiv(ElementSize);<br>

+    if (NumSkippedElements.ugt(VecTy-<u></u>>getNumElements()))<br>

+      return 0;<div class="im"><br>

+    Offset -= NumSkippedElements * ElementSize;<br></div>

+    Indices.push_back(IRB.getInt(<u></u>NumSkippedElements));<br>

+    return getNaturalGEPRecursively(IRB, TD, Ptr, VecTy->getElementType(),<br>

+                                    Offset, TargetTy, Indices, Prefix);<br>

+  }<br>

</blockquote>

<br>

Above you seem to be assuming that the elements are naturally aligned, i.e. have<br>

alloc size equal to the size in bits.  I'm not sure that is guaranteed.<br>

<br>

...<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="im">

+  StructType *STy = dyn_cast<StructType>(Ty);<br>

+  if (!STy)<br>

+    return 0;<br>

+<br></div><div class="im">

+  const StructLayout *SL = TD.getStructLayout(STy);<br></div>

+  uint64_t StructOffset = Offset.getZExtValue();<br>

+  if (StructOffset > SL->getSizeInBytes())<br>

</blockquote>

<br>

Probably this should be >= here.<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+    return 0;<br>

+  unsigned Index = SL-><u></u>getElementContainingOffset(<u></u>StructOffset);<br>

+  Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index));<br>

+  Type *ElementTy = STy->getElementType(Index);<br>

+  if (Offset.uge(TD.<u></u>getTypeAllocSize(ElementTy)))<br>

+    return 0; // The offset points into alignment padding.<br>

+<br>

+  Indices.push_back(IRB.<u></u>getInt32(Index));<br>

+  return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,<br>

+                                  Indices, Prefix);<br>

+}<br>

</blockquote>

<br>

...<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+<br>

+/// \brief Get a natural GEP from a base pointer to a particular offset and<br>

+/// resulting in a particular type.<br>

+///<br>

+/// The goal is to produce a "natural" looking GEP that works with the existing<br>

+/// composite types to arrive at the appropriate offset and element type for<br>

+/// a pointer. TargetTy is the element type the returned GEP should point-to if<br>

+/// possible. We recurse by decreasing Offset, adding the appropriate index to<br>

+/// Indices, and setting Ty to the result subtype.<br>

+///<br>

+/// If no natural GEP can be constructed, this function returns a null Value*.<br>

</blockquote>

<br>

returns a null Value* -> returns null<br>

<br>

...<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+/// \brief Test whether the given alloca partition can be promoted to a vector.<br>

+///<br>

+/// This is a quick test to check whether we can rewrite a particular alloca<br>

+/// partition (and its newly formed alloca) into a vector alloca with only<br>

+/// whole-vector loads and stores such that it could be promoted to a vector<br>

+/// SSA value. We only can ensure this for a limited set of operations, and we<br>

+/// don't want to do the rewrites unless we are confident that the result will<br>

+/// be promotable, so we have an early test here.<br>

+static bool isVectorPromotionViable(const TargetData &TD,<br>

+                                    Type *AllocaTy,<br>

+                                    AllocaPartitioning &P,<br>

+                                    uint64_t PartitionBeginOffset,<br>

+                                    uint64_t PartitionEndOffset,<br>

+                                    AllocaPartitioning::const_use_<u></u>iterator I,<br>

+                                    AllocaPartitioning::const_use_<u></u>iterator E) {<br>

+  VectorType *Ty = dyn_cast<VectorType>(AllocaTy)<u></u>;<br>

+  if (!Ty)<br>

+    return false;<br>

+<br>

+  uint64_t VecSize = TD.getTypeSizeInBits(Ty);<br>

+  uint64_t ElementSize = Ty->getScalarSizeInBits();<br>

+<br>

+  // While the definition of LLVM vectors is bitpacked, we don't support sizes<br>

+  // that aren't byte sized.<br>

+  if (ElementSize % 8)<br>

+    return false;<br>

</blockquote>

<br>

You should probably also bail out if ElementSize is not the same as the alloc<br>

size, since no-one cares about those and they are very easy to get wrong.  In<br>

fact that would make the % 8 test redundant.  I reckon you should replace all<br>

of your % 8 tests everywhere with a utility function that checks that the alloc<br>

size and the bit size coincide.<br>

<br>

...<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+/// \brief Try to find a partition of the aggregate type passed in for a given<div class="im"><br>

+/// offset and size.<br>

+///<br>

+/// This recurses through the aggregate type and tries to compute a subtype<br>

+/// based on the offset and size. When the offset and size span a sub-section<br>

+/// of an array, it will even compute a new array type for that sub-section.<br>

+static Type *getTypePartition(const TargetData &TD, Type *Ty,<br>

+                              uint64_t Offset, uint64_t Size) {<br>

+  if (Offset == 0 && TD.getTypeAllocSize(Ty) == Size)<br>

+    return Ty;<br>

+<br></div>

+  if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {<br>

+    // We can't partition pointers...<br>

+    if (SeqTy->isPointerTy())<br>

+      return 0;<br>

+<div class="im"><br>

+    Type *ElementTy = SeqTy->getElementType();<br>

+    uint64_t ElementSize = TD.getTypeAllocSize(ElementTy)<u></u>;<br>

+    uint64_t NumSkippedElements = Offset / ElementSize;<br>

+    if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy))<br></div>

+      if (NumSkippedElements >= ArrTy->getNumElements())<br>

+        return 0;<br>

+    if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy))<br>

+      if (NumSkippedElements >= VecTy->getNumElements())<br>

+        return 0;<div class="im"><br>

+    Offset -= NumSkippedElements * ElementSize;<br>

+<br>

+    // First check if we need to recurse.<br>

+    if (Offset > 0 || Size < ElementSize) {<br>

+      // Bail if the partition ends in a different array element.<br>

+      if ((Offset + Size) > ElementSize)<br>

+        return 0;<br></div><div class="im">

+      // Recurse through the element type trying to peel off offset bytes.<br>

+      return getTypePartition(TD, ElementTy, Offset, Size);<br>

+    }<br>

+    assert(Offset == 0);<br></div><div class="im">

+<br>

+    if (Size == ElementSize)<br>

+      return ElementTy;<br>

+    assert(Size > ElementSize);<br></div><div class="im">

+    uint64_t NumElements = Size / ElementSize;<br>

+    if (NumElements * ElementSize != Size)<br>

+      return 0;<br>

+    return ArrayType::get(ElementTy, NumElements);<br>

+  }<br>

+<br>

+  StructType *STy = dyn_cast<StructType>(Ty);<br>

+  if (!STy)<br>

+    return 0;<br>

+<br></div><div class="im">

+  const StructLayout *SL = TD.getStructLayout(STy);<br></div>

+  if (Offset > SL->getSizeInBytes())<br>

+    return 0;<br>

+  uint64_t EndOffset = Offset + Size;<br>

+  if (EndOffset > SL->getSizeInBytes())<br>

+    return 0;<br>

+<br>

+  unsigned Index = SL-><u></u>getElementContainingOffset(<u></u>Offset);<br>

+  if (SL->getElementOffset(Index) != Offset)<br>

+    return 0; // Inside of padding.<br>

+  Offset -= SL->getElementOffset(Index);<br>

+<br>

+  Type *ElementTy = STy->getElementType(Index);<div class="im"><br>

+  uint64_t ElementSize = TD.getTypeAllocSize(ElementTy)<u></u>;<br></div><div class="im">

+  if (Offset >= ElementSize)<br></div>

+    return 0; // The offset points into alignment padding.<br>

+<br>

+  // See if any partition must be contained by the element.<div class="im"><br>

+  if (Offset > 0 || Size < ElementSize) {<br></div><div class="im">

+    if ((Offset + Size) > ElementSize)<br>

+      return 0;<br>

+    // Bail if this is a poniter element, we can't recurse through them.<br>

+    if (ElementTy->isPointerTy())<br>

+      return 0;<br>

</div></blockquote>

<br>

Why handle this special case here, since getTypePartition will take care of it.<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="im">

+    return getTypePartition(TD, ElementTy, Offset, Size);<br>

+  }<br>

+  assert(Offset == 0);<br></div><div class="im">

+<br>

+  if (Size == ElementSize)<br>

+    return ElementTy;<br>

+<br></div>

+  StructType::element_iterator EI = STy->element_begin() + Index,<div class="im"><br>

+                               EE = STy->element_end();<br></div>

+  if (EndOffset < SL->getSizeInBytes()) {<br>

+    unsigned EndIndex = SL-><u></u>getElementContainingOffset(<u></u>EndOffset);<br>

</blockquote>

<br>

Shouldn't this be EndOffset - 1?  (In which case it should be <= above).<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+    if (Index == EndIndex)<br>

+      return 0; // Within a single element and its padding.<br>

+    assert(Index < EndIndex);<br>

+    assert(Index + EndIndex <= STy->getNumElements());<br>

+    EE = STy->element_begin() + EndIndex;<br>

+  }<br>

+<br>

+  // Try to build up a sub-structure.<div class="im"><br>

+  SmallVector<Type *, 4> ElementTys;<br></div>

+  do {<br>

+    ElementTys.push_back(*EI++);<br>

+  } while (EI != EE);<br>

+  StructType *SubTy = StructType::get(STy-><u></u>getContext(), ElementTys,<br>

+                                      STy->isPacked());<br>

+  const StructLayout *SubSL = TD.getStructLayout(SubTy);<br>

+  if (Size == SubSL->getSizeInBytes())<br>

+    return SubTy;<br>

+<br>

+  // FIXME: We could potentially recurse down through the last element in the<br>

+  // sub-struct to find a natural end point.<div class="im"><br>

+  return 0;<br>

+}<br>

+<br></div>

+/// \brief Rewrite an alloca partition's users.<br>

+///<br>

+/// This routine drives both of the rewriting goals of the SROA pass. It tries<br>

+/// to rewrite uses of an alloca partition to be conducive for SSA value<br>

+/// promotion. If the partition needs a new, more refined alloca, this will<br>

+/// build that new alloca, preserving as much type information as possible, and<br>

+/// rewrite the uses of the old alloca to point at the new one and have the<br>

+/// appropriate new offsets. It also evaluates how successful the rewrite was<br>

+/// at enabling promotion and if it was successful queues the alloca to be<br>

+/// promoted.<br>

+bool SROA::rewriteAllocaPartition(<u></u>AllocaInst &AI,<br>

+                                  AllocaPartitioning &P,<div class="im"><br>

+                                  AllocaPartitioning::iterator PI) {<br>

+  uint64_t AllocaSize = PI->EndOffset - PI->BeginOffset;<br></div>

+  if (P.use_begin(PI) == P.use_end(PI))<br>

+    return false; // No live uses left of this partition.<br>

+<div class="im"><br>

+  // Try to compute a friendly type for this partition of the alloca. This<br>

+  // won't always succeed, in which case we fall back to a legal integer type<br>

+  // or an i8 array of an appropriate size.<br>

+  Type *AllocaTy = 0;<br>

+  if (Type *PartitionTy = P.getCommonType(PI))<br></div>

+    if (TD->getTypeAllocSize(<u></u>PartitionTy) >= AllocaSize)<br>

+      AllocaTy = PartitionTy;<br>

+  if (!AllocaTy)<div class="im"><br>

+    if (Type *PartitionTy = getTypePartition(*TD, AI.getAllocatedType(),<br>

+                                             PI->BeginOffset, AllocaSize))<br>

+      AllocaTy = PartitionTy;<br>

+  if ((!AllocaTy ||<br>

+       (AllocaTy->isArrayTy() &&<br>

+        AllocaTy->getArrayElementType(<u></u>)->isIntegerTy())) &&<br>

+      TD->isLegalInteger(AllocaSize * 8))<br>

+    AllocaTy = Type::getIntNTy(*C, AllocaSize * 8);<br>

+  if (!AllocaTy)<br>

+    AllocaTy = ArrayType::get(Type::<u></u>getInt8Ty(*C), AllocaSize);<br>

</div></blockquote>

<br>

How about an assert here that the alloc size of AllocaTy is >= AllocaSize.<br>

<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="im">

+  // Check for the case where we're going to rewrite to a new alloca of the<br>

+  // exact same type as the original, and with the same access offsets. In that<br>

+  // case, re-use the existing alloca, but still run through the rewriter to<br>

+  // performe phi and select speculation.<br>

+  AllocaInst *NewAI;<br>

+  if (AllocaTy == AI.getAllocatedType()) {<br></div><div class="im">

+    assert(PI->BeginOffset == 0 &&<br>

+           "Non-zero begin offset but same alloca type");<br>

+    assert(PI == P.begin() && "Begin offset is zero on later partition");<br>

+    NewAI = &AI;<br>

+  } else {<br></div>

+    // FIXME: The alignment here is overly conservative -- we could in many<br>

+    // cases get away with much weaker alignment constraints.<div class="im"><br>

+    NewAI = new AllocaInst(AllocaTy, 0, AI.getAlignment(),<br>

+                           AI.getName() + ".sroa." + Twine(PI - P.begin()),<br>

+                           &AI);<br>

+    ++NumNewAllocas;<br>

+  }<br>

+<br>

+  DEBUG(dbgs() << "Rewriting alloca partition "<br>

+               << "[" << PI->BeginOffset << "," << PI->EndOffset << ") to: "<br>

+               << *NewAI << "\n");<br>

+<br>

+  AllocaPartitionRewriter Rewriter(*TD, P, PI, *this, AI, *NewAI,<br>

+                                   PI->BeginOffset, PI->EndOffset);<br>

+  DEBUG(dbgs() << "  rewriting ");<br>

+  DEBUG(P.print(dbgs(), PI, ""));<br>

+  if (Rewriter.visitUsers(P.use_<u></u>begin(PI), P.use_end(PI))) {<br>

+    DEBUG(dbgs() << "  and queuing for promotion\n");<br>

+    PromotableAllocas.push_back(<u></u>NewAI);<br>

+  } else if (NewAI != &AI) {<br>

+    // If we can't promote the alloca, iterate on it to check for new<br>

+    // refinements exposed by splitting the current alloca. Don't iterate on an<br>

+    // alloca which didn't actually change and didn't get promoted.<br>

+    Worklist.insert(NewAI);<br></div><div class="im">

+  }<br>

+  return true;<br>

+}<br>

</div></blockquote>

<br>

...<br>

<br>

Ciao, Duncan.<br>

</blockquote></div><br>