[PATCH] Fix alignment of data in TypeLocs (PR16144)

Eli Friedman eli.friedman at gmail.com
Wed Jun 5 20:06:00 PDT 2013


Patch attached; fixes PR16144.

Currently, the data in TypeLocs consists of a bunch of tightly packed
structures, and the structures can become misaligned because there isn't
any padding.

There are basically three possible approaches to fixing the alignment
issues in TypeLocs:

1) Force every piece of the TypeLoc's data to have alignment 8.
2) Perform dynamic alignment adjustments.
3) Use #pragma pack to let the compiler know the data is intentionally
misaligned.

(1) has a substantial impact on memory usage (something like 1% on
Cocoa.h), so I'd like to avoid it if possible.  (2) is the attached patch;
it avoids both misaligned loads and unnecessary memory usage.  The primary
downside is that TypeLocBuilder becomes a lot more complicated, because it
doesn't know in advance where it needs to insert padding.  (3) keeps around
to misaligned data: there's a potential performance penalty, it requires
being careful not to introduce incorrect accesses to the data, and it's
just plain ugly.


Two questions to focus on for review: would (3) be a better approach?  And
is there any way to make the TypeLocBuilder implementation a bit less ugly?

-Eli
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130605/807ca859/attachment.html>
-------------- next part --------------
Index: include/clang/AST/TypeLoc.h
===================================================================
--- include/clang/AST/TypeLoc.h	(revision 183335)
+++ include/clang/AST/TypeLoc.h	(working copy)
@@ -95,6 +95,10 @@
   /// \brief Returns the size of type source info data block for the given type.
   static unsigned getFullDataSizeForType(QualType Ty);
 
+  /// \brief Returns the alignment of type source info data block for
+  /// the given type.
+  static unsigned getLocalAlignmentForType(QualType Ty);
+
   /// \brief Get the type for which this source info wrapper provides
   /// information.
   QualType getType() const {
@@ -229,7 +233,11 @@
   }
 
   UnqualTypeLoc getUnqualifiedLoc() const {
-    return UnqualTypeLoc(getTypePtr(), Data);
+    unsigned align =
+        TypeLoc::getLocalAlignmentForType(QualType(getTypePtr(), 0));
+    uintptr_t dataInt = reinterpret_cast<uintptr_t>(Data);
+    dataInt = llvm::RoundUpToAlignment(dataInt, align);
+    return UnqualTypeLoc(getTypePtr(), reinterpret_cast<void*>(dataInt));
   }
 
   /// Initializes the local data of this type source info block to
@@ -250,10 +258,11 @@
     return 0;
   }
 
-  /// \brief Returns the size of the type source info data block.
-  unsigned getFullDataSize() const {
-    return getLocalDataSize() +
-      getFullDataSizeForType(getType().getLocalUnqualifiedType());
+  /// \brief Returns the alignment of the type source info data block that is
+  /// specific to this type.
+  unsigned getLocalDataAlignment() const {
+    // We don't preserve any location information.
+    return 1;
   }
 
 private:
@@ -280,9 +289,6 @@
 /// \tparam LocalData the structure type of local location data for
 ///   this type
 ///
-/// sizeof(LocalData) needs to be a multiple of sizeof(void*) or
-/// else the world will end.
-///
 /// TypeLocs with non-constant amounts of local data should override
 /// getExtraLocalDataSize(); getExtraLocalData() will then point to
 /// this extra memory.
@@ -317,13 +323,17 @@
   }
 
 public:
+  unsigned getLocalDataAlignment() const {
+    return std::max(llvm::alignOf<LocalData>(),
+                    asDerived()->getExtraLocalDataAlignment());
+  }
   unsigned getLocalDataSize() const {
-    return sizeof(LocalData) + asDerived()->getExtraLocalDataSize();
+    unsigned size = sizeof(LocalData);
+    unsigned extraAlign = asDerived()->getExtraLocalDataAlignment();
+    size = llvm::RoundUpToAlignment(size, extraAlign);
+    size += asDerived()->getExtraLocalDataSize();
+    return size;
   }
-  // Give a default implementation that's useful for leaf types.
-  unsigned getFullDataSize() const {
-    return asDerived()->getLocalDataSize() + getInnerTypeSize();
-  }
 
   TypeLoc getNextTypeLoc() const {
     return getNextTypeLoc(asDerived()->getInnerType());
@@ -338,6 +348,10 @@
     return 0;
   }
 
+  unsigned getExtraLocalDataAlignment() const {
+    return 1;
+  }
+
   LocalData *getLocalData() const {
     return static_cast<LocalData*>(Base::Data);
   }
@@ -346,11 +360,17 @@
   /// local data that can't be captured in the Info (e.g. because it's
   /// of variable size).
   void *getExtraLocalData() const {
-    return getLocalData() + 1;
+    unsigned size = sizeof(LocalData);
+    unsigned extraAlign = asDerived()->getExtraLocalDataAlignment();
+    size = llvm::RoundUpToAlignment(size, extraAlign);
+    return reinterpret_cast<char*>(Base::Data) + size;
   }
 
   void *getNonLocalData() const {
-    return static_cast<char*>(Base::Data) + asDerived()->getLocalDataSize();
+    uintptr_t data = reinterpret_cast<uintptr_t>(Base::Data);
+    data += asDerived()->getLocalDataSize();
+    data = llvm::RoundUpToAlignment(data, getNextTypeAlign());
+    return reinterpret_cast<void*>(data);
   }
 
   struct HasNoInnerType {};
@@ -373,6 +393,18 @@
     return getInnerTypeLoc().getFullDataSize();
   }
 
+  unsigned getNextTypeAlign() const {
+    return getNextTypeAlign(asDerived()->getInnerType());
+  }
+
+  unsigned getNextTypeAlign(HasNoInnerType _) const {
+    return 1;
+  }
+
+  unsigned getNextTypeAlign(QualType T) const {
+    return TypeLoc::getLocalAlignmentForType(T);
+  }
+
   TypeLoc getNextTypeLoc(HasNoInnerType _) const {
     return TypeLoc();
   }
@@ -417,7 +449,8 @@
                                                Type,
                                                TypeSpecLocInfo> {
 public:
-  enum { LocalDataSize = sizeof(TypeSpecLocInfo) };
+  enum { LocalDataSize = sizeof(TypeSpecLocInfo),
+         LocalDataAlignment = llvm::AlignOf<TypeSpecLocInfo>::Alignment };
 
   SourceLocation getNameLoc() const {
     return this->getLocalData()->NameLoc;
@@ -448,8 +481,6 @@
                                               BuiltinType,
                                               BuiltinLocInfo> {
 public:
-  enum { LocalDataSize = sizeof(BuiltinLocInfo) };
-
   SourceLocation getBuiltinLoc() const {
     return getLocalData()->BuiltinLoc;
   }
@@ -478,6 +509,10 @@
     return needsExtraLocalData() ? sizeof(WrittenBuiltinSpecs) : 0;
   }
 
+  unsigned getExtraLocalDataAlignment() const {
+    return needsExtraLocalData() ? llvm::alignOf<WrittenBuiltinSpecs>() : 1;
+  }
+
   SourceRange getLocalSourceRange() const {
     return SourceRange(getBuiltinLoc(), getBuiltinLoc());
   }
@@ -840,6 +875,10 @@
     return this->getNumProtocols() * sizeof(SourceLocation);
   }
 
+  unsigned getExtraLocalDataAlignment() const {
+    return llvm::alignOf<SourceLocation>();
+  }
+
   QualType getInnerType() const {
     return getTypePtr()->getBaseType();
   }
@@ -1166,6 +1205,10 @@
     return getNumArgs() * sizeof(ParmVarDecl*);
   }
 
+  unsigned getExtraLocalDataAlignment() const {
+    return llvm::alignOf<ParmVarDecl*>();
+  }
+
   QualType getInnerType() const { return getTypePtr()->getResultType(); }
 };
 
@@ -1357,6 +1400,10 @@
     return getNumArgs() * sizeof(TemplateArgumentLocInfo);
   }
 
+  unsigned getExtraLocalDataAlignment() const {
+    return llvm::alignOf<TemplateArgumentLocInfo>();
+  }
+
 private:
   TemplateArgumentLocInfo *getArgInfos() const {
     return static_cast<TemplateArgumentLocInfo*>(getExtraLocalData());
@@ -1761,6 +1808,10 @@
     return getNumArgs() * sizeof(TemplateArgumentLocInfo);
   }
 
+  unsigned getExtraLocalDataAlignment() const {
+    return llvm::alignOf<TemplateArgumentLocInfo>();
+  }
+
 private:
   TemplateArgumentLocInfo *getArgInfos() const {
     return static_cast<TemplateArgumentLocInfo*>(getExtraLocalData());
Index: lib/AST/TypeLoc.cpp
===================================================================
--- lib/AST/TypeLoc.cpp	(revision 183335)
+++ lib/AST/TypeLoc.cpp	(working copy)
@@ -41,12 +41,30 @@
 }
 
 namespace {
+  class TypeAligner : public TypeLocVisitor<TypeAligner, unsigned> {
+  public:
+#define ABSTRACT_TYPELOC(CLASS, PARENT)
+#define TYPELOC(CLASS, PARENT) \
+    unsigned Visit##CLASS##TypeLoc(CLASS##TypeLoc TyLoc) { \
+      return TyLoc.getLocalDataAlignment(); \
+    }
+#include "clang/AST/TypeLocNodes.def"
+  };
+}
+
+/// \brief Returns the alignment of the type source info data block.
+unsigned TypeLoc::getLocalAlignmentForType(QualType Ty) {
+  if (Ty.isNull()) return 1;
+  return TypeAligner().Visit(TypeLoc(Ty, 0));
+}
+
+namespace {
   class TypeSizer : public TypeLocVisitor<TypeSizer, unsigned> {
   public:
 #define ABSTRACT_TYPELOC(CLASS, PARENT)
 #define TYPELOC(CLASS, PARENT) \
     unsigned Visit##CLASS##TypeLoc(CLASS##TypeLoc TyLoc) { \
-      return TyLoc.getFullDataSize(); \
+      return TyLoc.getLocalDataSize(); \
     }
 #include "clang/AST/TypeLocNodes.def"
   };
@@ -54,8 +72,18 @@
 
 /// \brief Returns the size of the type source info data block.
 unsigned TypeLoc::getFullDataSizeForType(QualType Ty) {
-  if (Ty.isNull()) return 0;
-  return TypeSizer().Visit(TypeLoc(Ty, 0));
+  unsigned Total = 0;
+  TypeLoc TyLoc(Ty, 0);
+  unsigned MaxAlign = 1;
+  while (!TyLoc.isNull()) {
+    unsigned Align = getLocalAlignmentForType(TyLoc.getType());
+    MaxAlign = std::max(Align, MaxAlign);
+    Total = llvm::RoundUpToAlignment(Total, Align);
+    Total += TypeSizer().Visit(TyLoc);
+    TyLoc = TyLoc.getNextTypeLoc();
+  }
+  Total = llvm::RoundUpToAlignment(Total, MaxAlign);
+  return Total;
 }
 
 namespace {
Index: lib/Sema/CMakeLists.txt
===================================================================
--- lib/Sema/CMakeLists.txt	(revision 183335)
+++ lib/Sema/CMakeLists.txt	(working copy)
@@ -51,6 +51,7 @@
   SemaTemplateVariadic.cpp
   SemaType.cpp
   TargetAttributesSema.cpp
+  TypeLocBuilder.cpp
   )
 
 add_dependencies(clangSema
Index: lib/Sema/SemaTemplateVariadic.cpp
===================================================================
--- lib/Sema/SemaTemplateVariadic.cpp	(revision 183335)
+++ lib/Sema/SemaTemplateVariadic.cpp	(working copy)
@@ -18,6 +18,7 @@
 #include "clang/Sema/ScopeInfo.h"
 #include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/Template.h"
+#include "TypeLocBuilder.h"
 
 using namespace clang;
 
@@ -463,17 +464,13 @@
                                        EllipsisLoc, NumExpansions);
   if (Result.isNull())
     return 0;
-  
-  TypeSourceInfo *TSResult = Context.CreateTypeSourceInfo(Result);
-  PackExpansionTypeLoc TL =
-      TSResult->getTypeLoc().castAs<PackExpansionTypeLoc>();
+
+  TypeLocBuilder TLB;
+  TLB.pushFullCopy(Pattern->getTypeLoc());
+  PackExpansionTypeLoc TL = TLB.push<PackExpansionTypeLoc>(Result);
   TL.setEllipsisLoc(EllipsisLoc);
-  
-  // Copy over the source-location information from the type.
-  memcpy(TL.getNextTypeLoc().getOpaqueData(),
-         Pattern->getTypeLoc().getOpaqueData(),
-         Pattern->getTypeLoc().getFullDataSize());
-  return TSResult;
+
+  return TLB.getTypeSourceInfo(Context, Result);
 }
 
 QualType Sema::CheckPackExpansion(QualType Pattern, SourceRange PatternRange,
Index: lib/Sema/TypeLocBuilder.cpp
===================================================================
--- lib/Sema/TypeLocBuilder.cpp	(revision 0)
+++ lib/Sema/TypeLocBuilder.cpp	(working copy)
@@ -0,0 +1,136 @@
+//===--- TypeLocBuilder.cpp - Type Source Info collector ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This files defines TypeLocBuilder, a class for building TypeLocs
+//  bottom-up.
+//
+//===----------------------------------------------------------------------===//
+
+#include "TypeLocBuilder.h"
+
+using namespace clang;
+
+void TypeLocBuilder::pushFullCopy(TypeLoc L) {
+  size_t Size = L.getFullDataSize();
+  reserve(Size);
+
+  SmallVector<TypeLoc, 4> TypeLocs;
+  TypeLoc CurTL = L;
+  while (CurTL) {
+    TypeLocs.push_back(CurTL);
+    CurTL = CurTL.getNextTypeLoc();
+  }
+
+  for (unsigned i = 0, e = TypeLocs.size(); i < e; ++i) {
+    TypeLoc CurTL = TypeLocs[e-i-1];
+    switch (CurTL.getTypeLocClass()) {
+#define ABSTRACT_TYPELOC(CLASS, PARENT)
+#define TYPELOC(CLASS, PARENT) \
+    case TypeLoc::CLASS: { \
+      CLASS##TypeLoc NewTL = push<class CLASS##TypeLoc>(CurTL.getType()); \
+      memcpy(NewTL.getOpaqueData(), CurTL.getOpaqueData(), NewTL.getLocalDataSize()); \
+      break; \
+    }
+#include "clang/AST/TypeLocNodes.def"
+    }
+  }
+}
+
+void TypeLocBuilder::grow(size_t NewCapacity) {
+  assert(NewCapacity > Capacity);
+
+  // Allocate the new buffer and copy the old data into it.
+  char *NewBuffer = new char[NewCapacity];
+  unsigned NewIndex = Index + NewCapacity - Capacity;
+  memcpy(&NewBuffer[NewIndex],
+         &Buffer[Index],
+         Capacity - Index);
+
+  if (Buffer != InlineBuffer.buffer)
+    delete[] Buffer;
+
+  Buffer = NewBuffer;
+  Capacity = NewCapacity;
+  Index = NewIndex;
+}
+
+TypeLoc TypeLocBuilder::pushImpl(QualType T, size_t LocalSize, unsigned LocalAlignment) {
+#ifndef NDEBUG
+  QualType TLast = TypeLoc(T, 0).getNextTypeLoc().getType();
+  assert(TLast == LastTy &&
+         "mismatch between last type and new type's inner type");
+  LastTy = T;
+#endif
+
+  assert(LocalAlignment <= BufferMaxAlignment && "Unexpected alignment");
+
+  // If we need to grow, grow by a factor of 2.
+  if (LocalSize > Index) {
+    size_t RequiredCapacity = Capacity + (LocalSize - Index);
+    size_t NewCapacity = Capacity * 2;
+    while (RequiredCapacity > NewCapacity)
+      NewCapacity *= 2;
+    grow(NewCapacity);
+  }
+
+  // Because we're adding elements to the TypeLoc backwards, we have to
+  // do some extra work to keep everything aligned appropriately.
+  // FIXME: This algorithm is a absolute mess because every TypeLoc returned
+  // needs to be valid.  Partial TypeLocs are a terrible idea.
+  // FIXME: 4 and 8 are sufficient at the moment, but it's pretty ugly to
+  // hardcode them.
+  if (LocalAlignment == 4) {
+    if (NumBytesAtAlign8 == 0) {
+      NumBytesAtAlign4 += LocalSize;
+    } else {
+      unsigned Padding = NumBytesAtAlign4 % 8;
+      if (Padding == 0) {
+        if (LocalSize % 8 == 0) {
+          // Everything is set: there's no padding and we don't need to add
+          // any.
+        } else {
+          assert(LocalSize % 8 == 4);
+          // No existing padding; add in 4 bytes padding
+          memmove(&Buffer[Index - 4], &Buffer[Index], NumBytesAtAlign4);
+          Index -= 4;
+        }
+      } else {
+        assert(Padding == 4);
+        if (LocalSize % 8 == 0) {
+          // Everything is set: there's 4 bytes padding and we don't need
+          // to add any.
+        } else {
+          assert(LocalSize % 8 == 4);
+          // There are 4 bytes padding, but we don't need any; remove it.
+          memmove(&Buffer[Index + 4], &Buffer[Index], NumBytesAtAlign4);
+          Index += 4;
+        }
+      }
+      NumBytesAtAlign4 += LocalSize;
+    }
+  } else if (LocalAlignment == 8) {
+    if (!NumBytesAtAlign8 && NumBytesAtAlign4 % 8 != 0) {
+      // No existing padding and misaligned members; add in 4 bytes padding
+      memmove(&Buffer[Index - 4], &Buffer[Index], NumBytesAtAlign4);
+      Index -= 4;
+    }
+    // Forget about any padding.
+    NumBytesAtAlign4 = 0;
+    NumBytesAtAlign8 += LocalSize;
+  } else {
+    assert(LocalSize == 0);
+  }
+
+  Index -= LocalSize;
+
+  assert(Capacity - Index == TypeLoc::getFullDataSizeForType(T) &&
+         "incorrect data size provided to CreateTypeSourceInfo!");
+
+  return getTemporaryTypeLoc(T);
+}
Index: lib/Sema/TypeLocBuilder.h
===================================================================
--- lib/Sema/TypeLocBuilder.h	(revision 183335)
+++ lib/Sema/TypeLocBuilder.h	(working copy)
@@ -39,14 +39,20 @@
 #endif
     
   /// The inline buffer.
-  char InlineBuffer[InlineCapacity];
+  enum { BufferMaxAlignmentLog = 3,
+         BufferMaxAlignment = 1 << BufferMaxAlignmentLog };
+  llvm::AlignedCharArray<BufferMaxAlignment, InlineCapacity> InlineBuffer;
+  unsigned NumBytesAtAlign4, NumBytesAtAlign8;
 
  public:
   TypeLocBuilder()
-    : Buffer(InlineBuffer), Capacity(InlineCapacity), Index(InlineCapacity) {}
+    : Buffer(InlineBuffer.buffer), Capacity(InlineCapacity),
+      Index(InlineCapacity), NumBytesAtAlign4(0), NumBytesAtAlign8(0)
+  {
+  }
 
   ~TypeLocBuilder() {
-    if (Buffer != InlineBuffer)
+    if (Buffer != InlineBuffer.buffer)
       delete[] Buffer;
   }
 
@@ -59,23 +65,14 @@
 
   /// Pushes a copy of the given TypeLoc onto this builder.  The builder
   /// must be empty for this to work.
-  void pushFullCopy(TypeLoc L) {
-    size_t Size = L.getFullDataSize();
-    TypeLoc Copy = pushFullUninitializedImpl(L.getType(), Size);
-    memcpy(Copy.getOpaqueData(), L.getOpaqueData(), Size);
-  }
+  void pushFullCopy(TypeLoc L);
 
-  /// Pushes uninitialized space for the given type.  The builder must
-  /// be empty.
-  TypeLoc pushFullUninitialized(QualType T) {
-    return pushFullUninitializedImpl(T, TypeLoc::getFullDataSizeForType(T));
-  }
-
   /// Pushes space for a typespec TypeLoc.  Invalidates any TypeLocs
   /// previously retrieved from this builder.
   TypeSpecTypeLoc pushTypeSpec(QualType T) {
     size_t LocalSize = TypeSpecTypeLoc::LocalDataSize;
-    return pushImpl(T, LocalSize).castAs<TypeSpecTypeLoc>();
+    unsigned LocalAlign = TypeSpecTypeLoc::LocalDataAlignment;
+    return pushImpl(T, LocalSize, LocalAlign).castAs<TypeSpecTypeLoc>();
   }
 
   /// Resets this builder to the newly-initialized state.
@@ -84,6 +81,7 @@
     LastTy = QualType();
 #endif
     Index = Capacity;
+    NumBytesAtAlign4 = NumBytesAtAlign8 = 0;
   }  
 
   /// \brief Tell the TypeLocBuilder that the type it is storing has been
@@ -97,8 +95,10 @@
   /// Pushes space for a new TypeLoc of the given type.  Invalidates
   /// any TypeLocs previously retrieved from this builder.
   template <class TyLocType> TyLocType push(QualType T) {
-    size_t LocalSize = TypeLoc(T, 0).castAs<TyLocType>().getLocalDataSize();
-    return pushImpl(T, LocalSize).castAs<TyLocType>();
+    TyLocType Loc = TypeLoc(T, 0).castAs<TyLocType>();
+    size_t LocalSize = Loc.getLocalDataSize();
+    unsigned LocalAlign = Loc.getLocalDataAlignment();
+    return pushImpl(T, LocalSize, LocalAlign).castAs<TyLocType>();
   }
 
   /// Creates a TypeSourceInfo for the given type.
@@ -127,60 +127,12 @@
   }
 
 private:
-  TypeLoc pushImpl(QualType T, size_t LocalSize) {
-#ifndef NDEBUG
-    QualType TLast = TypeLoc(T, 0).getNextTypeLoc().getType();
-    assert(TLast == LastTy &&
-           "mismatch between last type and new type's inner type");
-    LastTy = T;
-#endif
 
-    // If we need to grow, grow by a factor of 2.
-    if (LocalSize > Index) {
-      size_t RequiredCapacity = Capacity + (LocalSize - Index);
-      size_t NewCapacity = Capacity * 2;
-      while (RequiredCapacity > NewCapacity)
-        NewCapacity *= 2;
-      grow(NewCapacity);
-    }
+  TypeLoc pushImpl(QualType T, size_t LocalSize, unsigned LocalAlignment);
 
-    Index -= LocalSize;
-
-    return getTemporaryTypeLoc(T);
-  }
-
   /// Grow to the given capacity.
-  void grow(size_t NewCapacity) {
-    assert(NewCapacity > Capacity);
+  void grow(size_t NewCapacity);
 
-    // Allocate the new buffer and copy the old data into it.
-    char *NewBuffer = new char[NewCapacity];
-    unsigned NewIndex = Index + NewCapacity - Capacity;
-    memcpy(&NewBuffer[NewIndex],
-           &Buffer[Index],
-           Capacity - Index);
-
-    if (Buffer != InlineBuffer)
-      delete[] Buffer;
-
-    Buffer = NewBuffer;
-    Capacity = NewCapacity;
-    Index = NewIndex;
-  }
-
-  TypeLoc pushFullUninitializedImpl(QualType T, size_t Size) {
-#ifndef NDEBUG
-    assert(LastTy.isNull() && "pushing full on non-empty TypeLocBuilder");
-    LastTy = T;
-#endif
-    assert(Index == Capacity && "pushing full on non-empty TypeLocBuilder");
-
-    reserve(Size);
-    Index -= Size;
-
-    return getTemporaryTypeLoc(T);
-  }
-
 public:
   /// \brief Retrieve a temporary TypeLoc that refers into this \c TypeLocBuilder
   /// object.


More information about the cfe-commits mailing list