[llvm-commits] RFC: initial union syntax support

Andrew Lenharth andrewl at lenharth.org
Tue May 12 16:00:13 PDT 2009


Updated patch.  It supports the C and Cpp backends and all (?) native codegen.
BasicAA has been updated to be conservative about unions (but it is not yet
conservative enough).

Making unions a kind of struct makes most code just work: most code uses the
getOffset variants, and structs can already have zero-width elements, so
fields that share an offset are nothing new.

Index: include/llvm/DerivedTypes.h
===================================================================
--- include/llvm/DerivedTypes.h	(revision 71552)
+++ include/llvm/DerivedTypes.h	(working copy)
@@ -218,13 +218,17 @@
   friend class TypeMap<StructValType, StructType>;
   StructType(const StructType &);                   // Do not implement
   const StructType &operator=(const StructType &);  // Do not implement
-  StructType(const std::vector<const Type*> &Types, bool isPacked);
+  StructType(const std::vector<const Type*> &Types,
+             bool isPacked, bool isUnion);
+  enum {structPlain = 0,
+        structPacked = 1,
+        structUnion = 2};
 public:
   /// StructType::get - This static method is the primary way to create a
   /// StructType.
   ///
   static StructType *get(const std::vector<const Type*> &Params,
-                         bool isPacked=false);
+                         bool isPacked=false, bool isUnion=false);

   /// StructType::get - This static method is a convenience method for
   /// creating structure types by specifying the elements as arguments.
@@ -262,7 +266,9 @@
     return T->getTypeID() == StructTyID;
   }

-  bool isPacked() const { return (0 != getSubclassData()) ? true : false; }
+  bool isPacked() const { return structPacked == getSubclassData(); }
+  bool isUnion() const { return structUnion == getSubclassData(); }
+
 };


Index: include/llvm/Bitcode/LLVMBitCodes.h
===================================================================
--- include/llvm/Bitcode/LLVMBitCodes.h	(revision 71552)
+++ include/llvm/Bitcode/LLVMBitCodes.h	(working copy)
@@ -90,7 +90,11 @@
     // binary compatibility.
     TYPE_CODE_X86_FP80 = 13,   // X86 LONG DOUBLE
     TYPE_CODE_FP128    = 14,   // LONG DOUBLE (112 bit mantissa)
-    TYPE_CODE_PPC_FP128= 15    // PPC LONG DOUBLE (2 doubles)
+    TYPE_CODE_PPC_FP128= 15,   // PPC LONG DOUBLE (2 doubles)
+
+    // Merge UNION with STRUCT in LLVM 3.0
+    TYPE_CODE_UNION    = 16    // UNION: [eltty x N]
+
     // Any other type code is assumed to be an unknown type.
   };

Index: docs/LangRef.html
===================================================================
--- docs/LangRef.html	(revision 71552)
+++ docs/LangRef.html	(working copy)
@@ -1579,8 +1579,31 @@
   </tr>
 </table>
 </div>
-
 <!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="t_ustruct">Union Structure Type</a>
+</div>
+<div class="doc_text">
+<h5>Overview:</h5>
+<p>The union structure type is used to represent a collection of data members
+overlapping in memory.  All fields start at an offset of zero.  The elements of
+a union structure may be any type that has a size.  The size of a union is the
+size of the largest element.  The alignment is that of the most strictly aligned
+element.</p>
+<p>Union structures are accessed using '<tt><a href="#i_load">load</a></tt>'
+and '<tt><a href="#i_store">store</a></tt>' by getting a pointer to a
+field with the '<tt><a href="#i_getelementptr">getelementptr</a></tt>'
+instruction.</p>
+<h5>Syntax:</h5>
+<pre>  union { &lt;type list&gt; }<br></pre>
+<h5>Examples:</h5>
+<table class="layout">
+  <tr class="layout">
+    <td class="left"><tt>union { i32, i32*, i64 }</tt></td>
+    <td class="left">A union of three values</td>
+  </tr>
+</table>
+</div>
+<!-- _______________________________________________________________________ -->
 <div class="doc_subsubsection"> <a name="t_pointer">Pointer Type</a> </div>
 <div class="doc_text">
 <h5>Overview:</h5>
Index: lib/Analysis/BasicAliasAnalysis.cpp
===================================================================
--- lib/Analysis/BasicAliasAnalysis.cpp	(revision 71552)
+++ lib/Analysis/BasicAliasAnalysis.cpp	(working copy)
@@ -529,6 +529,12 @@

   const PointerType *GEPPointerTy = cast<PointerType>(BasePtr1Ty);

+  // FIXME: Be conservative for unions.
+  // FIXME: This doesn't handle embedded unions.
+  if (const StructType* STy = dyn_cast<StructType>(GEPPointerTy->getElementType()))
+    if (STy->isUnion())
+      return MayAlias;
+
   // Find the (possibly empty) initial sequence of equal values... which are not
   // necessarily constants.
   unsigned NumGEP1Operands = NumGEP1Ops, NumGEP2Operands = NumGEP2Ops;
@@ -570,7 +576,6 @@
     if (AllAreZeros) return MustAlias;
   }

-
   // So now we know that the indexes derived from the base pointers,
   // which are known to alias, are different.  We can still determine a
   // no-alias result if there are differing constant pairs in the index
Index: lib/Target/CBackend/CBackend.cpp
===================================================================
--- lib/Target/CBackend/CBackend.cpp	(revision 71552)
+++ lib/Target/CBackend/CBackend.cpp	(working copy)
@@ -2132,7 +2132,9 @@
   // Print out forward declarations for structure types before anything else!
   Out << "/* Structure forward decls */\n";
   for (; I != End; ++I) {
-    std::string Name = "struct l_" + Mang->makeNameProper(I->first);
+    const StructType* STy = dyn_cast<StructType>(I->second);
+    std::string Name = ((STy && STy->isUnion()) ? "union l_" : "struct l_")
+      + Mang->makeNameProper(I->first);
     Out << Name << ";\n";
     TypeNames.insert(std::make_pair(I->second, Name));
   }
Index: lib/Target/CppBackend/CPPBackend.cpp
===================================================================
--- lib/Target/CppBackend/CPPBackend.cpp	(revision 71552)
+++ lib/Target/CppBackend/CPPBackend.cpp	(working copy)
@@ -572,7 +572,10 @@
       }
       Out << "StructType* " << typeName << " = StructType::get("
           << typeName << "_fields, /*isPacked=*/"
-          << (ST->isPacked() ? "true" : "false") << ");";
+          << (ST->isPacked() ? "true" : "false")
+          << ", /*isUnion=*/"
+          << (ST->isUnion() ? "true" : "false")
+          << ");";
       nl(Out);
       break;
     }
Index: lib/Target/TargetData.cpp
===================================================================
--- lib/Target/TargetData.cpp	(revision 71552)
+++ lib/Target/TargetData.cpp	(working copy)
@@ -57,8 +57,13 @@
     // Keep track of maximum alignment constraint.
     StructAlignment = std::max(TyAlign, StructAlignment);

-    MemberOffsets[i] = StructSize;
-    StructSize += TD.getTypeAllocSize(Ty); // Consume space for this data item
+    if (ST->isUnion()) {
+      MemberOffsets[i] = 0;
+      StructSize = std::max(StructSize, TD.getTypeAllocSize(Ty));
+    } else {
+      MemberOffsets[i] = StructSize;
+      StructSize += TD.getTypeAllocSize(Ty); // Consume space for this data item
+    }
   }

   // Empty structures have alignment of 1 byte.
@@ -84,6 +89,7 @@
          "Upper bound didn't work!");

   // Multiple fields can have the same offset if any of them are zero sized.
+  // This will also happen for union structures
   // For example, in { i32, [0 x i32], i32 }, searching for offset 4 will stop
   // at the i32 element, because it is the last element at that offset.  This is
   // the right one to return, because anything after it will have a higher
Index: lib/VMCore/AsmWriter.cpp
===================================================================
--- lib/VMCore/AsmWriter.cpp	(revision 71552)
+++ lib/VMCore/AsmWriter.cpp	(working copy)
@@ -225,6 +225,8 @@
     const StructType *STy = cast<StructType>(Ty);
     if (STy->isPacked())
       OS << '<';
+    if (STy->isUnion())
+      OS << "union ";
     OS << "{ ";
     for (StructType::element_iterator I = STy->element_begin(),
          E = STy->element_end(); I != E; ++I) {
Index: lib/VMCore/Type.cpp
===================================================================
--- lib/VMCore/Type.cpp	(revision 71552)
+++ lib/VMCore/Type.cpp	(working copy)
@@ -338,11 +338,13 @@
   setAbstract(isAbstract);
 }

-StructType::StructType(const std::vector<const Type*> &Types, bool isPacked)
+StructType::StructType(const std::vector<const Type*> &Types, bool isPacked,
+                       bool isUnion)
   : CompositeType(StructTyID) {
   ContainedTys = reinterpret_cast<PATypeHandle*>(this + 1);
   NumContainedTys = Types.size();
-  setSubclassData(isPacked);
+  assert(!(isPacked && isUnion) && "Packed union not supported");
+  setSubclassData(isPacked + (int)isUnion * 2);
   bool isAbstract = false;
   for (unsigned i = 0; i < Types.size(); ++i) {
     assert(Types[i] != Type::VoidTy && "Void type for structure field!!");
@@ -1107,9 +1109,11 @@
 class StructValType {
   std::vector<const Type*> ElTypes;
   bool packed;
+  bool _union;
 public:
-  StructValType(const std::vector<const Type*> &args, bool isPacked)
-    : ElTypes(args), packed(isPacked) {}
+  StructValType(const std::vector<const Type*> &args,
+                bool isPacked, bool isUnion)
+    : ElTypes(args), packed(isPacked), _union(isUnion) {}

   static StructValType get(const StructType *ST) {
     std::vector<const Type *> ElTypes;
@@ -1117,7 +1121,7 @@
     for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i)
       ElTypes.push_back(ST->getElementType(i));

-    return StructValType(ElTypes, ST->isPacked());
+    return StructValType(ElTypes, ST->isPacked(),  ST->isUnion());
   }

   static unsigned hashTypeStructure(const StructType *ST) {
@@ -1127,7 +1131,9 @@
   inline bool operator<(const StructValType &STV) const {
     if (ElTypes < STV.ElTypes) return true;
     else if (ElTypes > STV.ElTypes) return false;
-    else return (int)packed < (int)STV.packed;
+    else
+      return (int)packed + 2 * (int)_union
+           < (int)STV.packed + 2 * (int)STV._union;
   }
 };
 }
@@ -1135,15 +1141,15 @@
 static ManagedStatic<TypeMap<StructValType, StructType> > StructTypes;

 StructType *StructType::get(const std::vector<const Type*> &ETypes,
-                            bool isPacked) {
-  StructValType STV(ETypes, isPacked);
+                            bool isPacked, bool isUnion) {
+  StructValType STV(ETypes, isPacked, isUnion);
   StructType *ST = StructTypes->get(STV);
   if (ST) return ST;

   // Value not found.  Derive a new type!
   ST = (StructType*) operator new(sizeof(StructType) +
                                   sizeof(PATypeHandle) * ETypes.size());
-  new (ST) StructType(ETypes, isPacked);
+  new (ST) StructType(ETypes, isPacked, isUnion);
   StructTypes->add(STV, ST);

 #ifdef DEBUG_MERGE_TYPES
Index: lib/AsmParser/LLParser.cpp
===================================================================
--- lib/AsmParser/LLParser.cpp	(revision 71552)
+++ lib/AsmParser/LLParser.cpp	(working copy)
@@ -964,9 +964,15 @@
     Result = OpaqueType::get();
     Lex.Lex();
     break;
+  case lltok::kw_union:
+    // TypeRec ::= 'union' ...
+    Lex.Lex(); //eat the kw_union
+    if (ParseStructType(Result, false, true))
+      return true;
+    break;
   case lltok::lbrace:
     // TypeRec ::= '{' ... '}'
-    if (ParseStructType(Result, false))
+    if (ParseStructType(Result, false, false))
       return true;
     break;
   case lltok::lsquare:
@@ -979,7 +985,7 @@
     // TypeRec ::= '<' ... '>'
     Lex.Lex();
     if (Lex.getKind() == lltok::lbrace) {
-      if (ParseStructType(Result, true) ||
+      if (ParseStructType(Result, true, false) ||
           ParseToken(lltok::greater, "expected '>' at end of packed struct"))
         return true;
     } else if (ParseArrayVectorType(Result, true))
@@ -1222,18 +1228,22 @@
   return false;
 }

-/// ParseStructType: Handles packed and unpacked types.  </> parsed elsewhere.
+/// ParseStructType: Handles packed, unpacked, and union types.  The '<'/'>'
+///                  and 'union' tokens are parsed elsewhere.
 ///   TypeRec
 ///     ::= '{' '}'
 ///     ::= '{' TypeRec (',' TypeRec)* '}'
 ///     ::= '<' '{' '}' '>'
 ///     ::= '<' '{' TypeRec (',' TypeRec)* '}' '>'
-bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed) {
+///     ::= 'union' '{' '}'
+///     ::= 'union' '{' TypeRec (',' TypeRec)* '}'
+
+bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed, bool Union) {
   assert(Lex.getKind() == lltok::lbrace);
   Lex.Lex(); // Consume the '{'

   if (EatIfPresent(lltok::rbrace)) {
-    Result = StructType::get(std::vector<const Type*>(), Packed);
+    Result = StructType::get(std::vector<const Type*>(), Packed, Union);
     return false;
   }

@@ -1261,7 +1271,7 @@
   std::vector<const Type*> ParamsListTy;
   for (unsigned i = 0, e = ParamsList.size(); i != e; ++i)
     ParamsListTy.push_back(ParamsList[i].get());
-  Result = HandleUpRefs(StructType::get(ParamsListTy, Packed));
+  Result = HandleUpRefs(StructType::get(ParamsListTy, Packed, Union));
   return false;
 }

Index: lib/AsmParser/LLLexer.cpp
===================================================================
--- lib/AsmParser/LLLexer.cpp	(revision 71552)
+++ lib/AsmParser/LLLexer.cpp	(working copy)
@@ -550,6 +550,7 @@

   KEYWORD(type);
   KEYWORD(opaque);
+  KEYWORD(union);

   KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
   KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
Index: lib/AsmParser/LLParser.h
===================================================================
--- lib/AsmParser/LLParser.h	(revision 71552)
+++ lib/AsmParser/LLParser.h	(working copy)
@@ -147,7 +147,7 @@
       return ParseType(Result, AllowVoid);
     }
     bool ParseTypeRec(PATypeHolder &H);
-    bool ParseStructType(PATypeHolder &H, bool Packed);
+    bool ParseStructType(PATypeHolder &H, bool Packed, bool Union);
     bool ParseArrayVectorType(PATypeHolder &H, bool isVector);
     bool ParseFunctionType(PATypeHolder &Result);
     PATypeHolder HandleUpRefs(const Type *Ty);
Index: lib/AsmParser/LLToken.h
===================================================================
--- lib/AsmParser/LLToken.h	(revision 71552)
+++ lib/AsmParser/LLToken.h	(working copy)
@@ -83,6 +83,7 @@

     kw_type,
     kw_opaque,
+    kw_union,

     kw_eq, kw_ne, kw_slt, kw_sgt, kw_sle, kw_sge, kw_ult, kw_ugt, kw_ule,
     kw_uge, kw_oeq, kw_one, kw_olt, kw_ogt, kw_ole, kw_oge, kw_ord, kw_uno,
Index: lib/Transforms/Scalar/ScalarReplAggregates.cpp
===================================================================
--- lib/Transforms/Scalar/ScalarReplAggregates.cpp	(revision 71552)
+++ lib/Transforms/Scalar/ScalarReplAggregates.cpp	(working copy)
@@ -562,8 +562,11 @@
   // into.
   for (; I != E; ++I) {
     // Ignore struct elements, no extra checking needed for these.
-    if (isa<StructType>(*I))
-      continue;
+    if (StructType* STy = dyn_cast<StructType>(*I))
+      if (STy->isUnion())
+        return MarkUnsafe(Info);
+      else
+        continue;

     ConstantInt *IdxVal = dyn_cast<ConstantInt>(I.getOperand());
     if (!IdxVal) return MarkUnsafe(Info);
@@ -1090,8 +1093,10 @@

 /// HasPadding - Return true if the specified type has any structure or
 /// alignment padding, false otherwise.
+/// Unions are conservatively assumed to have padding
 static bool HasPadding(const Type *Ty, const TargetData &TD) {
   if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+    if (STy->isUnion()) return true;
     const StructLayout *SL = TD.getStructLayout(STy);
     unsigned PrevFieldBitOffset = 0;
     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Index: lib/Bitcode/Reader/BitcodeReader.cpp
===================================================================
--- lib/Bitcode/Reader/BitcodeReader.cpp	(revision 71552)
+++ lib/Bitcode/Reader/BitcodeReader.cpp	(working copy)
@@ -536,6 +536,13 @@
       ResultTy = StructType::get(EltTys, Record[0]);
       break;
     }
+    case bitc::TYPE_CODE_UNION: {  // UNION: [eltty x N]
+      std::vector<const Type*> EltTys;
+      for (unsigned i = 0, e = Record.size(); i != e; ++i)
+        EltTys.push_back(getTypeByID(Record[i], true));
+      ResultTy = StructType::get(EltTys, false, true);
+      break;
+    }
     case bitc::TYPE_CODE_ARRAY:     // ARRAY: [numelts, eltty]
       if (Record.size() < 2)
         return Error("Invalid ARRAY type record");
Index: lib/Bitcode/Writer/BitcodeWriter.cpp
===================================================================
--- lib/Bitcode/Writer/BitcodeWriter.cpp	(revision 71552)
+++ lib/Bitcode/Writer/BitcodeWriter.cpp	(working copy)
@@ -176,6 +176,14 @@
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
                             Log2_32_Ceil(VE.getTypes().size()+1)));
   unsigned StructAbbrev = Stream.EmitAbbrev(Abbv);
+
+  // Abbrev for TYPE_CODE_UNION.
+  Abbv = new BitCodeAbbrev();
+  Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_UNION));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+                            Log2_32_Ceil(VE.getTypes().size()+1)));
+  unsigned UnionAbbrev = Stream.EmitAbbrev(Abbv);

   // Abbrev for TYPE_CODE_ARRAY.
   Abbv = new BitCodeAbbrev();
@@ -235,14 +243,25 @@
     }
     case Type::StructTyID: {
       const StructType *ST = cast<StructType>(T);
-      // STRUCT: [ispacked, eltty x N]
-      Code = bitc::TYPE_CODE_STRUCT;
-      TypeVals.push_back(ST->isPacked());
-      // Output all of the element types.
-      for (StructType::element_iterator I = ST->element_begin(),
-           E = ST->element_end(); I != E; ++I)
-        TypeVals.push_back(VE.getTypeID(*I));
-      AbbrevToUse = StructAbbrev;
+      if (!ST->isUnion()) {
+        // STRUCT: [ispacked, eltty x N]
+        Code = bitc::TYPE_CODE_STRUCT;
+        TypeVals.push_back(ST->isPacked());
+        // Output all of the element types.
+        for (StructType::element_iterator I = ST->element_begin(),
+               E = ST->element_end(); I != E; ++I)
+          TypeVals.push_back(VE.getTypeID(*I));
+        AbbrevToUse = StructAbbrev;
+      } else {
+        //Unify with STRUCT in LLVM 3.0
+        // UNION: [eltty x N]
+        Code = bitc::TYPE_CODE_UNION;
+        // Output all of the element types.
+        for (StructType::element_iterator I = ST->element_begin(),
+               E = ST->element_end(); I != E; ++I)
+          TypeVals.push_back(VE.getTypeID(*I));
+        AbbrevToUse = UnionAbbrev;
+      }
       break;
     }
     case Type::ArrayTyID: {



More information about the llvm-commits mailing list