[llvm-commits] RFC: initial union syntax support

Andrew Lenharth andrewl at lenharth.org
Tue May 12 14:56:13 PDT 2009


With a couple more patches, codegen is working.  The main in the
example compiles to:

        movl    foos(%rip), %ecx
        leal    (%rcx,%rcx), %eax
        addl    %ecx, %eax
        ret


On Tue, May 12, 2009 at 1:01 PM, Andrew Lenharth <andrewl at lenharth.org> wrote:
> Below is initial syntax for union types.  This is just syntax, I
> haven't done an audit to find places that assume struct fields don't
> overlap, nor updated the offset calculation code.  A separate derived
> class from CompositeType or StructType would also be reasonable if
> people have objections to overloading StructType.
>
> This patch is enough to do things like:
> ; ModuleID = '<stdin>'
>        type union { i32, i8 }          ; type %0
>        %struct.anon = type union { i8, i32, i32, i32 }
> @foos = external global %struct.anon            ; <%struct.anon*> [#uses=3]
> @bara = external global [2 x %0]                ; <[2 x %0]*> [#uses=2]
>
> define i32 @main() {
>        %tmp = load i32* getelementptr (%struct.anon* @foos, i32 0,
> i32 1)              ; <i32> [#uses=1]
>        %tmp3 = load i32* getelementptr (%struct.anon* @foos, i32 0,
> i32 2)             ; <i32> [#uses=1]
>        %tmp6 = load i32* getelementptr (%struct.anon* @foos, i32 0,
> i32 3)             ; <i32> [#uses=1]
>        %tmp4 = add i32 %tmp3, %tmp             ; <i32> [#uses=1]
>        %tmp7 = add i32 %tmp4, %tmp6            ; <i32> [#uses=1]
>        ret i32 %tmp7
> }
>
> define i32 @bar() {
> entry:
>        %tmp = load i32* getelementptr ([2 x %0]* @bara, i32 0, i32 0,
> i32 0)           ; <i32> [#uses=1]
>        %tmp4 = load i32* getelementptr ([2 x %0]* @bara, i32 0, i32
> 1, i32 0)          ; <i32> [#uses=1]
>        %tmp5 = add i32 %tmp4, %tmp             ; <i32> [#uses=1]
>        ret i32 %tmp5
> }
>
>
>
> Index: include/llvm/DerivedTypes.h
> ===================================================================
> --- include/llvm/DerivedTypes.h (revision 71552)
> +++ include/llvm/DerivedTypes.h (working copy)
> @@ -218,13 +218,17 @@
>   friend class TypeMap<StructValType, StructType>;
>   StructType(const StructType &);                   // Do not
> implement
>   const StructType &operator=(const StructType &);  // Do not
> implement
> -  StructType(const std::vector<const Type*> &Types, bool isPacked);
> +  StructType(const std::vector<const Type*> &Types,
> +             bool isPacked, bool isUnion);
> +  enum {structPlain = 0,
> +        structPacked = 1,
> +        structUnion = 2};
>  public:
>   /// StructType::get - This static method is the primary way to
> create a
>   /// StructType.
>   ///
>   static StructType *get(const std::vector<const Type*> &Params,
> -                         bool isPacked=false);
> +                         bool isPacked=false, bool isUnion=false);
>
>   /// StructType::get - This static method is a convenience method
> for
>   /// creating structure types by specifying the elements as
> arguments.
> @@ -262,7 +266,9 @@
>     return T->getTypeID() == StructTyID;
>   }
>
> -  bool isPacked() const { return (0 != getSubclassData()) ? true :
> false; }
> +  bool isPacked() const { return structPacked == getSubclassData(); }
> +  bool isUnion() const { return structUnion == getSubclassData(); }
> +
>  };
>
>
> Index: include/llvm/Bitcode/LLVMBitCodes.h
> ===================================================================
> --- include/llvm/Bitcode/LLVMBitCodes.h (revision 71552)
> +++ include/llvm/Bitcode/LLVMBitCodes.h (working copy)
> @@ -90,7 +90,11 @@
>     // binary compatibility.
>     TYPE_CODE_X86_FP80 = 13,   // X86 LONG DOUBLE
>     TYPE_CODE_FP128    = 14,   // LONG DOUBLE (112 bit mantissa)
> -    TYPE_CODE_PPC_FP128= 15    // PPC LONG DOUBLE (2 doubles)
> +    TYPE_CODE_PPC_FP128= 15,   // PPC LONG DOUBLE (2 doubles)
> +
> +    // Merge UNION with STRUCT in LLVM 3.0
> +    TYPE_CODE_UNION    = 16    // UNION: [eltty x N]
> +
>     // Any other type code is assumed to be an unknown type.
>   };
>
> Index: docs/LangRef.html
> ===================================================================
> --- docs/LangRef.html   (revision 71552)
> +++ docs/LangRef.html   (working copy)
> @@ -1579,8 +1579,31 @@
>   </tr>
>  </table>
>  </div>
> -
>  <!-- _______________________________________________________________________
> -->
> +<div class="doc_subsubsection"> <a name="t_ustruct">Union Structure
> Type</a>
> +</div>
> +<div class="doc_text">
> +<h5>Overview:</h5>
> +<p>The union structure type is used to represent a collection of data
> members
> +overlapping in memory.  All fields start at an offset of zero.  The
> elements of
> +a union structure may be any type that has a size.  The size of a
> union is the
> +size of the largest element.  The alignment is the alignment of the
> most restricted
> +element.</p>
> +<p>Structures are accessed using '<tt><a href="#i_load">load</a></tt>'
> +and '<tt><a href="#i_store">store</a></tt>' by getting a pointer to a
> +field with the '<tt><a
> href="#i_getelementptr">getelementptr</a></tt>'
> +instruction.</p>
> +<h5>Syntax:</h5>
> +<pre>  union { <type list> } <br></pre>
> +<h5>Examples:</h5>
> +<table class="layout">
> +  <tr class="layout">
> +    <td class="left"><tt>union { i32, i32*, i64 }</tt></td>
> +    <td class="left">A union of three values</td>
> +  </tr>
> +</table>
> +</div>
> +<!-- _______________________________________________________________________
> -->
>  <div class="doc_subsubsection"> <a name="t_pointer">Pointer Type</a>
> </div>
>  <div class="doc_text">
>  <h5>Overview:</h5>
> Index: lib/VMCore/AsmWriter.cpp
> ===================================================================
> --- lib/VMCore/AsmWriter.cpp    (revision 71552)
> +++ lib/VMCore/AsmWriter.cpp    (working copy)
> @@ -225,6 +225,8 @@
>     const StructType *STy = cast<StructType>(Ty);
>     if (STy->isPacked())
>       OS << '<';
> +    if (STy->isUnion())
> +      OS << "union ";
>     OS << "{ ";
>     for (StructType::element_iterator I = STy->element_begin(),
>          E = STy->element_end(); I != E; ++I) {
> Index: lib/VMCore/Type.cpp
> ===================================================================
> --- lib/VMCore/Type.cpp (revision 71552)
> +++ lib/VMCore/Type.cpp (working copy)
> @@ -338,11 +338,13 @@
>   setAbstract(isAbstract);
>  }
>
> -StructType::StructType(const std::vector<const Type*> &Types, bool
> isPacked)
> +StructType::StructType(const std::vector<const Type*> &Types, bool
> isPacked,
> +                       bool isUnion)
>   : CompositeType(StructTyID) {
>   ContainedTys = reinterpret_cast<PATypeHandle*>(this + 1);
>   NumContainedTys = Types.size();
> -  setSubclassData(isPacked);
> +  assert(!(isPacked && isUnion) && "Packed union not supported");
> +  setSubclassData(isPacked + (int)isUnion * 2);
>   bool isAbstract = false;
>   for (unsigned i = 0; i < Types.size(); ++i) {
>     assert(Types[i] != Type::VoidTy && "Void type for structure
> field!!");
> @@ -1107,9 +1109,11 @@
>  class StructValType {
>   std::vector<const Type*> ElTypes;
>   bool packed;
> +  bool _union;
>  public:
> -  StructValType(const std::vector<const Type*> &args, bool isPacked)
> -    : ElTypes(args), packed(isPacked) {}
> +  StructValType(const std::vector<const Type*> &args,
> +                bool isPacked, bool isUnion)
> +    : ElTypes(args), packed(isPacked), _union(isUnion) {}
>
>   static StructValType get(const StructType *ST) {
>     std::vector<const Type *> ElTypes;
> @@ -1117,7 +1121,7 @@
>     for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i)
>       ElTypes.push_back(ST->getElementType(i));
>
> -    return StructValType(ElTypes, ST->isPacked());
> +    return StructValType(ElTypes, ST->isPacked(),  ST->isUnion());
>   }
>
>   static unsigned hashTypeStructure(const StructType *ST) {
> @@ -1127,7 +1131,9 @@
>   inline bool operator<(const StructValType &STV) const {
>     if (ElTypes < STV.ElTypes) return true;
>     else if (ElTypes > STV.ElTypes) return false;
> -    else return (int)packed < (int)STV.packed;
> +    else
> +      return (int)packed + 2 * (int)_union
> +           < (int)STV.packed + 2 * (int)STV._union;
>   }
>  };
>  }
> @@ -1135,15 +1141,15 @@
>  static ManagedStatic<TypeMap<StructValType, StructType> >
> StructTypes;
>
>  StructType *StructType::get(const std::vector<const Type*> &ETypes,
> -                            bool isPacked) {
> -  StructValType STV(ETypes, isPacked);
> +                            bool isPacked, bool isUnion) {
> +  StructValType STV(ETypes, isPacked, isUnion);
>   StructType *ST = StructTypes->get(STV);
>   if (ST) return ST;
>
>   // Value not found.  Derive a new type!
>   ST = (StructType*) operator new(sizeof(StructType) +
>                                   sizeof(PATypeHandle) *
> ETypes.size());
> -  new (ST) StructType(ETypes, isPacked);
> +  new (ST) StructType(ETypes, isPacked, isUnion);
>   StructTypes->add(STV, ST);
>
>  #ifdef DEBUG_MERGE_TYPES
> Index: lib/AsmParser/LLParser.cpp
> ===================================================================
> --- lib/AsmParser/LLParser.cpp  (revision 71552)
> +++ lib/AsmParser/LLParser.cpp  (working copy)
> @@ -964,9 +964,15 @@
>     Result = OpaqueType::get();
>     Lex.Lex();
>     break;
> +  case lltok::kw_union:
> +    // TypeRec ::= 'union' ...
> +    Lex.Lex(); //eat the kw_union
> +    if (ParseStructType(Result, false, true))
> +      return true;
> +    break;
>   case lltok::lbrace:
>     // TypeRec ::= '{' ... '}'
> -    if (ParseStructType(Result, false))
> +    if (ParseStructType(Result, false, false))
>       return true;
>     break;
>   case lltok::lsquare:
> @@ -979,7 +985,7 @@
>     // TypeRec ::= '<' ... '>'
>     Lex.Lex();
>     if (Lex.getKind() == lltok::lbrace) {
> -      if (ParseStructType(Result, true) ||
> +      if (ParseStructType(Result, true, false) ||
>           ParseToken(lltok::greater, "expected '>' at end of packed
> struct"))
>         return true;
>     } else if (ParseArrayVectorType(Result, true))
> @@ -1222,18 +1228,22 @@
>   return false;
>  }
>
> -/// ParseStructType: Handles packed and unpacked types.  </> parsed
> elsewhere.
> +/// ParseStructType: Handles packed and unpacked types.  </> and
> union
> +///                  parsed elsewhere.
>  ///   TypeRec
>  ///     ::= '{' '}'
>  ///     ::= '{' TypeRec (',' TypeRec)* '}'
>  ///     ::= '<' '{' '}' '>'
>  ///     ::= '<' '{' TypeRec (',' TypeRec)* '}' '>'
> -bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed) {
> +///     ::= 'union' '{' '}'
> +///     ::= 'union' '{' TypeRec (',' TypeRec)* '}'
> +
> +bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed,
> bool Union) {
>   assert(Lex.getKind() == lltok::lbrace);
>   Lex.Lex(); // Consume the '{'
>
>   if (EatIfPresent(lltok::rbrace)) {
> -    Result = StructType::get(std::vector<const Type*>(), Packed);
> +    Result = StructType::get(std::vector<const Type*>(), Packed,
> Union);
>     return false;
>   }
>
> @@ -1261,7 +1271,7 @@
>   std::vector<const Type*> ParamsListTy;
>   for (unsigned i = 0, e = ParamsList.size(); i != e; ++i)
>     ParamsListTy.push_back(ParamsList[i].get());
> -  Result = HandleUpRefs(StructType::get(ParamsListTy, Packed));
> +  Result = HandleUpRefs(StructType::get(ParamsListTy, Packed,
> Union));
>   return false;
>  }
>
> Index: lib/AsmParser/LLLexer.cpp
> ===================================================================
> --- lib/AsmParser/LLLexer.cpp   (revision 71552)
> +++ lib/AsmParser/LLLexer.cpp   (working copy)
> @@ -550,6 +550,7 @@
>
>   KEYWORD(type);
>   KEYWORD(opaque);
> +  KEYWORD(union);
>
>   KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
>   KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule);
> KEYWORD(uge);
> Index: lib/AsmParser/LLParser.h
> ===================================================================
> --- lib/AsmParser/LLParser.h    (revision 71552)
> +++ lib/AsmParser/LLParser.h    (working copy)
> @@ -147,7 +147,7 @@
>       return ParseType(Result, AllowVoid);
>     }
>     bool ParseTypeRec(PATypeHolder &H);
> -    bool ParseStructType(PATypeHolder &H, bool Packed);
> +    bool ParseStructType(PATypeHolder &H, bool Packed, bool Union);
>     bool ParseArrayVectorType(PATypeHolder &H, bool isVector);
>     bool ParseFunctionType(PATypeHolder &Result);
>     PATypeHolder HandleUpRefs(const Type *Ty);
> Index: lib/AsmParser/LLToken.h
> ===================================================================
> --- lib/AsmParser/LLToken.h     (revision 71552)
> +++ lib/AsmParser/LLToken.h     (working copy)
> @@ -83,6 +83,7 @@
>
>     kw_type,
>     kw_opaque,
> +    kw_union,
>
>     kw_eq, kw_ne, kw_slt, kw_sgt, kw_sle, kw_sge, kw_ult, kw_ugt,
> kw_ule,
>     kw_uge, kw_oeq, kw_one, kw_olt, kw_ogt, kw_ole, kw_oge, kw_ord,
> kw_uno,
> Index: lib/Bitcode/Reader/BitcodeReader.cpp
> ===================================================================
> --- lib/Bitcode/Reader/BitcodeReader.cpp        (revision 71552)
> +++ lib/Bitcode/Reader/BitcodeReader.cpp        (working copy)
> @@ -536,6 +536,13 @@
>       ResultTy = StructType::get(EltTys, Record[0]);
>       break;
>     }
> +    case bitc::TYPE_CODE_UNION: {  // UNION: [eltty x N]
> +      std::vector<const Type*> EltTys;
> +      for (unsigned i = 0, e = Record.size(); i != e; ++i)
> +        EltTys.push_back(getTypeByID(Record[i], true));
> +      ResultTy = StructType::get(EltTys, false, true);
> +      break;
> +    }
>     case bitc::TYPE_CODE_ARRAY:     // ARRAY: [numelts, eltty]
>       if (Record.size() < 2)
>         return Error("Invalid ARRAY type record");
> Index: lib/Bitcode/Writer/BitcodeWriter.cpp
> ===================================================================
> --- lib/Bitcode/Writer/BitcodeWriter.cpp        (revision 71552)
> +++ lib/Bitcode/Writer/BitcodeWriter.cpp        (working copy)
> @@ -176,6 +176,14 @@
>   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
>                             Log2_32_Ceil(VE.getTypes().size()+1)));
>   unsigned StructAbbrev = Stream.EmitAbbrev(Abbv);
> +
> +  // Abbrev for TYPE_CODE_UNION.
> +  Abbv = new BitCodeAbbrev();
> +  Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_UNION));
> +  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
> +  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
> +                            Log2_32_Ceil(VE.getTypes().size()+1)));
> +  unsigned UnionAbbrev = Stream.EmitAbbrev(Abbv);
>
>   // Abbrev for TYPE_CODE_ARRAY.
>   Abbv = new BitCodeAbbrev();
> @@ -235,14 +243,25 @@
>     }
>     case Type::StructTyID: {
>       const StructType *ST = cast<StructType>(T);
> -      // STRUCT: [ispacked, eltty x N]
> -      Code = bitc::TYPE_CODE_STRUCT;
> -      TypeVals.push_back(ST->isPacked());
> -      // Output all of the element types.
> -      for (StructType::element_iterator I = ST->element_begin(),
> -           E = ST->element_end(); I != E; ++I)
> -        TypeVals.push_back(VE.getTypeID(*I));
> -      AbbrevToUse = StructAbbrev;
> +      if (!ST->isUnion()) {
> +        // STRUCT: [ispacked, eltty x N]
> +        Code = bitc::TYPE_CODE_STRUCT;
> +        TypeVals.push_back(ST->isPacked());
> +        // Output all of the element types.
> +        for (StructType::element_iterator I = ST->element_begin(),
> +               E = ST->element_end(); I != E; ++I)
> +          TypeVals.push_back(VE.getTypeID(*I));
> +        AbbrevToUse = StructAbbrev;
> +      } else {
> +        //Unify with STRUCT in LLVM 3.0
> +        // UNION: [eltty x N]
> +        Code = bitc::TYPE_CODE_UNION;
> +        // Output all of the element types.
> +        for (StructType::element_iterator I = ST->element_begin(),
> +               E = ST->element_end(); I != E; ++I)
> +          TypeVals.push_back(VE.getTypeID(*I));
> +        AbbrevToUse = UnionAbbrev;
> +      }
>       break;
>     }
>     case Type::ArrayTyID: {
>




More information about the llvm-commits mailing list