[llvm-commits] RFC: initial union syntax support

Nick Lewycky nicholas at mxc.ca
Tue May 12 23:19:28 PDT 2009


Nick Lewycky wrote:
> Andrew Lenharth wrote:
>> Updated patch.  Supports C, Cpp, and all (?) native codegen.  BasicAA
>> updated to be conservative (though not yet conservative enough).
> 
> I still don't like the fact that StructType is UnionType, but that's 
> more subjective than objective.
> 
> I really don't like allowing a union to contain the same type twice. 
> That should be illegal no matter what the object hierarchy is.

For that matter, I just realized after sending that <union {i8, i9}> and 
<union {i9, i8}> must be the same type.

Nick

> 
>> Making unions a type of struct makes most code just work as most code
>> uses getOffset variants since structs can already have zero width
>> elements.
>>
>> Index: include/llvm/DerivedTypes.h
>> ===================================================================
>> --- include/llvm/DerivedTypes.h	(revision 71552)
>> +++ include/llvm/DerivedTypes.h	(working copy)
>> @@ -218,13 +218,17 @@
>>    friend class TypeMap<StructValType, StructType>;
>>    StructType(const StructType &);                   // Do not implement
>>    const StructType &operator=(const StructType &);  // Do not implement
>> -  StructType(const std::vector<const Type*> &Types, bool isPacked);
>> +  StructType(const std::vector<const Type*> &Types,
>> +             bool isPacked, bool isUnion);
>> +  enum {structPlain = 0,
>> +        structPacked = 1,
>> +        structUnion = 2};
>>  public:
>>    /// StructType::get - This static method is the primary way to create a
>>    /// StructType.
>>    ///
>>    static StructType *get(const std::vector<const Type*> &Params,
>> -                         bool isPacked=false);
>> +                         bool isPacked=false, bool isUnion=false);
>>
>>    /// StructType::get - This static method is a convenience method for
>>    /// creating structure types by specifying the elements as arguments.
>> @@ -262,7 +266,9 @@
>>      return T->getTypeID() == StructTyID;
>>    }
>>
>> -  bool isPacked() const { return (0 != getSubclassData()) ? true : false; }
>> +  bool isPacked() const { return structPacked == getSubclassData(); }
>> +  bool isUnion() const { return structUnion == getSubclassData(); }
>> +
>>  };
>>
>>
>> Index: include/llvm/Bitcode/LLVMBitCodes.h
>> ===================================================================
>> --- include/llvm/Bitcode/LLVMBitCodes.h	(revision 71552)
>> +++ include/llvm/Bitcode/LLVMBitCodes.h	(working copy)
>> @@ -90,7 +90,11 @@
>>      // binary compatibility.
>>      TYPE_CODE_X86_FP80 = 13,   // X86 LONG DOUBLE
>>      TYPE_CODE_FP128    = 14,   // LONG DOUBLE (112 bit mantissa)
>> -    TYPE_CODE_PPC_FP128= 15    // PPC LONG DOUBLE (2 doubles)
>> +    TYPE_CODE_PPC_FP128= 15,   // PPC LONG DOUBLE (2 doubles)
>> +
>> +    //Merge UNIOIN with STRUCT in LLVM 3.0
>> +    TYPE_CODE_UNION    = 16    // UNIOIN: [eltty x N]
> 
> UNIOIN -> UNION, twice.
> 
>> +
>>      // Any other type code is assumed to be an unknown type.
>>    };
>>
>> Index: docs/LangRef.html
>> ===================================================================
>> --- docs/LangRef.html	(revision 71552)
>> +++ docs/LangRef.html	(working copy)
>> @@ -1579,8 +1579,31 @@
>>    </tr>
>>  </table>
>>  </div>
>> -
>>  <!-- _______________________________________________________________________
>> -->
>> +<div class="doc_subsubsection"> <a name="t_ustruct">Union Structure Type</a>
>> +</div>
>> +<div class="doc_text">
>> +<h5>Overview:</h5>
>> +<p>The union structure type is used to represent a collection of data members
>> +overlapping in memory.  All fields start at an offset of zero.  The elements of
>> +a union structure may be any type that has a size.  The size of a union is the
>> +size of the largest element.  The alignment is the alignment of the most
>> +restrictive element.</p>
>> +<p>Structures are accessed using '<tt><a href="#i_load">load</a></tt>'
>> +and '<tt><a href="#i_store">store</a></tt>' by getting a pointer to a
>> +field with the '<tt><a href="#i_getelementptr">getelementptr</a></tt>'
>> +instruction.</p>
>> +<h5>Syntax:</h5>
>> +<pre>  < union { <type list> } > <br></pre>
>> +<h5>Examples:</h5>
>> +<table class="layout">
>> +  <tr class="layout">
>> +    <td class="left"><tt>< union { i32, i32*, i64 } ></tt></td>
>> +    <td class="left">A union of three values</td>
>> +  </tr><tr class="layout">
>> +</table>
>> +</div>
>> +<!-- _______________________________________________________________________
>> -->
>>  <div class="doc_subsubsection"> <a name="t_pointer">Pointer Type</a> </div>
>>  <div class="doc_text">
>>  <h5>Overview:</h5>
>> Index: lib/Analysis/BasicAliasAnalysis.cpp
>> ===================================================================
>> --- lib/Analysis/BasicAliasAnalysis.cpp	(revision 71552)
>> +++ lib/Analysis/BasicAliasAnalysis.cpp	(working copy)
>> @@ -529,6 +529,12 @@
>>
>>    const PointerType *GEPPointerTy = cast<PointerType>(BasePtr1Ty);
>>
>> +  //Fixme: Be conservative for Unions
>> +  //Fixme: This doesn't handle embedded unions
>> +  if (const StructType* STy =
>> dyn_cast<StructType>(GEPPointerTy->getElementType()))
>> +    if (STy->isUnion())
>> +      return MayAlias;
>> +
>>    // Find the (possibly empty) initial sequence of equal values...
>> which are not
>>    // necessarily constants.
>>    unsigned NumGEP1Operands = NumGEP1Ops, NumGEP2Operands = NumGEP2Ops;
>> @@ -570,7 +576,6 @@
>>      if (AllAreZeros) return MustAlias;
>>    }
>>
>> -
>>    // So now we know that the indexes derived from the base pointers,
>>    // which are known to alias, are different.  We can still determine a
>>    // no-alias result if there are differing constant pairs in the index
>> Index: lib/Target/CBackend/CBackend.cpp
>> ===================================================================
>> --- lib/Target/CBackend/CBackend.cpp	(revision 71552)
>> +++ lib/Target/CBackend/CBackend.cpp	(working copy)
>> @@ -2132,7 +2132,9 @@
>>    // Print out forward declarations for structure types before anything else!
>>    Out << "/* Structure forward decls */\n";
>>    for (; I != End; ++I) {
>> -    std::string Name = "struct l_" + Mang->makeNameProper(I->first);
>> +    const StructType* STy = dyn_cast<StructType>(I->second);
>> +    std::string Name = ((STy && STy->isUnion()) ? "union l_" : "struct l_")
>> +      + Mang->makeNameProper(I->first);
>>      Out << Name << ";\n";
>>      TypeNames.insert(std::make_pair(I->second, Name));
>>    }
>> Index: lib/Target/CppBackend/CPPBackend.cpp
>> ===================================================================
>> --- lib/Target/CppBackend/CPPBackend.cpp	(revision 71552)
>> +++ lib/Target/CppBackend/CPPBackend.cpp	(working copy)
>> @@ -572,7 +572,10 @@
>>        }
>>        Out << "StructType* " << typeName << " = StructType::get("
>>            << typeName << "_fields, /*isPacked=*/"
>> -          << (ST->isPacked() ? "true" : "false") << ");";
>> +          << (ST->isPacked() ? "true" : "false")
>> +          << ", /*isUnion=*/"
>> +          << (ST->isUnion() ? "true" : "false")
>> +          << ");";
>>        nl(Out);
>>        break;
>>      }
>> Index: lib/Target/TargetData.cpp
>> ===================================================================
>> --- lib/Target/TargetData.cpp	(revision 71552)
>> +++ lib/Target/TargetData.cpp	(working copy)
>> @@ -57,8 +57,13 @@
>>      // Keep track of maximum alignment constraint.
>>      StructAlignment = std::max(TyAlign, StructAlignment);
>>
>> -    MemberOffsets[i] = StructSize;
>> -    StructSize += TD.getTypeAllocSize(Ty); // Consume space for this data item
>> +    if (ST->isUnion()) {
>> +      MemberOffsets[i] = 0;
>> +      StructSize = std::max(StructSize, TD.getTypeAllocSize(Ty));
>> +    } else {
>> +      MemberOffsets[i] = StructSize;
>> +      StructSize += TD.getTypeAllocSize(Ty); // Consume space for
>> this data item
>> +    }
>>    }
>>
>>    // Empty structures have alignment of 1 byte.
>> @@ -84,6 +89,7 @@
>>           "Upper bound didn't work!");
>>
>>    // Multiple fields can have the same offset if any of them are zero sized.
>> +  // This will also happen for union structures
>>    // For example, in { i32, [0 x i32], i32 }, searching for offset 4 will stop
>>    // at the i32 element, because it is the last element at that
>> offset.  This is
>>    // the right one to return, because anything after it will have a higher
>> Index: lib/VMCore/AsmWriter.cpp
>> ===================================================================
>> --- lib/VMCore/AsmWriter.cpp	(revision 71552)
>> +++ lib/VMCore/AsmWriter.cpp	(working copy)
>> @@ -225,6 +225,8 @@
>>      const StructType *STy = cast<StructType>(Ty);
>>      if (STy->isPacked())
>>        OS << '<';
>> +    if (STy->isUnion())
>> +      OS << "union ";
>>      OS << "{ ";
>>      for (StructType::element_iterator I = STy->element_begin(),
>>           E = STy->element_end(); I != E; ++I) {
>> Index: lib/VMCore/Type.cpp
>> ===================================================================
>> --- lib/VMCore/Type.cpp	(revision 71552)
>> +++ lib/VMCore/Type.cpp	(working copy)
>> @@ -338,11 +338,13 @@
>>    setAbstract(isAbstract);
>>  }
>>
>> -StructType::StructType(const std::vector<const Type*> &Types, bool isPacked)
>> +StructType::StructType(const std::vector<const Type*> &Types, bool isPacked,
>> +                       bool isUnion)
>>    : CompositeType(StructTyID) {
>>    ContainedTys = reinterpret_cast<PATypeHandle*>(this + 1);
>>    NumContainedTys = Types.size();
>> -  setSubclassData(isPacked);
>> +  assert(!(isPacked && isUnion) && "Packed union not supported");
>> +  setSubclassData(isPacked + (int)isUnion * 2);
>>    bool isAbstract = false;
>>    for (unsigned i = 0; i < Types.size(); ++i) {
>>      assert(Types[i] != Type::VoidTy && "Void type for structure field!!");
>> @@ -1107,9 +1109,11 @@
>>  class StructValType {
>>    std::vector<const Type*> ElTypes;
>>    bool packed;
>> +  bool _union;
>>  public:
>> -  StructValType(const std::vector<const Type*> &args, bool isPacked)
>> -    : ElTypes(args), packed(isPacked) {}
>> +  StructValType(const std::vector<const Type*> &args,
>> +                bool isPacked, bool isUnion)
>> +    : ElTypes(args), packed(isPacked), _union(isUnion) {}
>>
>>    static StructValType get(const StructType *ST) {
>>      std::vector<const Type *> ElTypes;
>> @@ -1117,7 +1121,7 @@
>>      for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i)
>>        ElTypes.push_back(ST->getElementType(i));
>>
>> -    return StructValType(ElTypes, ST->isPacked());
>> +    return StructValType(ElTypes, ST->isPacked(),  ST->isUnion());
>>    }
>>
>>    static unsigned hashTypeStructure(const StructType *ST) {
>> @@ -1127,7 +1131,9 @@
>>    inline bool operator<(const StructValType &STV) const {
>>      if (ElTypes < STV.ElTypes) return true;
>>      else if (ElTypes > STV.ElTypes) return false;
>> -    else return (int)packed < (int)STV.packed;
>> +    else
>> +      return (int)packed + 2 * (int)_union
>> +           < (int)STV.packed + 2 * (int)STV._union;
>>    }
>>  };
>>  }
>> @@ -1135,15 +1141,15 @@
>>  static ManagedStatic<TypeMap<StructValType, StructType> > StructTypes;
>>
>>  StructType *StructType::get(const std::vector<const Type*> &ETypes,
>> -                            bool isPacked) {
>> -  StructValType STV(ETypes, isPacked);
>> +                            bool isPacked, bool isUnion) {
>> +  StructValType STV(ETypes, isPacked, isUnion);
>>    StructType *ST = StructTypes->get(STV);
>>    if (ST) return ST;
>>
>>    // Value not found.  Derive a new type!
>>    ST = (StructType*) operator new(sizeof(StructType) +
>>                                    sizeof(PATypeHandle) * ETypes.size());
>> -  new (ST) StructType(ETypes, isPacked);
>> +  new (ST) StructType(ETypes, isPacked, isUnion);
>>    StructTypes->add(STV, ST);
>>
>>  #ifdef DEBUG_MERGE_TYPES
>> Index: lib/AsmParser/LLParser.cpp
>> ===================================================================
>> --- lib/AsmParser/LLParser.cpp	(revision 71552)
>> +++ lib/AsmParser/LLParser.cpp	(working copy)
>> @@ -964,9 +964,15 @@
>>      Result = OpaqueType::get();
>>      Lex.Lex();
>>      break;
>> +  case lltok::kw_union:
>> +    // TypeRec ::= 'union' ...
>> +    Lex.Lex(); //eat the kw_union
>> +    if (ParseStructType(Result, false, true))
>> +      return true;
>> +    break;
>>    case lltok::lbrace:
>>      // TypeRec ::= '{' ... '}'
>> -    if (ParseStructType(Result, false))
>> +    if (ParseStructType(Result, false, false))
>>        return true;
>>      break;
>>    case lltok::lsquare:
>> @@ -979,7 +985,7 @@
>>      // TypeRec ::= '<' ... '>'
>>      Lex.Lex();
>>      if (Lex.getKind() == lltok::lbrace) {
>> -      if (ParseStructType(Result, true) ||
>> +      if (ParseStructType(Result, true, false) ||
>>            ParseToken(lltok::greater, "expected '>' at end of packed struct"))
>>          return true;
>>      } else if (ParseArrayVectorType(Result, true))
>> @@ -1222,18 +1228,22 @@
>>    return false;
>>  }
>>
>> -/// ParseStructType: Handles packed and unpacked types.  </> parsed elsewhere.
>> +/// ParseStructType: Handles packed and unpacked types.  </> and union
>> +///                  parsed elsewhere.
>>  ///   TypeRec
>>  ///     ::= '{' '}'
>>  ///     ::= '{' TypeRec (',' TypeRec)* '}'
>>  ///     ::= '<' '{' '}' '>'
>>  ///     ::= '<' '{' TypeRec (',' TypeRec)* '}' '>'
>> -bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed) {
>> +///     ::= 'union' '{' '}'
>> +///     ::= 'union' '{' TypeRec (',' TypeRec)* '}'
>> +
>> +bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed, bool Union) {
>>    assert(Lex.getKind() == lltok::lbrace);
>>    Lex.Lex(); // Consume the '{'
>>
>>    if (EatIfPresent(lltok::rbrace)) {
>> -    Result = StructType::get(std::vector<const Type*>(), Packed);
>> +    Result = StructType::get(std::vector<const Type*>(), Packed, Union);
>>      return false;
>>    }
>>
>> @@ -1261,7 +1271,7 @@
>>    std::vector<const Type*> ParamsListTy;
>>    for (unsigned i = 0, e = ParamsList.size(); i != e; ++i)
>>      ParamsListTy.push_back(ParamsList[i].get());
>> -  Result = HandleUpRefs(StructType::get(ParamsListTy, Packed));
>> +  Result = HandleUpRefs(StructType::get(ParamsListTy, Packed, Union));
>>    return false;
>>  }
>>
>> Index: lib/AsmParser/LLLexer.cpp
>> ===================================================================
>> --- lib/AsmParser/LLLexer.cpp	(revision 71552)
>> +++ lib/AsmParser/LLLexer.cpp	(working copy)
>> @@ -550,6 +550,7 @@
>>
>>    KEYWORD(type);
>>    KEYWORD(opaque);
>> +  KEYWORD(union);
>>
>>    KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
>>    KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
>> Index: lib/AsmParser/LLParser.h
>> ===================================================================
>> --- lib/AsmParser/LLParser.h	(revision 71552)
>> +++ lib/AsmParser/LLParser.h	(working copy)
>> @@ -147,7 +147,7 @@
>>        return ParseType(Result, AllowVoid);
>>      }
>>      bool ParseTypeRec(PATypeHolder &H);
>> -    bool ParseStructType(PATypeHolder &H, bool Packed);
>> +    bool ParseStructType(PATypeHolder &H, bool Packed, bool Union);
>>      bool ParseArrayVectorType(PATypeHolder &H, bool isVector);
>>      bool ParseFunctionType(PATypeHolder &Result);
>>      PATypeHolder HandleUpRefs(const Type *Ty);
>> Index: lib/AsmParser/LLToken.h
>> ===================================================================
>> --- lib/AsmParser/LLToken.h	(revision 71552)
>> +++ lib/AsmParser/LLToken.h	(working copy)
>> @@ -83,6 +83,7 @@
>>
>>      kw_type,
>>      kw_opaque,
>> +    kw_union,
>>
>>      kw_eq, kw_ne, kw_slt, kw_sgt, kw_sle, kw_sge, kw_ult, kw_ugt, kw_ule,
>>      kw_uge, kw_oeq, kw_one, kw_olt, kw_ogt, kw_ole, kw_oge, kw_ord, kw_uno,
>> Index: lib/Transforms/Scalar/ScalarReplAggregates.cpp
>> ===================================================================
>> --- lib/Transforms/Scalar/ScalarReplAggregates.cpp	(revision 71552)
>> +++ lib/Transforms/Scalar/ScalarReplAggregates.cpp	(working copy)
>> @@ -562,8 +562,11 @@
>>    // into.
>>    for (; I != E; ++I) {
>>      // Ignore struct elements, no extra checking needed for these.
>> -    if (isa<StructType>(*I))
>> -      continue;
>> +    if (StructType* STy = dyn_cast<StructType>(*I))
>> +      if (STy->isUnion())
>> +        return MarkUnsafe(Info);
>> +      else
>> +        continue;
>>
>>      ConstantInt *IdxVal = dyn_cast<ConstantInt>(I.getOperand());
>>      if (!IdxVal) return MarkUnsafe(Info);
>> @@ -1090,8 +1093,10 @@
>>
>>  /// HasPadding - Return true if the specified type has any structure or
>>  /// alignment padding, false otherwise.
>> +/// Unions are conservatively assumed to have padding
>>  static bool HasPadding(const Type *Ty, const TargetData &TD) {
>>    if (const StructType *STy = dyn_cast<StructType>(Ty)) {
>> +    if (STy->isUnion()) return true;
>>      const StructLayout *SL = TD.getStructLayout(STy);
>>      unsigned PrevFieldBitOffset = 0;
>>      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
>> Index: lib/Bitcode/Reader/BitcodeReader.cpp
>> ===================================================================
>> --- lib/Bitcode/Reader/BitcodeReader.cpp	(revision 71552)
>> +++ lib/Bitcode/Reader/BitcodeReader.cpp	(working copy)
>> @@ -536,6 +536,13 @@
>>        ResultTy = StructType::get(EltTys, Record[0]);
>>        break;
>>      }
>> +    case bitc::TYPE_CODE_UNION: {  // UNION: [eltty x N]
>> +      std::vector<const Type*> EltTys;
>> +      for (unsigned i = 0, e = Record.size(); i != e; ++i)
>> +        EltTys.push_back(getTypeByID(Record[i], true));
>> +      ResultTy = StructType::get(EltTys, false, true);
>> +      break;
>> +    }
>>      case bitc::TYPE_CODE_ARRAY:     // ARRAY: [numelts, eltty]
>>        if (Record.size() < 2)
>>          return Error("Invalid ARRAY type record");
>> Index: lib/Bitcode/Writer/BitcodeWriter.cpp
>> ===================================================================
>> --- lib/Bitcode/Writer/BitcodeWriter.cpp	(revision 71552)
>> +++ lib/Bitcode/Writer/BitcodeWriter.cpp	(working copy)
>> @@ -176,6 +176,14 @@
>>    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
>>                              Log2_32_Ceil(VE.getTypes().size()+1)));
>>    unsigned StructAbbrev = Stream.EmitAbbrev(Abbv);
>> +
>> +  // Abbrev for TYPE_CODE_UNION.
>> +  Abbv = new BitCodeAbbrev();
>> +  Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_UNION));
>> +  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
>> +  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
>> +                            Log2_32_Ceil(VE.getTypes().size()+1)));
>> +  unsigned UnionAbbrev = Stream.EmitAbbrev(Abbv);
>>
>>    // Abbrev for TYPE_CODE_ARRAY.
>>    Abbv = new BitCodeAbbrev();
>> @@ -235,14 +243,25 @@
>>      }
>>      case Type::StructTyID: {
>>        const StructType *ST = cast<StructType>(T);
>> -      // STRUCT: [ispacked, eltty x N]
>> -      Code = bitc::TYPE_CODE_STRUCT;
>> -      TypeVals.push_back(ST->isPacked());
>> -      // Output all of the element types.
>> -      for (StructType::element_iterator I = ST->element_begin(),
>> -           E = ST->element_end(); I != E; ++I)
>> -        TypeVals.push_back(VE.getTypeID(*I));
>> -      AbbrevToUse = StructAbbrev;
>> +      if (!ST->isUnion()) {
>> +        // STRUCT: [ispacked, eltty x N]
>> +        Code = bitc::TYPE_CODE_STRUCT;
>> +        TypeVals.push_back(ST->isPacked());
>> +        // Output all of the element types.
>> +        for (StructType::element_iterator I = ST->element_begin(),
>> +               E = ST->element_end(); I != E; ++I)
>> +          TypeVals.push_back(VE.getTypeID(*I));
>> +        AbbrevToUse = StructAbbrev;
>> +      } else {
>> +        //Unify with STRUCT in LLVM 3.0
>> +        // UNION: [eltty x N]
>> +        Code = bitc::TYPE_CODE_UNION;
>> +        // Output all of the element types.
>> +        for (StructType::element_iterator I = ST->element_begin(),
>> +               E = ST->element_end(); I != E; ++I)
>> +          TypeVals.push_back(VE.getTypeID(*I));
>> +        AbbrevToUse = UnionAbbrev;
>> +      }
>>        break;
>>      }
>>      case Type::ArrayTyID: {
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>>
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> 




More information about the llvm-commits mailing list