[llvm-commits] RFC: initial union syntax support
Nick Lewycky
nicholas at mxc.ca
Tue May 12 23:07:47 PDT 2009
Andrew Lenharth wrote:
> Updated patch. Supports C, Cpp, and all (?) native codegen. BasicAA
> updated to be conservative (but not completely enough yet).
I still don't like the fact that StructType is UnionType, but that's
more subjective than objective.
I really don't like allowing a union to contain the same type twice.
That should be illegal no matter what the object hierarchy is.
Nick
> Making unions a type of struct makes most code just work as most code
> uses getOffset variants since structs can already have zero width
> elements.
>
> Index: include/llvm/DerivedTypes.h
> ===================================================================
> --- include/llvm/DerivedTypes.h (revision 71552)
> +++ include/llvm/DerivedTypes.h (working copy)
> @@ -218,13 +218,17 @@
> friend class TypeMap<StructValType, StructType>;
> StructType(const StructType &); // Do not implement
> const StructType &operator=(const StructType &); // Do not implement
> - StructType(const std::vector<const Type*> &Types, bool isPacked);
> + StructType(const std::vector<const Type*> &Types,
> + bool isPacked, bool isUnion);
> + enum {structPlain = 0,
> + structPacked = 1,
> + structUnion = 2};
> public:
> /// StructType::get - This static method is the primary way to create a
> /// StructType.
> ///
> static StructType *get(const std::vector<const Type*> &Params,
> - bool isPacked=false);
> + bool isPacked=false, bool isUnion=false);
>
> /// StructType::get - This static method is a convenience method for
> /// creating structure types by specifying the elements as arguments.
> @@ -262,7 +266,9 @@
> return T->getTypeID() == StructTyID;
> }
>
> - bool isPacked() const { return (0 != getSubclassData()) ? true : false; }
> + bool isPacked() const { return structPacked == getSubclassData(); }
> + bool isUnion() const { return structUnion == getSubclassData(); }
> +
> };
>
>
> Index: include/llvm/Bitcode/LLVMBitCodes.h
> ===================================================================
> --- include/llvm/Bitcode/LLVMBitCodes.h (revision 71552)
> +++ include/llvm/Bitcode/LLVMBitCodes.h (working copy)
> @@ -90,7 +90,11 @@
> // binary compatibility.
> TYPE_CODE_X86_FP80 = 13, // X86 LONG DOUBLE
> TYPE_CODE_FP128 = 14, // LONG DOUBLE (112 bit mantissa)
> - TYPE_CODE_PPC_FP128= 15 // PPC LONG DOUBLE (2 doubles)
> + TYPE_CODE_PPC_FP128= 15, // PPC LONG DOUBLE (2 doubles)
> +
> + //Merge UNIOIN with STRUCT in LLVM 3.0
> + TYPE_CODE_UNION = 16 // UNIOIN: [eltty x N]
UNIOIN -> UNION, twice.
> +
> // Any other type code is assumed to be an unknown type.
> };
>
> Index: docs/LangRef.html
> ===================================================================
> --- docs/LangRef.html (revision 71552)
> +++ docs/LangRef.html (working copy)
> @@ -1579,8 +1579,31 @@
> </tr>
> </table>
> </div>
> -
> <!-- _______________________________________________________________________ -->
> +<div class="doc_subsubsection"> <a name="t_ustruct">Union Structure Type</a>
> +</div>
> +<div class="doc_text">
> +<h5>Overview:</h5>
> +<p>The union structure type is used to represent a collection of data members
> +overlapping in memory. All fields start at an offset of zero. The elements of
> +a union structure may be any type that has a size. The size of a union is the
> +size of the largest element. The alignment is the alignment of the most restricted
> +element.</p>
> +<p>Structures are accessed using '<tt><a href="#i_load">load</a></tt>
> +and '<tt><a href="#i_store">store</a></tt>' by getting a pointer to a
> +field with the '<tt><a href="#i_getelementptr">getelementptr</a></tt>'
> +instruction.</p>
> +<h5>Syntax:</h5>
> +<pre> < union { <type list> } > <br></pre>
> +<h5>Examples:</h5>
> +<table class="layout">
> + <tr class="layout">
> + <td class="left"><tt>< union { i32, i32*, i64 } ></tt></td>
> + <td class="left">A union of three values</td>
> + </tr><tr class="layout">
> +</table>
> +</div>
> +<!-- _______________________________________________________________________ -->
> <div class="doc_subsubsection"> <a name="t_pointer">Pointer Type</a> </div>
> <div class="doc_text">
> <h5>Overview:</h5>
> Index: lib/Analysis/BasicAliasAnalysis.cpp
> ===================================================================
> --- lib/Analysis/BasicAliasAnalysis.cpp (revision 71552)
> +++ lib/Analysis/BasicAliasAnalysis.cpp (working copy)
> @@ -529,6 +529,12 @@
>
> const PointerType *GEPPointerTy = cast<PointerType>(BasePtr1Ty);
>
> + //Fixme: Be conservative for Unions
> + //Fixme: This doesn't handle embedded unions
> + if (const StructType* STy = dyn_cast<StructType>(GEPPointerTy->getElementType()))
> + if (STy->isUnion())
> + return MayAlias;
> +
> // Find the (possibly empty) initial sequence of equal values... which are not
> // necessarily constants.
> unsigned NumGEP1Operands = NumGEP1Ops, NumGEP2Operands = NumGEP2Ops;
> @@ -570,7 +576,6 @@
> if (AllAreZeros) return MustAlias;
> }
>
> -
> // So now we know that the indexes derived from the base pointers,
> // which are known to alias, are different. We can still determine a
> // no-alias result if there are differing constant pairs in the index
> Index: lib/Target/CBackend/CBackend.cpp
> ===================================================================
> --- lib/Target/CBackend/CBackend.cpp (revision 71552)
> +++ lib/Target/CBackend/CBackend.cpp (working copy)
> @@ -2132,7 +2132,9 @@
> // Print out forward declarations for structure types before anything else!
> Out << "/* Structure forward decls */\n";
> for (; I != End; ++I) {
> - std::string Name = "struct l_" + Mang->makeNameProper(I->first);
> + const StructType* STy = dyn_cast<StructType>(I->second);
> + std::string Name = ((STy && STy->isUnion()) ? "union l_" : "struct l_")
> + + Mang->makeNameProper(I->first);
> Out << Name << ";\n";
> TypeNames.insert(std::make_pair(I->second, Name));
> }
> Index: lib/Target/CppBackend/CPPBackend.cpp
> ===================================================================
> --- lib/Target/CppBackend/CPPBackend.cpp (revision 71552)
> +++ lib/Target/CppBackend/CPPBackend.cpp (working copy)
> @@ -572,7 +572,10 @@
> }
> Out << "StructType* " << typeName << " = StructType::get("
> << typeName << "_fields, /*isPacked=*/"
> - << (ST->isPacked() ? "true" : "false") << ");";
> + << (ST->isPacked() ? "true" : "false")
> + << ", /*isUnion=*/"
> + << (ST->isUnion() ? "true" : "false")
> + << ");";
> nl(Out);
> break;
> }
> Index: lib/Target/TargetData.cpp
> ===================================================================
> --- lib/Target/TargetData.cpp (revision 71552)
> +++ lib/Target/TargetData.cpp (working copy)
> @@ -57,8 +57,13 @@
> // Keep track of maximum alignment constraint.
> StructAlignment = std::max(TyAlign, StructAlignment);
>
> - MemberOffsets[i] = StructSize;
> - StructSize += TD.getTypeAllocSize(Ty); // Consume space for this data item
> + if (ST->isUnion()) {
> + MemberOffsets[i] = 0;
> + StructSize = std::max(StructSize, TD.getTypeAllocSize(Ty));
> + } else {
> + MemberOffsets[i] = StructSize;
> + StructSize += TD.getTypeAllocSize(Ty); // Consume space for this data item
> + }
> }
>
> // Empty structures have alignment of 1 byte.
> @@ -84,6 +89,7 @@
> "Upper bound didn't work!");
>
> // Multiple fields can have the same offset if any of them are zero sized.
> + // This will also happen for union structures
> // For example, in { i32, [0 x i32], i32 }, searching for offset 4 will stop
> // at the i32 element, because it is the last element at that offset. This is
> // the right one to return, because anything after it will have a higher
> Index: lib/VMCore/AsmWriter.cpp
> ===================================================================
> --- lib/VMCore/AsmWriter.cpp (revision 71552)
> +++ lib/VMCore/AsmWriter.cpp (working copy)
> @@ -225,6 +225,8 @@
> const StructType *STy = cast<StructType>(Ty);
> if (STy->isPacked())
> OS << '<';
> + if (STy->isUnion())
> + OS << "union ";
> OS << "{ ";
> for (StructType::element_iterator I = STy->element_begin(),
> E = STy->element_end(); I != E; ++I) {
> Index: lib/VMCore/Type.cpp
> ===================================================================
> --- lib/VMCore/Type.cpp (revision 71552)
> +++ lib/VMCore/Type.cpp (working copy)
> @@ -338,11 +338,13 @@
> setAbstract(isAbstract);
> }
>
> -StructType::StructType(const std::vector<const Type*> &Types, bool isPacked)
> +StructType::StructType(const std::vector<const Type*> &Types, bool isPacked,
> + bool isUnion)
> : CompositeType(StructTyID) {
> ContainedTys = reinterpret_cast<PATypeHandle*>(this + 1);
> NumContainedTys = Types.size();
> - setSubclassData(isPacked);
> + assert(!(isPacked && isUnion) && "Packed union not supported");
> + setSubclassData(isPacked + (int)isUnion * 2);
> bool isAbstract = false;
> for (unsigned i = 0; i < Types.size(); ++i) {
> assert(Types[i] != Type::VoidTy && "Void type for structure field!!");
> @@ -1107,9 +1109,11 @@
> class StructValType {
> std::vector<const Type*> ElTypes;
> bool packed;
> + bool _union;
> public:
> - StructValType(const std::vector<const Type*> &args, bool isPacked)
> - : ElTypes(args), packed(isPacked) {}
> + StructValType(const std::vector<const Type*> &args,
> + bool isPacked, bool isUnion)
> + : ElTypes(args), packed(isPacked), _union(isUnion) {}
>
> static StructValType get(const StructType *ST) {
> std::vector<const Type *> ElTypes;
> @@ -1117,7 +1121,7 @@
> for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i)
> ElTypes.push_back(ST->getElementType(i));
>
> - return StructValType(ElTypes, ST->isPacked());
> + return StructValType(ElTypes, ST->isPacked(), ST->isUnion());
> }
>
> static unsigned hashTypeStructure(const StructType *ST) {
> @@ -1127,7 +1131,9 @@
> inline bool operator<(const StructValType &STV) const {
> if (ElTypes < STV.ElTypes) return true;
> else if (ElTypes > STV.ElTypes) return false;
> - else return (int)packed < (int)STV.packed;
> + else
> + return (int)packed + 2 * (int)_union
> + < (int)STV.packed + 2 * (int)STV._union;
> }
> };
> }
> @@ -1135,15 +1141,15 @@
> static ManagedStatic<TypeMap<StructValType, StructType> > StructTypes;
>
> StructType *StructType::get(const std::vector<const Type*> &ETypes,
> - bool isPacked) {
> - StructValType STV(ETypes, isPacked);
> + bool isPacked, bool isUnion) {
> + StructValType STV(ETypes, isPacked, isUnion);
> StructType *ST = StructTypes->get(STV);
> if (ST) return ST;
>
> // Value not found. Derive a new type!
> ST = (StructType*) operator new(sizeof(StructType) +
> sizeof(PATypeHandle) * ETypes.size());
> - new (ST) StructType(ETypes, isPacked);
> + new (ST) StructType(ETypes, isPacked, isUnion);
> StructTypes->add(STV, ST);
>
> #ifdef DEBUG_MERGE_TYPES
> Index: lib/AsmParser/LLParser.cpp
> ===================================================================
> --- lib/AsmParser/LLParser.cpp (revision 71552)
> +++ lib/AsmParser/LLParser.cpp (working copy)
> @@ -964,9 +964,15 @@
> Result = OpaqueType::get();
> Lex.Lex();
> break;
> + case lltok::kw_union:
> + // TypeRec ::= 'union' ...
> + Lex.Lex(); //eat the kw_union
> + if (ParseStructType(Result, false, true))
> + return true;
> + break;
> case lltok::lbrace:
> // TypeRec ::= '{' ... '}'
> - if (ParseStructType(Result, false))
> + if (ParseStructType(Result, false, false))
> return true;
> break;
> case lltok::lsquare:
> @@ -979,7 +985,7 @@
> // TypeRec ::= '<' ... '>'
> Lex.Lex();
> if (Lex.getKind() == lltok::lbrace) {
> - if (ParseStructType(Result, true) ||
> + if (ParseStructType(Result, true, false) ||
> ParseToken(lltok::greater, "expected '>' at end of packed struct"))
> return true;
> } else if (ParseArrayVectorType(Result, true))
> @@ -1222,18 +1228,22 @@
> return false;
> }
>
> -/// ParseStructType: Handles packed and unpacked types. </> parsed elsewhere.
> +/// ParseStructType: Handles packed and unpacked types. </> and union
> +/// parsed elsewhere.
> /// TypeRec
> /// ::= '{' '}'
> /// ::= '{' TypeRec (',' TypeRec)* '}'
> /// ::= '<' '{' '}' '>'
> /// ::= '<' '{' TypeRec (',' TypeRec)* '}' '>'
> -bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed) {
> +/// ::= 'union' '{' '}'
> +/// ::= 'union' '{' TypeRec (',' TypeRec)* '}'
> +
> +bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed, bool Union) {
> assert(Lex.getKind() == lltok::lbrace);
> Lex.Lex(); // Consume the '{'
>
> if (EatIfPresent(lltok::rbrace)) {
> - Result = StructType::get(std::vector<const Type*>(), Packed);
> + Result = StructType::get(std::vector<const Type*>(), Packed, Union);
> return false;
> }
>
> @@ -1261,7 +1271,7 @@
> std::vector<const Type*> ParamsListTy;
> for (unsigned i = 0, e = ParamsList.size(); i != e; ++i)
> ParamsListTy.push_back(ParamsList[i].get());
> - Result = HandleUpRefs(StructType::get(ParamsListTy, Packed));
> + Result = HandleUpRefs(StructType::get(ParamsListTy, Packed, Union));
> return false;
> }
>
> Index: lib/AsmParser/LLLexer.cpp
> ===================================================================
> --- lib/AsmParser/LLLexer.cpp (revision 71552)
> +++ lib/AsmParser/LLLexer.cpp (working copy)
> @@ -550,6 +550,7 @@
>
> KEYWORD(type);
> KEYWORD(opaque);
> + KEYWORD(union);
>
> KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
> KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
> Index: lib/AsmParser/LLParser.h
> ===================================================================
> --- lib/AsmParser/LLParser.h (revision 71552)
> +++ lib/AsmParser/LLParser.h (working copy)
> @@ -147,7 +147,7 @@
> return ParseType(Result, AllowVoid);
> }
> bool ParseTypeRec(PATypeHolder &H);
> - bool ParseStructType(PATypeHolder &H, bool Packed);
> + bool ParseStructType(PATypeHolder &H, bool Packed, bool Union);
> bool ParseArrayVectorType(PATypeHolder &H, bool isVector);
> bool ParseFunctionType(PATypeHolder &Result);
> PATypeHolder HandleUpRefs(const Type *Ty);
> Index: lib/AsmParser/LLToken.h
> ===================================================================
> --- lib/AsmParser/LLToken.h (revision 71552)
> +++ lib/AsmParser/LLToken.h (working copy)
> @@ -83,6 +83,7 @@
>
> kw_type,
> kw_opaque,
> + kw_union,
>
> kw_eq, kw_ne, kw_slt, kw_sgt, kw_sle, kw_sge, kw_ult, kw_ugt, kw_ule,
> kw_uge, kw_oeq, kw_one, kw_olt, kw_ogt, kw_ole, kw_oge, kw_ord, kw_uno,
> Index: lib/Transforms/Scalar/ScalarReplAggregates.cpp
> ===================================================================
> --- lib/Transforms/Scalar/ScalarReplAggregates.cpp (revision 71552)
> +++ lib/Transforms/Scalar/ScalarReplAggregates.cpp (working copy)
> @@ -562,8 +562,11 @@
> // into.
> for (; I != E; ++I) {
> // Ignore struct elements, no extra checking needed for these.
> - if (isa<StructType>(*I))
> - continue;
> + if (StructType* STy = dyn_cast<StructType>(*I))
> + if (STy->isUnion())
> + return MarkUnsafe(Info);
> + else
> + continue;
>
> ConstantInt *IdxVal = dyn_cast<ConstantInt>(I.getOperand());
> if (!IdxVal) return MarkUnsafe(Info);
> @@ -1090,8 +1093,10 @@
>
> /// HasPadding - Return true if the specified type has any structure or
> /// alignment padding, false otherwise.
> +/// Unions are conservatively assumed to have padding
> static bool HasPadding(const Type *Ty, const TargetData &TD) {
> if (const StructType *STy = dyn_cast<StructType>(Ty)) {
> + if (STy->isUnion()) return true;
> const StructLayout *SL = TD.getStructLayout(STy);
> unsigned PrevFieldBitOffset = 0;
> for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
> Index: lib/Bitcode/Reader/BitcodeReader.cpp
> ===================================================================
> --- lib/Bitcode/Reader/BitcodeReader.cpp (revision 71552)
> +++ lib/Bitcode/Reader/BitcodeReader.cpp (working copy)
> @@ -536,6 +536,13 @@
> ResultTy = StructType::get(EltTys, Record[0]);
> break;
> }
> + case bitc::TYPE_CODE_UNION: { // UNION: [eltty x N]
> + std::vector<const Type*> EltTys;
> + for (unsigned i = 0, e = Record.size(); i != e; ++i)
> + EltTys.push_back(getTypeByID(Record[i], true));
> + ResultTy = StructType::get(EltTys, false, true);
> + break;
> + }
> case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
> if (Record.size() < 2)
> return Error("Invalid ARRAY type record");
> Index: lib/Bitcode/Writer/BitcodeWriter.cpp
> ===================================================================
> --- lib/Bitcode/Writer/BitcodeWriter.cpp (revision 71552)
> +++ lib/Bitcode/Writer/BitcodeWriter.cpp (working copy)
> @@ -176,6 +176,14 @@
> Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
> Log2_32_Ceil(VE.getTypes().size()+1)));
> unsigned StructAbbrev = Stream.EmitAbbrev(Abbv);
> +
> + // Abbrev for TYPE_CODE_UNION.
> + Abbv = new BitCodeAbbrev();
> + Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_UNION));
> + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
> + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
> + Log2_32_Ceil(VE.getTypes().size()+1)));
> + unsigned UnionAbbrev = Stream.EmitAbbrev(Abbv);
>
> // Abbrev for TYPE_CODE_ARRAY.
> Abbv = new BitCodeAbbrev();
> @@ -235,14 +243,25 @@
> }
> case Type::StructTyID: {
> const StructType *ST = cast<StructType>(T);
> - // STRUCT: [ispacked, eltty x N]
> - Code = bitc::TYPE_CODE_STRUCT;
> - TypeVals.push_back(ST->isPacked());
> - // Output all of the element types.
> - for (StructType::element_iterator I = ST->element_begin(),
> - E = ST->element_end(); I != E; ++I)
> - TypeVals.push_back(VE.getTypeID(*I));
> - AbbrevToUse = StructAbbrev;
> + if (!ST->isUnion()) {
> + // STRUCT: [ispacked, eltty x N]
> + Code = bitc::TYPE_CODE_STRUCT;
> + TypeVals.push_back(ST->isPacked());
> + // Output all of the element types.
> + for (StructType::element_iterator I = ST->element_begin(),
> + E = ST->element_end(); I != E; ++I)
> + TypeVals.push_back(VE.getTypeID(*I));
> + AbbrevToUse = StructAbbrev;
> + } else {
> + //Unify with STRUCT in LLVM 3.0
> + // UNION: [eltty x N]
> + Code = bitc::TYPE_CODE_UNION;
> + // Output all of the element types.
> + for (StructType::element_iterator I = ST->element_begin(),
> + E = ST->element_end(); I != E; ++I)
> + TypeVals.push_back(VE.getTypeID(*I));
> + AbbrevToUse = UnionAbbrev;
> + }
> break;
> }
> case Type::ArrayTyID: {
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
More information about the llvm-commits
mailing list