[LLVMdev] [patch] Writing ConstantUnions

Tim Northover T.P.Northover at sms.ed.ac.uk
Mon Mar 15 14:30:34 PDT 2010


On Mon, Mar 15, 2010 at 11:51:47AM +0000, Tim Northover wrote:
> Hello,
> 
> I noticed a bit of a gap in the current code for unions: a
> ConstantUnion cannot be written out to .ll. 

I've been continuing plugging gaps as I find them, which might not be
the best way to solve this problem, but it has produced something that
seems to do roughly what I expect.

I've split it into three broadly independent patches

1: bitcodeUnion.patch. Again writing constants, I think we need to send
the type index along with the actual data. I've (ab?)used the Aggregate
abbreviation still.

2: asmUnion.patch. Code to emit constant unions as native assembly. Also
includes handling of GEP.

3: layoutUnion.patch. I added functions to calculate the required
size and alignment for unions. I wasn't sure whether to assume alignment
was a power of 2. If not, that function probably needs changing to take
a running lcm rather than maximum.

Together these seem to allow both "llvm-as | llvm-dis" and "llvm-as |
llc" on the following fragment:

%un = type union { {i8, i8}, i32, i8 }
@val = constant %un { i32 42 }

I'm slightly concerned about the ".align 3" I get for arm (and x86 as I
recall), but the same happens with structs so I don't think it's
related.

Any comments or requests to please stop bugging you fine.

Tim.
-------------- next part --------------
Index: lib/Bitcode/Reader/BitcodeReader.cpp
===================================================================
--- lib/Bitcode/Reader/BitcodeReader.cpp	(revision 98552)
+++ lib/Bitcode/Reader/BitcodeReader.cpp	(working copy)
@@ -293,6 +293,8 @@
       } else if (ConstantStruct *UserCS = dyn_cast<ConstantStruct>(UserC)) {
         NewC = ConstantStruct::get(Context, &NewOps[0], NewOps.size(),
                                          UserCS->getType()->isPacked());
+      } else if (ConstantUnion *UserCU = dyn_cast<ConstantUnion>(UserC)) {
+        NewC = ConstantUnion::get(UserCU->getType(), NewOps[0]);
       } else if (isa<ConstantVector>(UserC)) {
         NewC = ConstantVector::get(&NewOps[0], NewOps.size());
       } else {
@@ -1015,6 +1017,11 @@
           Elts.push_back(ValueList.getConstantFwdRef(Record[i],
                                                      STy->getElementType(i)));
         V = ConstantStruct::get(STy, Elts);
+      } else if (const UnionType *UnTy = dyn_cast<UnionType>(CurTy)) {
+        uint64_t Index = Record[0];
+        Constant *Val = ValueList.getConstantFwdRef(Record[1],
+                                        UnTy->getElementType(Index));
+        V = ConstantUnion::get(UnTy, Val);
       } else if (const ArrayType *ATy = dyn_cast<ArrayType>(CurTy)) {
         const Type *EltTy = ATy->getElementType();
         for (unsigned i = 0; i != Size; ++i)
Index: lib/Bitcode/Writer/BitcodeWriter.cpp
===================================================================
--- lib/Bitcode/Writer/BitcodeWriter.cpp	(revision 98552)
+++ lib/Bitcode/Writer/BitcodeWriter.cpp	(working copy)
@@ -808,11 +808,25 @@
       else if (isCStr7)
         AbbrevToUse = CString7Abbrev;
     } else if (isa<ConstantArray>(C) || isa<ConstantStruct>(V) ||
-               isa<ConstantUnion>(C) || isa<ConstantVector>(V)) {
+               isa<ConstantVector>(V)) {
       Code = bitc::CST_CODE_AGGREGATE;
       for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
         Record.push_back(VE.getValueID(C->getOperand(i)));
       AbbrevToUse = AggregateAbbrev;
+    } else if (isa<ConstantUnion>(C)) {
+      Code = bitc::CST_CODE_AGGREGATE;
+
+      // Unions only have one entry but we must send type along with it.
+      const Type *EntryKind = C->getOperand(0)->getType();
+
+      const UnionType *UnTy = cast<UnionType>(C->getType());
+      int UnionIndex = UnTy->getElementTypeIndex(EntryKind);
+      assert(UnionIndex != -1 && "Constant union contains invalid entry");
+
+      Record.push_back(UnionIndex);
+      Record.push_back(VE.getValueID(C->getOperand(0)));
+
+      AbbrevToUse = AggregateAbbrev;
     } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
       switch (CE->getOpcode()) {
       default:
-------------- next part --------------
Index: lib/CodeGen/AsmPrinter/AsmPrinter.cpp
===================================================================
--- lib/CodeGen/AsmPrinter/AsmPrinter.cpp	(revision 98552)
+++ lib/CodeGen/AsmPrinter/AsmPrinter.cpp	(working copy)
@@ -1128,6 +1128,21 @@
          "Layout of constant struct may be incorrect!");
 }
 
+static void EmitGlobalConstantUnion(const ConstantUnion *CU, 
+                                    unsigned AddrSpace, AsmPrinter &AP) {
+  const TargetData *TD = AP.TM.getTargetData();
+  unsigned Size = TD->getTypeAllocSize(CU->getType());
+
+  const Constant *Contents = CU->getOperand(0);
+  unsigned FilledSize = TD->getTypeAllocSize(Contents->getType());
+    
+  // Print the actually filled part
+  AP.EmitGlobalConstant(Contents, AddrSpace);
+
+  // And pad with enough zeroes
+  AP.OutStreamer.EmitZeros(Size-FilledSize, AddrSpace);
+}
+
 static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
                                  AsmPrinter &AP) {
   // FP Constants are printed as integer constants to avoid losing
@@ -1245,6 +1260,9 @@
   if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
     return EmitGlobalConstantStruct(CVS, AddrSpace, *this);
 
+  if (const ConstantUnion *CVU = dyn_cast<ConstantUnion>(CV)) {
+    return EmitGlobalConstantUnion(CVU, AddrSpace, *this);
+  }
   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV))
     return EmitGlobalConstantFP(CFP, AddrSpace, *this);
   
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp	(revision 98552)
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp	(working copy)
@@ -2591,6 +2591,11 @@
       }
 
       Ty = StTy->getElementType(Field);
+    } else if (const UnionType *UnTy = dyn_cast<UnionType>(Ty)) {
+      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+      
+      // Offset canonically 0 for unions, but type changes
+      Ty = UnTy->getElementType(Field);
     } else {
       Ty = cast<SequentialType>(Ty)->getElementType();
 
-------------- next part --------------
Index: lib/Target/TargetData.cpp
===================================================================
--- lib/Target/TargetData.cpp	(revision 98552)
+++ lib/Target/TargetData.cpp	(working copy)
@@ -460,6 +460,15 @@
   case Type::StructTyID:
     // Get the layout annotation... which is lazily created on demand.
     return getStructLayout(cast<StructType>(Ty))->getSizeInBits();
+  case Type::UnionTyID: {
+    const UnionType *UnTy = cast<UnionType>(Ty);
+    uint64_t Size = 0;
+    for (UnionType::element_iterator i = UnTy->element_begin(),
+             e = UnTy->element_end(); i != e; ++i) {
+      Size = std::max(Size, getTypeSizeInBits(*i));
+    }
+    return Size;
+  }
   case Type::IntegerTyID:
     return cast<IntegerType>(Ty)->getBitWidth();
   case Type::VoidTyID:
@@ -516,6 +525,17 @@
     unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty);
     return std::max(Align, (unsigned)Layout->getAlignment());
   }
+  case Type::UnionTyID: {
+    const UnionType *UnTy = cast<UnionType>(Ty);
+    unsigned Align = 1;
+
+    // Unions need the maximum alignment of all their entries
+    for (UnionType::element_iterator i = UnTy->element_begin(), 
+             e = UnTy->element_end(); i != e; ++i) {
+      Align = std::max(Align, (unsigned)getAlignment(*i, abi_or_pref));
+    }
+    return Align;
+  }
   case Type::IntegerTyID:
   case Type::VoidTyID:
     AlignType = INTEGER_ALIGN;
@@ -600,6 +620,11 @@
 
       // Update Ty to refer to current element
       Ty = STy->getElementType(FieldNo);
+    } else if (const UnionType *UnTy = dyn_cast<UnionType>(*TI)) {
+        unsigned FieldNo = cast<ConstantInt>(Indices[CurIDX])->getZExtValue();
+
+        // Offset into union is canonically 0, but type changes
+        Ty = UnTy->getElementType(FieldNo);
     } else {
       // Update Ty to refer to current element
       Ty = cast<SequentialType>(Ty)->getElementType();


More information about the llvm-dev mailing list