[LLVMdev] Structure Types and ABI sizes

Tue Feb 15 18:14:54 PST 2011

Hi renato

Do you remember me?
Your comment in a previous mailing-list thread was very helpful to me :)

Nowadays I'm implementing a modified LLVM IR to make struct memory layout
target independent. The modified IR is later converted back to original LLVM IR,
so the general LLVM operations (optimizations, code generation, etc.) can be used.

I am inserting alignment information into the type information to implement the modified IR.

For example,

C source code

struct kist {
  char a:7;
  int b:20;
  short c:3;
  int d:15;
};
struct kist kang = {1, 2, 3, 4};

int main(void) {
  kang.d = 1;
  return 0;
}
-----------------------------------------------------------------
Modified LLVM IR

%struct.kist = type { i7(char), i20(int), i3(short), i15(int) }
@kang = global %struct.kist { i7(char) 1, i20(int) 2, i3(short) 3, i15(int) 4 } ; <%struct.kist*> [#uses=2]
define arm_aapcscc i32(int) @main() nounwind {
entry:
  %retval = alloca i32(int)                       ; <i32(int)*> [#uses=2]
  %0 = alloca i32(int)                            ; <i32(int)*> [#uses=2]
  %"alloca point" = bitcast i32(int) 0 to i32(int) ; <i32(int)> [#uses=0]
  ovmresolving store i15(int) 1, i15(int)* getelementptr inbounds (%struct.kist* @kang, i32(uint) 0, i32(int) 3), align 4
  store i32(int) 0, i32(int)* %0, align 4
  %1 = load i32(int)* %0, align 4                 ; <i32(int)> [#uses=1]
  store i32(int) %1, i32(int)* %retval, align 4
  br label %return
return:                                           ; preds = %entry
  %retval1 = load i32(int)* %retval               ; <i32(int)> [#uses=1]
  ret i32(int) %retval1
}
-----------------------------------------------------------------
Reconstructed LLVM IR

%0 = type { i8(char), i8(char), i8(char), i8(char), i8(char), i8(char), [2 x i8(char)] }
%struct.kist = type <{ i32(int), i16(short), [2 x i8(char)] }>
@kang = global %0 { i8(char) 1, i8(char) 1, i8(char) 0, i8(char) 24, i8(char) 4, i8(char) 0, [2 x i8(char)] zeroinitializer } ; <%0*> [#uses=1]
define arm_aapcscc i32(int) @main() nounwind {
entry:
  %retval = alloca i32(int)                       ; <i32(int)*> [#uses=2]
  %0 = alloca i32(int)                            ; <i32(int)*> [#uses=2]
  %"alloca point" = bitcast i32(int) 0 to i32(int) ; <i32(int)> [#uses=0]
  %1 = load i16(short)* getelementptr inbounds (%struct.kist* bitcast (%0* @kang to %struct.kist*), i32(uint) 0, i32(int) 1), align 1 ; <i16(short)> [#uses=1]
  %2 = and i16(short) %1, -32768                  ; <i16(short)> [#uses=1]
  %3 = or i16(short) %2, 1                        ; <i16(short)> [#uses=1]
  store i16(short) %3, i16(short)* getelementptr inbounds (%struct.kist* bitcast (%0* @kang to %struct.kist*), i32(uint) 0, i32(int) 1), align 1
  store i32(int) 0, i32(int)* %0, align 4
  %4 = load i32(int)* %0, align 4                 ; <i32(int)> [#uses=1]
  store i32(int) %4, i32(int)* %retval, align 4
  br label %return
return:                                           ; preds = %entry
  %retval1 = load i32(int)* %retval               ; <i32(int)> [#uses=1]
  ret i32(int) %retval1
}
-----------------------------------------------------------------

To insert alignment information into the type information, I added it to
llvm-gcc's type nodes and to LLVM's type information as follows:

example IntegerType

llvm-gcc file: gcc/tree.c
  TYPE_OVMALIGNID(char_type_node) = CharAlignID;
  TYPE_OVMALIGNID(signed_char_type_node) = CharAlignID;
  TYPE_OVMALIGNID(unsigned_char_type_node) = UCharAlignID;
  TYPE_OVMALIGNID(short_integer_type_node) = ShortAlignID;
  TYPE_OVMALIGNID(short_unsigned_type_node) = UShortAlignID;
  TYPE_OVMALIGNID(integer_type_node) = IntAlignID;
  TYPE_OVMALIGNID(unsigned_type_node) = UIntAlignID;
  TYPE_OVMALIGNID(long_integer_type_node) = LongAlignID;
  TYPE_OVMALIGNID(long_unsigned_type_node) = ULongAlignID;
  TYPE_OVMALIGNID(long_long_integer_type_node) = LongLongAlignID;
  TYPE_OVMALIGNID(long_long_unsigned_type_node) = ULongLongAlignID;

llvm file: include/llvm/Type.h based on llvm-2.6
class Type : public AbstractTypeUser {
...
private:
  unsigned AlignID : 7;  // insertion to add align information
...
protected
  explicit Type(LLVMContext &C, TypeID id) :
                             ID(id), Abstract(false), SubclassData(0),
                             AlignID(0), RefCount(0), Context(C),
                             ForwardType(0), NumContainedTys(0),
                             ContainedTys(0) {
...
  void setAlignID(unsigned ID) { AlignID = ID; }
  inline unsigned getAlignID() const { return AlignID; }
...
}

llvm file: include/llvm/Type.h based on llvm-2.6
class IntegerType : public DerivedType {
...
  explicit IntegerType(LLVMContext &C, unsigned NumBits, unsigned AlignID) :
      DerivedType(C, IntegerTyID) {
    setSubclassData(NumBits);
    setAlignID(AlignID);
  }
...
  static const IntegerType* get(LLVMContext &C, unsigned NumBits, unsigned AlignID);
...

llvm file: lib/VMCore/TypeContext.h based on llvm-2.6
class IntegerValType {
  IntegerValType(uint16_t numbits, unsigned alignid) : bits(numbits), AlignID(alignid){}
  static IntegerValType get(const IntegerType *Ty) {
    return IntegerValType(Ty->getBitWidth(), Ty->getAlignID());
  }

  static IntegerValType get(const IntegerType *Ty, unsigned alignid) {
    return IntegerValType(Ty->getBitWidth(), alignid);
  }

  static unsigned hashTypeStructure(const IntegerType *Ty) {
    return (unsigned)((Ty->getBitWidth() << 4) | Ty->getAlignID());
  }
  inline bool operator<(const IntegerValType &IVT) const {
    if (bits == IVT.bits)
      return AlignID < IVT.AlignID;
    else
      return bits < IVT.bits;

llvm file: lib/Bitcode/Writer/BitcodeWriter.cpp based on llvm-2.6
static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
...
      // INTEGER: [width] [alignID]
      Code = bitc::TYPE_CODE_INTEGER;
      TypeVals.push_back(cast<IntegerType>(T)->getBitWidth());
      TypeVals.push_back(cast<IntegerType>(T)->getAlignID());

llvm file: lib/Bitcode/Reader/BitcodeReader.cpp based on llvm-2.6
bool BitcodeReader::ParseTypeTable() {
...
    case bitc::TYPE_CODE_INTEGER:   // INTEGER: [width]
      if (Record.size() < 1)
        return Error("Invalid Integer type record");
      ResultTy = IntegerType::get(Context, Record[0], Record[1]);

llvm file: include/llvm/Constants.h based on llvm-2.6
class ConstantInt : public Constant {
...
  static Constant* get(const Type* Ty, uint64_t V, bool isSigned, unsigned AlignID);
  static ConstantInt* get(const IntegerType* Ty, uint64_t V,
                          bool isSigned, unsigned AlignID);
  static ConstantInt* get(LLVMContext &Context, const APInt& V, unsigned AlignID);
  static ConstantInt* get(const IntegerType* Ty, const StringRef& Str,
                          uint8_t radix, unsigned AlignID);
  static Constant* get(const Type* Ty, const APInt& V, unsigned AlignID);
...

llvm file: lib/VMCore/Constants.cpp based on llvm-2.6
ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt& V, unsigned AlignID) {
  // Get the corresponding integer type for the bit width of the value.
  const IntegerType *ITy = IntegerType::get(Context, V.getBitWidth(), AlignID);
...

llvm file: lib/VMCore/AsmWriter.cpp based on llvm-2.6
void TypePrinting::CalcTypeName(const Type *Ty,
                                SmallVectorImpl<const Type *> &TypeStack,
                                raw_ostream &OS, bool IgnoreTopLevelName) {
...
    OS << '(';
    switch (cast<IntegerType>(Ty)->getAlignID()) {
    default:
      //assert(0 && "In OVM Unsupported Align ID");
      break;
    case Type::UndefAlignID:
      break;
    case Type::CharAlignID:
      OS << "char";
      break;
    case Type::UCharAlignID:
      OS << "uchar";
      break;
    case Type::ShortAlignID:
      OS << "short";
...
There are more modified files and more modified code needed to produce the modified IR.

Type information carrying align information, as in the code above, causes a lot
of errors in general LLVM operations.
(For example, the uniqueness of an IntegerType in the IntegerTypes TypeMap is decided
by bit width alone, so current LLVM assumes that integer types with the same bit width
are equal, and a lot of passes rely on this assumption.)

I am in favor of adding align information to the type information because
we sometimes need to know the original type.
(i3 can be a 3-bit char, a 3-bit short, a 3-bit int, or a 3-bit long.)

What do you think about a notation like i3(int), i.e. i3(alignID)?

Thanks,
Jin-Gu Kang
________________________________________
From: llvmdev-bounces at cs.uiuc.edu [llvmdev-bounces at cs.uiuc.edu] On Behalf Of Renato Golin [renato.golin at arm.com]
Sent: Wednesday, February 16, 2011 3:09 AM
To: LLVM Developers Mailing List
Subject: [LLVMdev] Structure Types and ABI sizes

Hi all,

We're hitting some walls here when generating the correct structure
layout for specific C++ ABI requirements, and I was wondering how much
StructLayout could help.

For instance, the ABI has some complicated rules on the size of
derived classes
(http://www.codesourcery.com/public/cxx-abi/abi.html#class-types) and
LLVM struct type cannot reflect that in full.

Example:

// CHECK: %struct.I = type { i32, i8 }
struct I {
  int a;
  char b;
};

// CHECK: %struct.J = type { [8 x i8], i8, [3 x i8] }
struct J : I {
  char c;
};

What happens here is that "c" is placed in the base's tail padding and
there are three bytes padding because of the alignment. The main
problem with this is that, by changing the member (that should be a
structure) to an array, the alignment is lost. As LLVM types don't
have explicit alignment in themselves, it's impossible to recover that
information later and we need to make sure that every single use of
that field gets the correct alignment.

Furthermore, I wonder if that wouldn't impact some optimizations that
take types into account (as Chris has just replied in the vector
discussion)... Not sure...

So, I'm not proposing to have alignment in types nor to make LLVM
struct types conform to a specific ABI of a specific language, I'm
just saying that there should be a cleaner way... Very much like the
union type and bitfields, structure size and alignment problems can be
very hairy. Simplifying the IR and leaving all decisions to the
back-end can be a daunting task, but leaving the front-end to decide
on sizes and alignment is maybe not the best alternative.

StructLayout already knows a few things about structures (like
calculating the offset based on the type's alignment) but it's
ignorant regarding specific language decisions and ABIs. We could
attach some information regarding the language that is being compiled
so the back-end could make some informed choices on how to deal with
structures/unions/bitfields and have fewer hacks in the front-end.

I understand that cross-compilation between languages would break that
assumption, unless the IR has some kind of flags on it stating the
lang/abi used... but I know very few people like adding information to
the IR... :/

Any pointers on how to solve this issue in a better way other than
bloating the front-end?

--
cheers,
--renato
_______________________________________________
LLVM Developers mailing list
LLVMdev at cs.uiuc.edu         http://llvm.cs.uiuc.edu
http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev