r246985 - Compute and preserve alignment more faithfully in IR-generation.

Chandler Carruth via cfe-commits cfe-commits at lists.llvm.org
Thu Sep 10 15:22:13 PDT 2015


I've reproduced this with the same technique.

John, let me know if you need help debugging this, but this is blocking a
ton of stuff for us so I'm going to revert for now.

On Thu, Sep 10, 2015 at 3:13 PM David Majnemer <david.majnemer at gmail.com>
wrote:

> The failure can be reproduced using ./libjpeg-turbo/build/.libs/lt-djpeg
> -dct fast -outfile testout_420_q100_ifast.ppm
> testout_420_q100_ifast_prog.jpg; it will die
> in jsimd_ycc_rgb_convert_sse2.rowloop
>
> I am using https://github.com/libjpeg-turbo/libjpeg-turbo.git at
> revision 0d293537728f211888b04bed6ee19f71e0bda504
>
> Let me know if this isn't enough to reproduce.
>
> On Thu, Sep 10, 2015 at 2:39 PM, Chandler Carruth via cfe-commits <
> cfe-commits at lists.llvm.org> wrote:
>
>> In case anyone else is chasing the same thing, I wanted to post that
>> we're seeing crashes in code introduced by this commit. Specifically,
>> crashes inside of libjpeg-turbo's assembly implementation code, so it's
>> proving very challenging to track down. It looks like either incorrect
>> varargs stack setup, reaching past the redzone due to alignment padding, or
>> something else weird.
>>
>> Notably, it is *not* a SIGILL due to an overaligned access to memory.
>>
>> Anyways, if we get a test case I'll probably file it and revert, but it's
>> proving *very* hard to track down so I wanted to see if others have seen
>> something here.
>>
>> -Chandler
>>
>>
>> On Tue, Sep 8, 2015 at 1:07 AM John McCall via cfe-commits <
>> cfe-commits at lists.llvm.org> wrote:
>>
>>> Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=246985&r1=246984&r2=246985&view=diff
>>>
>>> ==============================================================================
>>> --- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
>>> +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Tue Sep  8 03:05:57 2015
>>> @@ -39,7 +39,7 @@ static void AssignToArrayRange(CodeGen::
>>>    for (unsigned I = FirstIndex; I <= LastIndex; ++I) {
>>>      llvm::Value *Cell =
>>>          Builder.CreateConstInBoundsGEP1_32(Builder.getInt8Ty(), Array,
>>> I);
>>> -    Builder.CreateStore(Value, Cell);
>>> +    Builder.CreateAlignedStore(Value, Cell, CharUnits::One());
>>>    }
>>>  }
>>>
>>> @@ -48,6 +48,19 @@ static bool isAggregateTypeForABI(QualTy
>>>           T->isMemberFunctionPointerType();
>>>  }
>>>
>>> +ABIArgInfo
>>> +ABIInfo::getNaturalAlignIndirect(QualType Ty, bool ByRef, bool Realign,
>>> +                                 llvm::Type *Padding) const {
>>> +  return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty),
>>> +                                 ByRef, Realign, Padding);
>>> +}
>>> +
>>> +ABIArgInfo
>>> +ABIInfo::getNaturalAlignIndirectInReg(QualType Ty, bool Realign) const {
>>> +  return
>>> ABIArgInfo::getIndirectInReg(getContext().getTypeAlignInChars(Ty),
>>> +                                      /*ByRef*/ false, Realign);
>>> +}
>>> +
>>>  ABIInfo::~ABIInfo() {}
>>>
>>>  static CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT,
>>> @@ -133,7 +146,7 @@ void ABIArgInfo::dump() const {
>>>      OS << "InAlloca Offset=" << getInAllocaFieldIndex();
>>>      break;
>>>    case Indirect:
>>> -    OS << "Indirect Align=" << getIndirectAlign()
>>> +    OS << "Indirect Align=" << getIndirectAlign().getQuantity()
>>>         << " ByVal=" << getIndirectByVal()
>>>         << " Realign=" << getIndirectRealign();
>>>      break;
>>> @@ -144,6 +157,125 @@ void ABIArgInfo::dump() const {
>>>    OS << ")\n";
>>>  }
>>>
>>> +/// Emit va_arg for a platform using the common void* representation,
>>> +/// where arguments are simply emitted in an array of slots on the
>>> stack.
>>> +///
>>> +/// This version implements the core direct-value passing rules.
>>> +///
>>> +/// \param SlotSize - The size and alignment of a stack slot.
>>> +///   Each argument will be allocated to a multiple of this number of
>>> +///   slots, and all the slots will be aligned to this value.
>>> +/// \param AllowHigherAlign - The slot alignment is not a cap;
>>> +///   an argument type with an alignment greater than the slot size
>>> +///   will be emitted on a higher-alignment address, potentially
>>> +///   leaving one or more empty slots behind as padding.  If this
>>> +///   is false, the returned address might be less-aligned than
>>> +///   DirectAlign.
>>> +static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF,
>>> +                                      Address VAListAddr,
>>> +                                      llvm::Type *DirectTy,
>>> +                                      CharUnits DirectSize,
>>> +                                      CharUnits DirectAlign,
>>> +                                      CharUnits SlotSize,
>>> +                                      bool AllowHigherAlign) {
>>> +  // Cast the element type to i8* if necessary.  Some platforms define
>>> +  // va_list as a struct containing an i8* instead of just an i8*.
>>> +  if (VAListAddr.getElementType() != CGF.Int8PtrTy)
>>> +    VAListAddr = CGF.Builder.CreateElementBitCast(VAListAddr,
>>> CGF.Int8PtrTy);
>>> +
>>> +  llvm::Value *Ptr = CGF.Builder.CreateLoad(VAListAddr, "argp.cur");
>>> +
>>> +  // If the CC aligns values higher than the slot size, do so if needed.
>>> +  Address Addr = Address::invalid();
>>> +  if (AllowHigherAlign && DirectAlign > SlotSize) {
>>> +    llvm::Value *PtrAsInt = Ptr;
>>> +    PtrAsInt = CGF.Builder.CreatePtrToInt(PtrAsInt, CGF.IntPtrTy);
>>> +    PtrAsInt = CGF.Builder.CreateAdd(PtrAsInt,
>>> +          llvm::ConstantInt::get(CGF.IntPtrTy,
>>> DirectAlign.getQuantity() - 1));
>>> +    PtrAsInt = CGF.Builder.CreateAnd(PtrAsInt,
>>> +             llvm::ConstantInt::get(CGF.IntPtrTy,
>>> -DirectAlign.getQuantity()));
>>> +    Addr = Address(CGF.Builder.CreateIntToPtr(PtrAsInt, Ptr->getType(),
>>> +                                              "argp.cur.aligned"),
>>> +                   DirectAlign);
>>> +  } else {
>>> +    Addr = Address(Ptr, SlotSize);
>>> +  }
>>> +
>>> +  // Advance the pointer past the argument, then store that back.
>>> +  CharUnits FullDirectSize = DirectSize.RoundUpToAlignment(SlotSize);
>>> +  llvm::Value *NextPtr =
>>> +    CGF.Builder.CreateConstInBoundsByteGEP(Addr.getPointer(),
>>> FullDirectSize,
>>> +                                           "argp.next");
>>> +  CGF.Builder.CreateStore(NextPtr, VAListAddr);
>>> +
>>> +  // If the argument is smaller than a slot, and this is a big-endian
>>> +  // target, the argument will be right-adjusted in its slot.
>>> +  if (DirectSize < SlotSize && CGF.CGM.getDataLayout().isBigEndian()) {
>>> +    Addr = CGF.Builder.CreateConstInBoundsByteGEP(Addr, SlotSize -
>>> DirectSize);
>>> +  }
>>> +
>>> +  Addr = CGF.Builder.CreateElementBitCast(Addr, DirectTy);
>>> +  return Addr;
>>> +}
>>> +
>>> +/// Emit va_arg for a platform using the common void* representation,
>>> +/// where arguments are simply emitted in an array of slots on the
>>> stack.
>>> +///
>>> +/// \param IsIndirect - Values of this type are passed indirectly.
>>> +/// \param ValueInfo - The size and alignment of this type, generally
>>> +///   computed with getContext().getTypeInfoInChars(ValueTy).
>>> +/// \param SlotSizeAndAlign - The size and alignment of a stack slot.
>>> +///   Each argument will be allocated to a multiple of this number of
>>> +///   slots, and all the slots will be aligned to this value.
>>> +/// \param AllowHigherAlign - The slot alignment is not a cap;
>>> +///   an argument type with an alignment greater than the slot size
>>> +///   will be emitted on a higher-alignment address, potentially
>>> +///   leaving one or more empty slots behind as padding.
>>> +static Address emitVoidPtrVAArg(CodeGenFunction &CGF, Address
>>> VAListAddr,
>>> +                                QualType ValueTy, bool IsIndirect,
>>> +                                std::pair<CharUnits, CharUnits>
>>> ValueInfo,
>>> +                                CharUnits SlotSizeAndAlign,
>>> +                                bool AllowHigherAlign) {
>>> +  // The size and alignment of the value that was passed directly.
>>> +  CharUnits DirectSize, DirectAlign;
>>> +  if (IsIndirect) {
>>> +    DirectSize = CGF.getPointerSize();
>>> +    DirectAlign = CGF.getPointerAlign();
>>> +  } else {
>>> +    DirectSize = ValueInfo.first;
>>> +    DirectAlign = ValueInfo.second;
>>> +  }
>>> +
>>> +  // Cast the address we've calculated to the right type.
>>> +  llvm::Type *DirectTy = CGF.ConvertTypeForMem(ValueTy);
>>> +  if (IsIndirect)
>>> +    DirectTy = DirectTy->getPointerTo(0);
>>> +
>>> +  Address Addr = emitVoidPtrDirectVAArg(CGF, VAListAddr, DirectTy,
>>> +                                        DirectSize, DirectAlign,
>>> +                                        SlotSizeAndAlign,
>>> +                                        AllowHigherAlign);
>>> +
>>> +  if (IsIndirect) {
>>> +    Addr = Address(CGF.Builder.CreateLoad(Addr), ValueInfo.second);
>>> +  }
>>> +
>>> +  return Addr;
>>> +
>>> +}
>>> +
>>> +static Address emitMergePHI(CodeGenFunction &CGF,
>>> +                            Address Addr1, llvm::BasicBlock *Block1,
>>> +                            Address Addr2, llvm::BasicBlock *Block2,
>>> +                            const llvm::Twine &Name = "") {
>>> +  assert(Addr1.getType() == Addr2.getType());
>>> +  llvm::PHINode *PHI = CGF.Builder.CreatePHI(Addr1.getType(), 2, Name);
>>> +  PHI->addIncoming(Addr1.getPointer(), Block1);
>>> +  PHI->addIncoming(Addr2.getPointer(), Block2);
>>> +  CharUnits Align = std::min(Addr1.getAlignment(),
>>> Addr2.getAlignment());
>>> +  return Address(PHI, Align);
>>> +}
>>> +
>>>  TargetCodeGenInfo::~TargetCodeGenInfo() { delete Info; }
>>>
>>>  // If someone can figure out a general rule for this, that would be
>>> great.
>>> @@ -394,8 +526,8 @@ public:
>>>        I.info = classifyArgumentType(I.type);
>>>    }
>>>
>>> -  llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
>>> -                         CodeGenFunction &CGF) const override;
>>> +  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
>>> +                    QualType Ty) const override;
>>>  };
>>>
>>>  class DefaultTargetCodeGenInfo : public TargetCodeGenInfo {
>>> @@ -404,9 +536,9 @@ public:
>>>      : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
>>>  };
>>>
>>> -llvm::Value *DefaultABIInfo::EmitVAArg(llvm::Value *VAListAddr,
>>> QualType Ty,
>>> -                                       CodeGenFunction &CGF) const {
>>> -  return nullptr;
>>> +Address DefaultABIInfo::EmitVAArg(CodeGenFunction &CGF, Address
>>> VAListAddr,
>>> +                                  QualType Ty) const {
>>> +  return Address::invalid();
>>>  }
>>>
>>>  ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const {
>>> @@ -416,9 +548,9 @@ ABIArgInfo DefaultABIInfo::classifyArgum
>>>      // Records with non-trivial destructors/copy-constructors should
>>> not be
>>>      // passed by value.
>>>      if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
>>> -      return ABIArgInfo::getIndirect(0, RAA ==
>>> CGCXXABI::RAA_DirectInMemory);
>>> +      return getNaturalAlignIndirect(Ty, RAA ==
>>> CGCXXABI::RAA_DirectInMemory);
>>>
>>> -    return ABIArgInfo::getIndirect(0);
>>> +    return getNaturalAlignIndirect(Ty);
>>>    }
>>>
>>>    // Treat an enum type as its underlying type.
>>> @@ -434,7 +566,7 @@ ABIArgInfo DefaultABIInfo::classifyRetur
>>>      return ABIArgInfo::getIgnore();
>>>
>>>    if (isAggregateTypeForABI(RetTy))
>>> -    return ABIArgInfo::getIndirect(0);
>>> +    return getNaturalAlignIndirect(RetTy);
>>>
>>>    // Treat an enum type as its underlying type.
>>>    if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
>>> @@ -482,10 +614,8 @@ ABIArgInfo WebAssemblyABIInfo::classifyA
>>>    if (isAggregateTypeForABI(Ty)) {
>>>      // Records with non-trivial destructors/copy-constructors should
>>> not be
>>>      // passed by value.
>>> -    unsigned TypeAlign =
>>> getContext().getTypeAlignInChars(Ty).getQuantity();
>>>      if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
>>> -      return ABIArgInfo::getIndirect(TypeAlign,
>>> -                                     RAA ==
>>> CGCXXABI::RAA_DirectInMemory);
>>> +      return getNaturalAlignIndirect(Ty, RAA ==
>>> CGCXXABI::RAA_DirectInMemory);
>>>      // Ignore empty structs/unions.
>>>      if (isEmptyRecord(getContext(), Ty, true))
>>>        return ABIArgInfo::getIgnore();
>>> @@ -494,7 +624,7 @@ ABIArgInfo WebAssemblyABIInfo::classifyA
>>>      // though watch out for things like bitfields.
>>>      if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
>>>        return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy,
>>> 0)));
>>> -    return ABIArgInfo::getIndirect(TypeAlign);
>>> +    return getNaturalAlignIndirect(Ty);
>>>    }
>>>
>>>    // Otherwise just do the default thing.
>>> @@ -536,8 +666,8 @@ class PNaClABIInfo : public ABIInfo {
>>>    ABIArgInfo classifyArgumentType(QualType RetTy) const;
>>>
>>>    void computeInfo(CGFunctionInfo &FI) const override;
>>> -  llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
>>> -                         CodeGenFunction &CGF) const override;
>>> +  Address EmitVAArg(CodeGenFunction &CGF,
>>> +                    Address VAListAddr, QualType Ty) const override;
>>>  };
>>>
>>>  class PNaClTargetCodeGenInfo : public TargetCodeGenInfo {
>>> @@ -554,17 +684,17 @@ void PNaClABIInfo::computeInfo(CGFunctio
>>>      I.info = classifyArgumentType(I.type);
>>>  }
>>>
>>> -llvm::Value *PNaClABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType
>>> Ty,
>>> -                                       CodeGenFunction &CGF) const {
>>> -  return nullptr;
>>> +Address PNaClABIInfo::EmitVAArg(CodeGenFunction &CGF, Address
>>> VAListAddr,
>>> +                                QualType Ty) const {
>>> +  return Address::invalid();
>>>  }
>>>
>>>  /// \brief Classify argument of given type \p Ty.
>>>  ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty) const {
>>>    if (isAggregateTypeForABI(Ty)) {
>>>      if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
>>> -      return ABIArgInfo::getIndirect(0, RAA ==
>>> CGCXXABI::RAA_DirectInMemory);
>>> -    return ABIArgInfo::getIndirect(0);
>>> +      return getNaturalAlignIndirect(Ty, RAA ==
>>> CGCXXABI::RAA_DirectInMemory);
>>> +    return getNaturalAlignIndirect(Ty);
>>>    } else if (const EnumType *EnumTy = Ty->getAs<EnumType>()) {
>>>      // Treat an enum type as its underlying type.
>>>      Ty = EnumTy->getDecl()->getIntegerType();
>>> @@ -583,7 +713,7 @@ ABIArgInfo PNaClABIInfo::classifyReturnT
>>>
>>>    // In the PNaCl ABI we always return records/structures on the stack.
>>>    if (isAggregateTypeForABI(RetTy))
>>> -    return ABIArgInfo::getIndirect(0);
>>> +    return getNaturalAlignIndirect(RetTy);
>>>
>>>    // Treat an enum type as its underlying type.
>>>    if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
>>> @@ -687,7 +817,7 @@ class X86_32ABIInfo : public ABIInfo {
>>>    /// such that the argument will be passed in memory.
>>>    ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State)
>>> const;
>>>
>>> -  ABIArgInfo getIndirectReturnResult(CCState &State) const;
>>> +  ABIArgInfo getIndirectReturnResult(QualType Ty, CCState &State) const;
>>>
>>>    /// \brief Return the alignment to use for the given type on the
>>> stack.
>>>    unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const;
>>> @@ -702,14 +832,14 @@ class X86_32ABIInfo : public ABIInfo {
>>>    void rewriteWithInAlloca(CGFunctionInfo &FI) const;
>>>
>>>    void addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
>>> -                           unsigned &StackOffset, ABIArgInfo &Info,
>>> +                           CharUnits &StackOffset, ABIArgInfo &Info,
>>>                             QualType Type) const;
>>>
>>>  public:
>>>
>>>    void computeInfo(CGFunctionInfo &FI) const override;
>>> -  llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
>>> -                         CodeGenFunction &CGF) const override;
>>> +  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
>>> +                    QualType Ty) const override;
>>>
>>>    X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool d, bool p, bool w,
>>>                  unsigned r)
>>> @@ -893,14 +1023,14 @@ bool X86_32ABIInfo::shouldReturnTypeInRe
>>>    return true;
>>>  }
>>>
>>> -ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(CCState &State) const
>>> {
>>> +ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy,
>>> CCState &State) const {
>>>    // If the return value is indirect, then the hidden argument is
>>> consuming one
>>>    // integer register.
>>>    if (State.FreeRegs) {
>>>      --State.FreeRegs;
>>> -    return ABIArgInfo::getIndirectInReg(/*Align=*/0, /*ByVal=*/false);
>>> +    return getNaturalAlignIndirectInReg(RetTy);
>>>    }
>>> -  return ABIArgInfo::getIndirect(/*Align=*/0, /*ByVal=*/false);
>>> +  return getNaturalAlignIndirect(RetTy, /*ByVal=*/false);
>>>  }
>>>
>>>  ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy,
>>> @@ -935,7 +1065,7 @@ ABIArgInfo X86_32ABIInfo::classifyReturn
>>>          return
>>> ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
>>>                                                              Size));
>>>
>>> -      return getIndirectReturnResult(State);
>>> +      return getIndirectReturnResult(RetTy, State);
>>>      }
>>>
>>>      return ABIArgInfo::getDirect();
>>> @@ -945,12 +1075,12 @@ ABIArgInfo X86_32ABIInfo::classifyReturn
>>>      if (const RecordType *RT = RetTy->getAs<RecordType>()) {
>>>        // Structures with flexible arrays are always indirect.
>>>        if (RT->getDecl()->hasFlexibleArrayMember())
>>> -        return getIndirectReturnResult(State);
>>> +        return getIndirectReturnResult(RetTy, State);
>>>      }
>>>
>>>      // If specified, structs and unions are always indirect.
>>>      if (!IsSmallStructInRegABI && !RetTy->isAnyComplexType())
>>> -      return getIndirectReturnResult(State);
>>> +      return getIndirectReturnResult(RetTy, State);
>>>
>>>      // Small structures which are register sized are generally returned
>>>      // in a register.
>>> @@ -972,7 +1102,7 @@ ABIArgInfo X86_32ABIInfo::classifyReturn
>>>        return
>>> ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),Size));
>>>      }
>>>
>>> -    return getIndirectReturnResult(State);
>>> +    return getIndirectReturnResult(RetTy, State);
>>>    }
>>>
>>>    // Treat an enum type as its underlying type.
>>> @@ -1038,21 +1168,22 @@ ABIArgInfo X86_32ABIInfo::getIndirectRes
>>>    if (!ByVal) {
>>>      if (State.FreeRegs) {
>>>        --State.FreeRegs; // Non-byval indirects just use one pointer.
>>> -      return ABIArgInfo::getIndirectInReg(0, false);
>>> +      return getNaturalAlignIndirectInReg(Ty);
>>>      }
>>> -    return ABIArgInfo::getIndirect(0, false);
>>> +    return getNaturalAlignIndirect(Ty, false);
>>>    }
>>>
>>>    // Compute the byval alignment.
>>>    unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
>>>    unsigned StackAlign = getTypeStackAlignInBytes(Ty, TypeAlign);
>>>    if (StackAlign == 0)
>>> -    return ABIArgInfo::getIndirect(4, /*ByVal=*/true);
>>> +    return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4),
>>> /*ByVal=*/true);
>>>
>>>    // If the stack alignment is less than the type alignment, realign the
>>>    // argument.
>>>    bool Realign = TypeAlign > StackAlign;
>>> -  return ABIArgInfo::getIndirect(StackAlign, /*ByVal=*/true, Realign);
>>> +  return ABIArgInfo::getIndirect(CharUnits::fromQuantity(StackAlign),
>>> +                                 /*ByVal=*/true, Realign);
>>>  }
>>>
>>>  X86_32ABIInfo::Class X86_32ABIInfo::classify(QualType Ty) const {
>>> @@ -1259,22 +1390,23 @@ void X86_32ABIInfo::computeInfo(CGFuncti
>>>
>>>  void
>>>  X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6>
>>> &FrameFields,
>>> -                                   unsigned &StackOffset,
>>> -                                   ABIArgInfo &Info, QualType Type)
>>> const {
>>> -  assert(StackOffset % 4U == 0 && "unaligned inalloca struct");
>>> +                                   CharUnits &StackOffset, ABIArgInfo
>>> &Info,
>>> +                                   QualType Type) const {
>>> +  // Arguments are always 4-byte-aligned.
>>> +  CharUnits FieldAlign = CharUnits::fromQuantity(4);
>>> +
>>> +  assert(StackOffset.isMultipleOf(FieldAlign) && "unaligned inalloca
>>> struct");
>>>    Info = ABIArgInfo::getInAlloca(FrameFields.size());
>>>    FrameFields.push_back(CGT.ConvertTypeForMem(Type));
>>> -  StackOffset += getContext().getTypeSizeInChars(Type).getQuantity();
>>> +  StackOffset += getContext().getTypeSizeInChars(Type);
>>>
>>> -  // Insert padding bytes to respect alignment.  For x86_32, each
>>> argument is 4
>>> -  // byte aligned.
>>> -  if (StackOffset % 4U) {
>>> -    unsigned OldOffset = StackOffset;
>>> -    StackOffset = llvm::RoundUpToAlignment(StackOffset, 4U);
>>> -    unsigned NumBytes = StackOffset - OldOffset;
>>> -    assert(NumBytes);
>>> +  // Insert padding bytes to respect alignment.
>>> +  CharUnits FieldEnd = StackOffset;
>>> +  StackOffset = FieldEnd.RoundUpToAlignment(FieldAlign);
>>> +  if (StackOffset != FieldEnd) {
>>> +    CharUnits NumBytes = StackOffset - FieldEnd;
>>>      llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext());
>>> -    Ty = llvm::ArrayType::get(Ty, NumBytes);
>>> +    Ty = llvm::ArrayType::get(Ty, NumBytes.getQuantity());
>>>      FrameFields.push_back(Ty);
>>>    }
>>>  }
>>> @@ -1305,7 +1437,10 @@ void X86_32ABIInfo::rewriteWithInAlloca(
>>>    // Build a packed struct type for all of the arguments in memory.
>>>    SmallVector<llvm::Type *, 6> FrameFields;
>>>
>>> -  unsigned StackOffset = 0;
>>> +  // The stack alignment is always 4.
>>> +  CharUnits StackAlign = CharUnits::fromQuantity(4);
>>> +
>>> +  CharUnits StackOffset;
>>>    CGFunctionInfo::arg_iterator I = FI.arg_begin(), E = FI.arg_end();
>>>
>>>    // Put 'this' into the struct before 'sret', if necessary.
>>> @@ -1337,47 +1472,25 @@ void X86_32ABIInfo::rewriteWithInAlloca(
>>>    }
>>>
>>>    FI.setArgStruct(llvm::StructType::get(getVMContext(), FrameFields,
>>> -                                        /*isPacked=*/true));
>>> +                                        /*isPacked=*/true),
>>> +                  StackAlign);
>>>  }
>>>
>>> -llvm::Value *X86_32ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType
>>> Ty,
>>> -                                      CodeGenFunction &CGF) const {
>>> -  llvm::Type *BPP = CGF.Int8PtrPtrTy;
>>> +Address X86_32ABIInfo::EmitVAArg(CodeGenFunction &CGF,
>>> +                                 Address VAListAddr, QualType Ty) const
>>> {
>>>
>>> -  CGBuilderTy &Builder = CGF.Builder;
>>> -  llvm::Value *VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP,
>>> -                                                       "ap");
>>> -  llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur");
>>> -
>>> -  // Compute if the address needs to be aligned
>>> -  unsigned Align =
>>> CGF.getContext().getTypeAlignInChars(Ty).getQuantity();
>>> -  Align = getTypeStackAlignInBytes(Ty, Align);
>>> -  Align = std::max(Align, 4U);
>>> -  if (Align > 4) {
>>> -    // addr = (addr + align - 1) & -align;
>>> -    llvm::Value *Offset =
>>> -      llvm::ConstantInt::get(CGF.Int32Ty, Align - 1);
>>> -    Addr = CGF.Builder.CreateGEP(Addr, Offset);
>>> -    llvm::Value *AsInt = CGF.Builder.CreatePtrToInt(Addr,
>>> -                                                    CGF.Int32Ty);
>>> -    llvm::Value *Mask = llvm::ConstantInt::get(CGF.Int32Ty, -Align);
>>> -    Addr = CGF.Builder.CreateIntToPtr(CGF.Builder.CreateAnd(AsInt,
>>> Mask),
>>> -                                      Addr->getType(),
>>> -                                      "ap.cur.aligned");
>>> -  }
>>> -
>>> -  llvm::Type *PTy =
>>> -    llvm::PointerType::getUnqual(CGF.ConvertType(Ty));
>>> -  llvm::Value *AddrTyped = Builder.CreateBitCast(Addr, PTy);
>>> -
>>> -  uint64_t Offset =
>>> -    llvm::RoundUpToAlignment(CGF.getContext().getTypeSize(Ty) / 8,
>>> Align);
>>> -  llvm::Value *NextAddr =
>>> -    Builder.CreateGEP(Addr, llvm::ConstantInt::get(CGF.Int32Ty, Offset),
>>> -                      "ap.next");
>>> -  Builder.CreateStore(NextAddr, VAListAddrAsBPP);
>>> +  auto TypeInfo = getContext().getTypeInfoInChars(Ty);
>>>
>>> -  return AddrTyped;
>>> +  // x86-32 changes the alignment of certain arguments on the stack.
>>> +  //
>>> +  // Just messing with TypeInfo like this works because we never pass
>>> +  // anything indirectly.
>>> +  TypeInfo.second = CharUnits::fromQuantity(
>>> +                getTypeStackAlignInBytes(Ty,
>>> TypeInfo.second.getQuantity()));
>>> +
>>> +  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false,
>>> +                          TypeInfo, CharUnits::fromQuantity(4),
>>> +                          /*AllowHigherAlign*/ true);
>>>  }
>>>
>>>  bool X86_32TargetCodeGenInfo::isStructReturnInRegABI(
>>> @@ -1449,8 +1562,9 @@ bool X86_32TargetCodeGenInfo::initDwarfE
>>>    } else {
>>>      // 9 is %eflags, which doesn't get a size on Darwin for some
>>>      // reason.
>>> -    Builder.CreateStore(
>>> -        Four8, Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, Address,
>>> 9));
>>> +    Builder.CreateAlignedStore(
>>> +        Four8, Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, Address,
>>> 9),
>>> +                               CharUnits::One());
>>>
>>>      // 11-16 are st(0..5).  Not sure why we stop at 5.
>>>      // These have size 12, which is sizeof(long double) on
>>> @@ -1619,8 +1733,8 @@ public:
>>>
>>>    void computeInfo(CGFunctionInfo &FI) const override;
>>>
>>> -  llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
>>> -                         CodeGenFunction &CGF) const override;
>>> +  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
>>> +                    QualType Ty) const override;
>>>
>>>    bool has64BitPointers() const {
>>>      return Has64BitPointers;
>>> @@ -1638,8 +1752,8 @@ public:
>>>
>>>    void computeInfo(CGFunctionInfo &FI) const override;
>>>
>>> -  llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
>>> -                         CodeGenFunction &CGF) const override;
>>> +  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
>>> +                    QualType Ty) const override;
>>>
>>>    bool isHomogeneousAggregateBaseType(QualType Ty) const override {
>>>      // FIXME: Assumes vectorcall is in use.
>>> @@ -2257,7 +2371,7 @@ ABIArgInfo X86_64ABIInfo::getIndirectRet
>>>              ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
>>>    }
>>>
>>> -  return ABIArgInfo::getIndirect(0);
>>> +  return getNaturalAlignIndirect(Ty);
>>>  }
>>>
>>>  bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const {
>>> @@ -2291,7 +2405,7 @@ ABIArgInfo X86_64ABIInfo::getIndirectRes
>>>    }
>>>
>>>    if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
>>> -    return ABIArgInfo::getIndirect(0, RAA ==
>>> CGCXXABI::RAA_DirectInMemory);
>>> +    return getNaturalAlignIndirect(Ty, RAA ==
>>> CGCXXABI::RAA_DirectInMemory);
>>>
>>>    // Compute the byval alignment. We specify the alignment of the byval
>>> in all
>>>    // cases so that the mid-level optimizer knows the alignment of the
>>> byval.
>>> @@ -2328,7 +2442,7 @@ ABIArgInfo X86_64ABIInfo::getIndirectRes
>>>                                                            Size));
>>>    }
>>>
>>> -  return ABIArgInfo::getIndirect(Align);
>>> +  return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align));
>>>  }
>>>
>>>  /// The ABI specifies that a value should be passed in a full vector
>>> XMM/YMM
>>> @@ -2912,11 +3026,10 @@ void X86_64ABIInfo::computeInfo(CGFuncti
>>>    }
>>>  }
>>>
>>> -static llvm::Value *EmitVAArgFromMemory(llvm::Value *VAListAddr,
>>> -                                        QualType Ty,
>>> -                                        CodeGenFunction &CGF) {
>>> -  llvm::Value *overflow_arg_area_p = CGF.Builder.CreateStructGEP(
>>> -      nullptr, VAListAddr, 2, "overflow_arg_area_p");
>>> +static Address EmitX86_64VAArgFromMemory(CodeGenFunction &CGF,
>>> +                                         Address VAListAddr, QualType
>>> Ty) {
>>> +  Address overflow_arg_area_p = CGF.Builder.CreateStructGEP(
>>> +      VAListAddr, 2, CharUnits::fromQuantity(8), "overflow_arg_area_p");
>>>    llvm::Value *overflow_arg_area =
>>>      CGF.Builder.CreateLoad(overflow_arg_area_p, "overflow_arg_area");
>>>
>>> @@ -2924,7 +3037,7 @@ static llvm::Value *EmitVAArgFromMemory(
>>>    // byte boundary if alignment needed by type exceeds 8 byte boundary.
>>>    // It isn't stated explicitly in the standard, but in practice we use
>>>    // alignment greater than 16 where necessary.
>>> -  uint64_t Align = CGF.getContext().getTypeAlign(Ty) / 8;
>>> +  uint64_t Align =
>>> CGF.getContext().getTypeAlignInChars(Ty).getQuantity();
>>>    if (Align > 8) {
>>>      // overflow_arg_area = (overflow_arg_area + align - 1) & -align;
>>>      llvm::Value *Offset =
>>> @@ -2958,11 +3071,11 @@ static llvm::Value *EmitVAArgFromMemory(
>>>    CGF.Builder.CreateStore(overflow_arg_area, overflow_arg_area_p);
>>>
>>>    // AMD64-ABI 3.5.7p5: Step 11. Return the fetched type.
>>> -  return Res;
>>> +  return Address(Res, CharUnits::fromQuantity(Align));
>>>  }
>>>
>>> -llvm::Value *X86_64ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType
>>> Ty,
>>> -                                      CodeGenFunction &CGF) const {
>>> +Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address
>>> VAListAddr,
>>> +                                 QualType Ty) const {
>>>    // Assume that va_list type is correct; should be pointer to LLVM
>>> type:
>>>    // struct {
>>>    //   i32 gp_offset;
>>> @@ -2972,14 +3085,14 @@ llvm::Value *X86_64ABIInfo::EmitVAArg(ll
>>>    // };
>>>    unsigned neededInt, neededSSE;
>>>
>>> -  Ty = CGF.getContext().getCanonicalType(Ty);
>>> +  Ty = getContext().getCanonicalType(Ty);
>>>    ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE,
>>>                                         /*isNamedArg*/false);
>>>
>>>    // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
>>>    // in the registers. If not go to step 7.
>>>    if (!neededInt && !neededSSE)
>>> -    return EmitVAArgFromMemory(VAListAddr, Ty, CGF);
>>> +    return EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty);
>>>
>>>    // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of
>>>    // general purpose registers needed to pass type and num_fp to hold
>>> @@ -2993,11 +3106,12 @@ llvm::Value *X86_64ABIInfo::EmitVAArg(ll
>>>    // register save space).
>>>
>>>    llvm::Value *InRegs = nullptr;
>>> -  llvm::Value *gp_offset_p = nullptr, *gp_offset = nullptr;
>>> -  llvm::Value *fp_offset_p = nullptr, *fp_offset = nullptr;
>>> +  Address gp_offset_p = Address::invalid(), fp_offset_p =
>>> Address::invalid();
>>> +  llvm::Value *gp_offset = nullptr, *fp_offset = nullptr;
>>>    if (neededInt) {
>>>      gp_offset_p =
>>> -        CGF.Builder.CreateStructGEP(nullptr, VAListAddr, 0,
>>> "gp_offset_p");
>>> +        CGF.Builder.CreateStructGEP(VAListAddr, 0, CharUnits::Zero(),
>>> +                                    "gp_offset_p");
>>>      gp_offset = CGF.Builder.CreateLoad(gp_offset_p, "gp_offset");
>>>      InRegs = llvm::ConstantInt::get(CGF.Int32Ty, 48 - neededInt * 8);
>>>      InRegs = CGF.Builder.CreateICmpULE(gp_offset, InRegs, "fits_in_gp");
>>> @@ -3005,7 +3119,8 @@ llvm::Value *X86_64ABIInfo::EmitVAArg(ll
>>>
>>>    if (neededSSE) {
>>>      fp_offset_p =
>>> -        CGF.Builder.CreateStructGEP(nullptr, VAListAddr, 1,
>>> "fp_offset_p");
>>> +        CGF.Builder.CreateStructGEP(VAListAddr, 1,
>>> CharUnits::fromQuantity(4),
>>> +                                    "fp_offset_p");
>>>      fp_offset = CGF.Builder.CreateLoad(fp_offset_p, "fp_offset");
>>>      llvm::Value *FitsInFP =
>>>        llvm::ConstantInt::get(CGF.Int32Ty, 176 - neededSSE * 16);
>>> @@ -3033,14 +3148,17 @@ llvm::Value *X86_64ABIInfo::EmitVAArg(ll
>>>    // simple assembling of a structure from scattered addresses has many
>>> more
>>>    // loads than necessary. Can we clean this up?
>>>    llvm::Type *LTy = CGF.ConvertTypeForMem(Ty);
>>> -  llvm::Value *RegAddr = CGF.Builder.CreateLoad(
>>> -      CGF.Builder.CreateStructGEP(nullptr, VAListAddr, 3),
>>> "reg_save_area");
>>> +  llvm::Value *RegSaveArea = CGF.Builder.CreateLoad(
>>> +      CGF.Builder.CreateStructGEP(VAListAddr, 3,
>>> CharUnits::fromQuantity(16)),
>>> +                                  "reg_save_area");
>>> +
>>> +  Address RegAddr = Address::invalid();
>>>    if (neededInt && neededSSE) {
>>>      // FIXME: Cleanup.
>>>      assert(AI.isDirect() && "Unexpected ABI info for mixed regs");
>>>      llvm::StructType *ST = cast<llvm::StructType>(AI.getCoerceToType());
>>> -    llvm::Value *Tmp = CGF.CreateMemTemp(Ty);
>>> -    Tmp = CGF.Builder.CreateBitCast(Tmp, ST->getPointerTo());
>>> +    Address Tmp = CGF.CreateMemTemp(Ty);
>>> +    Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST);
>>>      assert(ST->getNumElements() == 2 && "Unexpected ABI info for mixed
>>> regs");
>>>      llvm::Type *TyLo = ST->getElementType(0);
>>>      llvm::Type *TyHi = ST->getElementType(1);
>>> @@ -3048,57 +3166,77 @@ llvm::Value *X86_64ABIInfo::EmitVAArg(ll
>>>             "Unexpected ABI info for mixed regs");
>>>      llvm::Type *PTyLo = llvm::PointerType::getUnqual(TyLo);
>>>      llvm::Type *PTyHi = llvm::PointerType::getUnqual(TyHi);
>>> -    llvm::Value *GPAddr = CGF.Builder.CreateGEP(RegAddr, gp_offset);
>>> -    llvm::Value *FPAddr = CGF.Builder.CreateGEP(RegAddr, fp_offset);
>>> +    llvm::Value *GPAddr = CGF.Builder.CreateGEP(RegSaveArea, gp_offset);
>>> +    llvm::Value *FPAddr = CGF.Builder.CreateGEP(RegSaveArea, fp_offset);
>>>      llvm::Value *RegLoAddr = TyLo->isFPOrFPVectorTy() ? FPAddr : GPAddr;
>>>      llvm::Value *RegHiAddr = TyLo->isFPOrFPVectorTy() ? GPAddr : FPAddr;
>>> +
>>> +    // Copy the first element.
>>>      llvm::Value *V =
>>> -      CGF.Builder.CreateLoad(CGF.Builder.CreateBitCast(RegLoAddr,
>>> PTyLo));
>>> -    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(ST, Tmp, 0));
>>> -    V = CGF.Builder.CreateLoad(CGF.Builder.CreateBitCast(RegHiAddr,
>>> PTyHi));
>>> -    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(ST, Tmp, 1));
>>> +      CGF.Builder.CreateDefaultAlignedLoad(
>>> +                               CGF.Builder.CreateBitCast(RegLoAddr,
>>> PTyLo));
>>> +    CGF.Builder.CreateStore(V,
>>> +                    CGF.Builder.CreateStructGEP(Tmp, 0,
>>> CharUnits::Zero()));
>>> +
>>> +    // Copy the second element.
>>> +    V = CGF.Builder.CreateDefaultAlignedLoad(
>>> +                               CGF.Builder.CreateBitCast(RegHiAddr,
>>> PTyHi));
>>> +    CharUnits Offset = CharUnits::fromQuantity(
>>> +
>>>  getDataLayout().getStructLayout(ST)->getElementOffset(1));
>>> +    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1,
>>> Offset));
>>>
>>> -    RegAddr = CGF.Builder.CreateBitCast(Tmp,
>>> -
>>> llvm::PointerType::getUnqual(LTy));
>>> +    RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy);
>>>    } else if (neededInt) {
>>> -    RegAddr = CGF.Builder.CreateGEP(RegAddr, gp_offset);
>>> -    RegAddr = CGF.Builder.CreateBitCast(RegAddr,
>>> -
>>> llvm::PointerType::getUnqual(LTy));
>>> +    RegAddr = Address(CGF.Builder.CreateGEP(RegSaveArea, gp_offset),
>>> +                      CharUnits::fromQuantity(8));
>>> +    RegAddr = CGF.Builder.CreateElementBitCast(RegAddr, LTy);
>>>
>>>      // Copy to a temporary if necessary to ensure the appropriate
>>> alignment.
>>>      std::pair<CharUnits, CharUnits> SizeAlign =
>>> -        CGF.getContext().getTypeInfoInChars(Ty);
>>> +        getContext().getTypeInfoInChars(Ty);
>>>      uint64_t TySize = SizeAlign.first.getQuantity();
>>> -    unsigned TyAlign = SizeAlign.second.getQuantity();
>>> -    if (TyAlign > 8) {
>>> -      llvm::Value *Tmp = CGF.CreateMemTemp(Ty);
>>> -      CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, 8, false);
>>> +    CharUnits TyAlign = SizeAlign.second;
>>> +
>>> +    // Copy into a temporary if the type is more aligned than the
>>> +    // register save area.
>>> +    if (TyAlign.getQuantity() > 8) {
>>> +      Address Tmp = CGF.CreateMemTemp(Ty);
>>> +      CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, false);
>>>        RegAddr = Tmp;
>>>      }
>>> +
>>>    } else if (neededSSE == 1) {
>>> -    RegAddr = CGF.Builder.CreateGEP(RegAddr, fp_offset);
>>> -    RegAddr = CGF.Builder.CreateBitCast(RegAddr,
>>> -
>>> llvm::PointerType::getUnqual(LTy));
>>> +    RegAddr = Address(CGF.Builder.CreateGEP(RegSaveArea, fp_offset),
>>> +                      CharUnits::fromQuantity(16));
>>> +    RegAddr = CGF.Builder.CreateElementBitCast(RegAddr, LTy);
>>>    } else {
>>>      assert(neededSSE == 2 && "Invalid number of needed registers!");
>>>      // SSE registers are spaced 16 bytes apart in the register save
>>>      // area, we need to collect the two eightbytes together.
>>> -    llvm::Value *RegAddrLo = CGF.Builder.CreateGEP(RegAddr, fp_offset);
>>> -    llvm::Value *RegAddrHi = CGF.Builder.CreateConstGEP1_32(RegAddrLo,
>>> 16);
>>> +    // The ABI isn't explicit about this, but it seems reasonable
>>> +    // to assume that the slots are 16-byte aligned, since the stack is
>>> +    // naturally 16-byte aligned and the prologue is expected to store
>>> +    // all the SSE registers to the RSA.
>>> +    Address RegAddrLo = Address(CGF.Builder.CreateGEP(RegSaveArea,
>>> fp_offset),
>>> +                                CharUnits::fromQuantity(16));
>>> +    Address RegAddrHi =
>>> +      CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo,
>>> +
>>>  CharUnits::fromQuantity(16));
>>>      llvm::Type *DoubleTy = CGF.DoubleTy;
>>> -    llvm::Type *DblPtrTy =
>>> -      llvm::PointerType::getUnqual(DoubleTy);
>>>      llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy,
>>> nullptr);
>>> -    llvm::Value *V, *Tmp = CGF.CreateMemTemp(Ty);
>>> -    Tmp = CGF.Builder.CreateBitCast(Tmp, ST->getPointerTo());
>>> -    V = CGF.Builder.CreateLoad(CGF.Builder.CreateBitCast(RegAddrLo,
>>> -                                                         DblPtrTy));
>>> -    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(ST, Tmp, 0));
>>> -    V = CGF.Builder.CreateLoad(CGF.Builder.CreateBitCast(RegAddrHi,
>>> -                                                         DblPtrTy));
>>> -    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(ST, Tmp, 1));
>>> -    RegAddr = CGF.Builder.CreateBitCast(Tmp,
>>> -
>>> llvm::PointerType::getUnqual(LTy));
>>> +    llvm::Value *V;
>>> +    Address Tmp = CGF.CreateMemTemp(Ty);
>>> +    Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST);
>>> +    V = CGF.Builder.CreateLoad(
>>> +                   CGF.Builder.CreateElementBitCast(RegAddrLo,
>>> DoubleTy));
>>> +    CGF.Builder.CreateStore(V,
>>> +                   CGF.Builder.CreateStructGEP(Tmp, 0,
>>> CharUnits::Zero()));
>>> +    V = CGF.Builder.CreateLoad(
>>> +                   CGF.Builder.CreateElementBitCast(RegAddrHi,
>>> DoubleTy));
>>> +    CGF.Builder.CreateStore(V,
>>> +          CGF.Builder.CreateStructGEP(Tmp, 1,
>>> CharUnits::fromQuantity(8)));
>>> +
>>> +    RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy);
>>>    }
>>>
>>>    // AMD64-ABI 3.5.7p5: Step 5. Set:
>>> @@ -3119,15 +3257,13 @@ llvm::Value *X86_64ABIInfo::EmitVAArg(ll
>>>    // Emit code to load the value if it was passed in memory.
>>>
>>>    CGF.EmitBlock(InMemBlock);
>>> -  llvm::Value *MemAddr = EmitVAArgFromMemory(VAListAddr, Ty, CGF);
>>> +  Address MemAddr = EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty);
>>>
>>>    // Return the appropriate result.
>>>
>>>    CGF.EmitBlock(ContBlock);
>>> -  llvm::PHINode *ResAddr = CGF.Builder.CreatePHI(RegAddr->getType(), 2,
>>> -                                                 "vaarg.addr");
>>> -  ResAddr->addIncoming(RegAddr, InRegBlock);
>>> -  ResAddr->addIncoming(MemAddr, InMemBlock);
>>> +  Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr,
>>> InMemBlock,
>>> +                                 "vaarg.addr");
>>>    return ResAddr;
>>>  }
>>>
>>> @@ -3148,11 +3284,11 @@ ABIArgInfo WinX86_64ABIInfo::classify(Qu
>>>    if (RT) {
>>>      if (!IsReturnType) {
>>>        if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()))
>>> -        return ABIArgInfo::getIndirect(0, RAA ==
>>> CGCXXABI::RAA_DirectInMemory);
>>> +        return getNaturalAlignIndirect(Ty, RAA ==
>>> CGCXXABI::RAA_DirectInMemory);
>>>      }
>>>
>>>      if (RT->getDecl()->hasFlexibleArrayMember())
>>> -      return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
>>> +      return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
>>>
>>>      // FIXME: mingw-w64-gcc emits 128-bit struct as i128
>>>      if (Width == 128 &&
>>> getTarget().getTriple().isWindowsGNUEnvironment())
>>> @@ -3171,7 +3307,8 @@ ABIArgInfo WinX86_64ABIInfo::classify(Qu
>>>          return ABIArgInfo::getDirect();
>>>        return ABIArgInfo::getExpand();
>>>      }
>>> -    return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
>>> +    return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align),
>>> +                                   /*ByVal=*/false);
>>>    }
>>>
>>>
>>> @@ -3187,7 +3324,7 @@ ABIArgInfo WinX86_64ABIInfo::classify(Qu
>>>      // MS x64 ABI requirement: "Any argument that doesn't fit in 8
>>> bytes, or is
>>>      // not 1, 2, 4, or 8 bytes, must be passed by reference."
>>>      if (Width > 64 || !llvm::isPowerOf2_64(Width))
>>> -      return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
>>> +      return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
>>>
>>>      // Otherwise, coerce it to a small integer.
>>>      return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
>>> Width));
>>> @@ -3217,26 +3354,12 @@ void WinX86_64ABIInfo::computeInfo(CGFun
>>>      I.info = classify(I.type, FreeSSERegs, false);
>>>  }
>>>
>>> -llvm::Value *WinX86_64ABIInfo::EmitVAArg(llvm::Value *VAListAddr,
>>> QualType Ty,
>>> -                                      CodeGenFunction &CGF) const {
>>> -  llvm::Type *BPP = CGF.Int8PtrPtrTy;
>>> -
>>> -  CGBuilderTy &Builder = CGF.Builder;
>>> -  llvm::Value *VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP,
>>> -                                                       "ap");
>>> -  llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur");
>>> -  llvm::Type *PTy =
>>> -    llvm::PointerType::getUnqual(CGF.ConvertType(Ty));
>>> -  llvm::Value *AddrTyped = Builder.CreateBitCast(Addr, PTy);
>>> -
>>> -  uint64_t Offset =
>>> -    llvm::RoundUpToAlignment(CGF.getContext().getTypeSize(Ty) / 8, 8);
>>> -  llvm::Value *NextAddr =
>>> -    Builder.CreateGEP(Addr, llvm::ConstantInt::get(CGF.Int32Ty, Offset),
>>> -                      "ap.next");
>>> -  Builder.CreateStore(NextAddr, VAListAddrAsBPP);
>>> -
>>> -  return AddrTyped;
>>> +Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address
>>> VAListAddr,
>>> +                                    QualType Ty) const {
>>> +  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false,
>>> +                          CGF.getContext().getTypeInfoInChars(Ty),
>>> +                          CharUnits::fromQuantity(8),
>>> +                          /*allowHigherAlign*/ false);
>>>  }
>>>
>>>  // PowerPC-32
>>> @@ -3246,8 +3369,8 @@ class PPC32_SVR4_ABIInfo : public Defaul
>>>  public:
>>>    PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT)
>>> {}
>>>
>>> -  llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
>>> -                         CodeGenFunction &CGF) const override;
>>> +  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
>>> +                    QualType Ty) const override;
>>>  };
>>>
>>>  class PPC32TargetCodeGenInfo : public TargetCodeGenInfo {
>>> @@ -3266,64 +3389,50 @@ public:
>>>
>>>  }
>>>
>>> -llvm::Value *PPC32_SVR4_ABIInfo::EmitVAArg(llvm::Value *VAListAddr,
>>> -                                           QualType Ty,
>>> -                                           CodeGenFunction &CGF) const {
>>> +Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address
>>> VAList,
>>> +                                      QualType Ty) const {
>>>    if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
>>>      // TODO: Implement this. For now ignore.
>>>      (void)CTy;
>>> -    return nullptr;
>>> +    return Address::invalid();
>>>    }
>>>
>>> +  // struct __va_list_tag {
>>> +  //   unsigned char gpr;
>>> +  //   unsigned char fpr;
>>> +  //   unsigned short reserved;
>>> +  //   void *overflow_arg_area;
>>> +  //   void *reg_save_area;
>>> +  // };
>>> +
>>>    bool isI64 = Ty->isIntegerType() && getContext().getTypeSize(Ty) ==
>>> 64;
>>>    bool isInt =
>>>        Ty->isIntegerType() || Ty->isPointerType() ||
>>> Ty->isAggregateType();
>>> -  llvm::Type *CharPtr = CGF.Int8PtrTy;
>>> -  llvm::Type *CharPtrPtr = CGF.Int8PtrPtrTy;
>>> +
>>> +  // All aggregates are passed indirectly?  That doesn't seem consistent
>>> +  // with the argument-lowering code.
>>> +  bool isIndirect = Ty->isAggregateType();
>>>
>>>    CGBuilderTy &Builder = CGF.Builder;
>>> -  llvm::Value *GPRPtr = Builder.CreateBitCast(VAListAddr, CharPtr,
>>> "gprptr");
>>> -  llvm::Value *GPRPtrAsInt = Builder.CreatePtrToInt(GPRPtr,
>>> CGF.Int32Ty);
>>> -  llvm::Value *FPRPtrAsInt =
>>> -      Builder.CreateAdd(GPRPtrAsInt, Builder.getInt32(1));
>>> -  llvm::Value *FPRPtr = Builder.CreateIntToPtr(FPRPtrAsInt, CharPtr);
>>> -  llvm::Value *OverflowAreaPtrAsInt =
>>> -      Builder.CreateAdd(FPRPtrAsInt, Builder.getInt32(3));
>>> -  llvm::Value *OverflowAreaPtr =
>>> -      Builder.CreateIntToPtr(OverflowAreaPtrAsInt, CharPtrPtr);
>>> -  llvm::Value *RegsaveAreaPtrAsInt =
>>> -      Builder.CreateAdd(OverflowAreaPtrAsInt, Builder.getInt32(4));
>>> -  llvm::Value *RegsaveAreaPtr =
>>> -      Builder.CreateIntToPtr(RegsaveAreaPtrAsInt, CharPtrPtr);
>>> -  llvm::Value *GPR = Builder.CreateLoad(GPRPtr, false, "gpr");
>>> -  // Align GPR when TY is i64.
>>> -  if (isI64) {
>>> -    llvm::Value *GPRAnd = Builder.CreateAnd(GPR, Builder.getInt8(1));
>>> -    llvm::Value *CC64 = Builder.CreateICmpEQ(GPRAnd,
>>> Builder.getInt8(1));
>>> -    llvm::Value *GPRPlusOne = Builder.CreateAdd(GPR,
>>> Builder.getInt8(1));
>>> -    GPR = Builder.CreateSelect(CC64, GPRPlusOne, GPR);
>>> -  }
>>> -  llvm::Value *FPR = Builder.CreateLoad(FPRPtr, false, "fpr");
>>> -  llvm::Value *OverflowArea =
>>> -      Builder.CreateLoad(OverflowAreaPtr, false, "overflow_area");
>>> -  llvm::Value *OverflowAreaAsInt =
>>> -      Builder.CreatePtrToInt(OverflowArea, CGF.Int32Ty);
>>> -  llvm::Value *RegsaveArea =
>>> -      Builder.CreateLoad(RegsaveAreaPtr, false, "regsave_area");
>>> -  llvm::Value *RegsaveAreaAsInt =
>>> -      Builder.CreatePtrToInt(RegsaveArea, CGF.Int32Ty);
>>>
>>> -  llvm::Value *CC =
>>> -      Builder.CreateICmpULT(isInt ? GPR : FPR, Builder.getInt8(8),
>>> "cond");
>>> +  // The calling convention either uses 1-2 GPRs or 1 FPR.
>>> +  Address NumRegsAddr = Address::invalid();
>>> +  if (isInt) {
>>> +    NumRegsAddr = Builder.CreateStructGEP(VAList, 0, CharUnits::Zero(),
>>> "gpr");
>>> +  } else {
>>> +    NumRegsAddr = Builder.CreateStructGEP(VAList, 1, CharUnits::One(),
>>> "fpr");
>>> +  }
>>>
>>> -  llvm::Value *RegConstant =
>>> -      Builder.CreateMul(isInt ? GPR : FPR, Builder.getInt8(isInt ? 4 :
>>> 8));
>>> +  llvm::Value *NumRegs = Builder.CreateLoad(NumRegsAddr, "numUsedRegs");
>>>
>>> -  llvm::Value *OurReg = Builder.CreateAdd(
>>> -      RegsaveAreaAsInt, Builder.CreateSExt(RegConstant, CGF.Int32Ty));
>>> +  // "Align" the register count when TY is i64.
>>> +  if (isI64) {
>>> +    NumRegs = Builder.CreateAdd(NumRegs, Builder.getInt8(1));
>>> +    NumRegs = Builder.CreateAnd(NumRegs, Builder.getInt8((uint8_t)
>>> ~1U));
>>> +  }
>>>
>>> -  if (Ty->isFloatingType())
>>> -    OurReg = Builder.CreateAdd(OurReg, Builder.getInt32(32));
>>> +  llvm::Value *CC =
>>> +      Builder.CreateICmpULT(NumRegs, Builder.getInt8(8), "cond");
>>>
>>>    llvm::BasicBlock *UsingRegs = CGF.createBasicBlock("using_regs");
>>>    llvm::BasicBlock *UsingOverflow =
>>> CGF.createBasicBlock("using_overflow");
>>> @@ -3331,39 +3440,84 @@ llvm::Value *PPC32_SVR4_ABIInfo::EmitVAA
>>>
>>>    Builder.CreateCondBr(CC, UsingRegs, UsingOverflow);
>>>
>>> -  CGF.EmitBlock(UsingRegs);
>>> +  llvm::Type *DirectTy = CGF.ConvertType(Ty);
>>> +  if (isIndirect) DirectTy = DirectTy->getPointerTo(0);
>>>
>>> -  llvm::Type *PTy = llvm::PointerType::getUnqual(CGF.ConvertType(Ty));
>>> -  llvm::Value *Result1 = Builder.CreateIntToPtr(OurReg, PTy);
>>> -  // Increase the GPR/FPR indexes.
>>> -  if (isInt) {
>>> -    GPR = Builder.CreateAdd(GPR, Builder.getInt8(isI64 ? 2 : 1));
>>> -    Builder.CreateStore(GPR, GPRPtr);
>>> -  } else {
>>> -    FPR = Builder.CreateAdd(FPR, Builder.getInt8(1));
>>> -    Builder.CreateStore(FPR, FPRPtr);
>>> -  }
>>> -  CGF.EmitBranch(Cont);
>>> -
>>> -  CGF.EmitBlock(UsingOverflow);
>>> +  // Case 1: consume registers.
>>> +  Address RegAddr = Address::invalid();
>>> +  {
>>> +    CGF.EmitBlock(UsingRegs);
>>> +
>>> +    Address RegSaveAreaPtr =
>>> +      Builder.CreateStructGEP(VAList, 4, CharUnits::fromQuantity(8));
>>> +    RegAddr = Address(Builder.CreateLoad(RegSaveAreaPtr),
>>> +                      CharUnits::fromQuantity(8));
>>> +    assert(RegAddr.getElementType() == CGF.Int8Ty);
>>> +
>>> +    // Floating-point registers start after the general-purpose
>>> registers.
>>> +    if (!isInt) {
>>> +      RegAddr = Builder.CreateConstInBoundsByteGEP(RegAddr,
>>> +
>>>  CharUnits::fromQuantity(32));
>>> +    }
>>> +
>>> +    // Get the address of the saved value by scaling the number of
>>> +    // registers we've used by the number of bytes per register.
>>> +    CharUnits RegSize = CharUnits::fromQuantity(isInt ? 4 : 8);
>>> +    llvm::Value *RegOffset =
>>> +      Builder.CreateMul(NumRegs,
>>> Builder.getInt8(RegSize.getQuantity()));
>>> +    RegAddr = Address(Builder.CreateInBoundsGEP(CGF.Int8Ty,
>>> +                                            RegAddr.getPointer(),
>>> RegOffset),
>>> +
>>> RegAddr.getAlignment().alignmentOfArrayElement(RegSize));
>>> +    RegAddr = Builder.CreateElementBitCast(RegAddr, DirectTy);
>>> +
>>> +    // Increase the used-register count.
>>> +    NumRegs = Builder.CreateAdd(NumRegs, Builder.getInt8(isI64 ? 2 :
>>> 1));
>>> +    Builder.CreateStore(NumRegs, NumRegsAddr);
>>> +
>>> +    CGF.EmitBranch(Cont);
>>> +  }
>>> +
>>> +  // Case 2: consume space in the overflow area.
>>> +  Address MemAddr = Address::invalid();
>>> +  {
>>> +    CGF.EmitBlock(UsingOverflow);
>>> +
>>> +    // Everything in the overflow area is rounded up to a size of at
>>> least 4.
>>> +    CharUnits OverflowAreaAlign = CharUnits::fromQuantity(4);
>>> +
>>> +    CharUnits Size;
>>> +    if (!isIndirect) {
>>> +      auto TypeInfo = CGF.getContext().getTypeInfoInChars(Ty);
>>> +      Size = TypeInfo.first.RoundUpToAlignment(OverflowAreaAlign);
>>> +    } else {
>>> +      Size = CGF.getPointerSize();
>>> +    }
>>>
>>> -  // Increase the overflow area.
>>> -  llvm::Value *Result2 = Builder.CreateIntToPtr(OverflowAreaAsInt, PTy);
>>> -  OverflowAreaAsInt =
>>> -      Builder.CreateAdd(OverflowAreaAsInt, Builder.getInt32(isInt ? 4 :
>>> 8));
>>> -  Builder.CreateStore(Builder.CreateIntToPtr(OverflowAreaAsInt,
>>> CharPtr),
>>> -                      OverflowAreaPtr);
>>> -  CGF.EmitBranch(Cont);
>>> +    Address OverflowAreaAddr =
>>> +      Builder.CreateStructGEP(VAList, 3, CharUnits::fromQuantity(4));
>>> +    Address OverflowArea(Builder.CreateLoad(OverflowAreaAddr),
>>> +                         OverflowAreaAlign);
>>> +
>>> +    // The current address is the address of the varargs element.
>>> +    // FIXME: do we not need to round up to alignment?
>>> +    MemAddr = Builder.CreateElementBitCast(OverflowArea, DirectTy);
>>> +
>>> +    // Increase the overflow area.
>>> +    OverflowArea = Builder.CreateConstInBoundsByteGEP(OverflowArea,
>>> Size);
>>> +    Builder.CreateStore(OverflowArea.getPointer(), OverflowAreaAddr);
>>> +    CGF.EmitBranch(Cont);
>>> +  }
>>>
>>>    CGF.EmitBlock(Cont);
>>>
>>> -  llvm::PHINode *Result = CGF.Builder.CreatePHI(PTy, 2, "vaarg.addr");
>>> -  Result->addIncoming(Result1, UsingRegs);
>>> -  Result->addIncoming(Result2, UsingOverflow);
>>> -
>>> -  if (Ty->isAggregateType()) {
>>> -    llvm::Value *AGGPtr = Builder.CreateBitCast(Result, CharPtrPtr,
>>> "aggrptr");
>>> -    return Builder.CreateLoad(AGGPtr, false, "aggr");
>>> +  // Merge the cases with a phi.
>>> +  Address Result = emitMergePHI(CGF, RegAddr, UsingRegs, MemAddr,
>>> UsingOverflow,
>>> +                                "vaarg.addr");
>>> +
>>> +  // Load the pointer if the argument was passed indirectly.
>>> +  if (isIndirect) {
>>> +    Result = Address(Builder.CreateLoad(Result, "aggr"),
>>> +                     getContext().getTypeAlignInChars(Ty));
>>>    }
>>>
>>>    return Result;
>>> @@ -3459,7 +3613,7 @@ public:
>>>      : DefaultABIInfo(CGT), Kind(Kind), HasQPX(HasQPX) {}
>>>
>>>    bool isPromotableTypeForABI(QualType Ty) const;
>>> -  bool isAlignedParamType(QualType Ty, bool &Align32) const;
>>> +  CharUnits getParamTypeAlignment(QualType Ty) const;
>>>
>>>    ABIArgInfo classifyReturnType(QualType RetTy) const;
>>>    ABIArgInfo classifyArgumentType(QualType Ty) const;
>>> @@ -3496,8 +3650,8 @@ public:
>>>      }
>>>    }
>>>
>>> -  llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
>>> -                         CodeGenFunction &CGF) const override;
>>> +  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
>>> +                    QualType Ty) const override;
>>>  };
>>>
>>>  class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo {
>>> @@ -3557,12 +3711,9 @@ PPC64_SVR4_ABIInfo::isPromotableTypeForA
>>>    return false;
>>>  }
>>>
>>> -/// isAlignedParamType - Determine whether a type requires 16-byte
>>> -/// alignment in the parameter area.
>>> -bool
>>> -PPC64_SVR4_ABIInfo::isAlignedParamType(QualType Ty, bool &Align32)
>>> const {
>>> -  Align32 = false;
>>> -
>>> +/// getParamTypeAlignment - Determine whether a type requires 16-byte or
>>> +/// higher alignment in the parameter area.  Always returns at least 8.
>>> +CharUnits PPC64_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const {
>>>    // Complex types are passed just like their elements.
>>>    if (const ComplexType *CTy = Ty->getAs<ComplexType>())
>>>      Ty = CTy->getElementType();
>>> @@ -3571,11 +3722,11 @@ PPC64_SVR4_ABIInfo::isAlignedParamType(Q
>>>    // passed via reference, smaller types are not aligned).
>>>    if (IsQPXVectorTy(Ty)) {
>>>      if (getContext().getTypeSize(Ty) > 128)
>>> -      Align32 = true;
>>> +      return CharUnits::fromQuantity(32);
>>>
>>> -    return true;
>>> +    return CharUnits::fromQuantity(16);
>>>    } else if (Ty->isVectorType()) {
>>> -    return getContext().getTypeSize(Ty) == 128;
>>> +    return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128
>>> ? 16 : 8);
>>>    }
>>>
>>>    // For single-element float/vector structs, we consider the whole type
>>> @@ -3600,22 +3751,22 @@ PPC64_SVR4_ABIInfo::isAlignedParamType(Q
>>>    // With special case aggregates, only vector base types need
>>> alignment.
>>>    if (AlignAsType && IsQPXVectorTy(AlignAsType)) {
>>>      if (getContext().getTypeSize(AlignAsType) > 128)
>>> -      Align32 = true;
>>> +      return CharUnits::fromQuantity(32);
>>>
>>> -    return true;
>>> +    return CharUnits::fromQuantity(16);
>>>    } else if (AlignAsType) {
>>> -    return AlignAsType->isVectorType();
>>> +    return CharUnits::fromQuantity(AlignAsType->isVectorType() ? 16 :
>>> 8);
>>>    }
>>>
>>>    // Otherwise, we only need alignment for any aggregate type that
>>>    // has an alignment requirement of >= 16 bytes.
>>>    if (isAggregateTypeForABI(Ty) && getContext().getTypeAlign(Ty) >=
>>> 128) {
>>>      if (HasQPX && getContext().getTypeAlign(Ty) >= 256)
>>> -      Align32 = true;
>>> -    return true;
>>> +      return CharUnits::fromQuantity(32);
>>> +    return CharUnits::fromQuantity(16);
>>>    }
>>>
>>> -  return false;
>>> +  return CharUnits::fromQuantity(8);
>>>  }
>>>
>>>  /// isHomogeneousAggregate - Return true if a type is an ELFv2
>>> homogeneous
>>> @@ -3748,7 +3899,7 @@ PPC64_SVR4_ABIInfo::classifyArgumentType
>>>    if (Ty->isVectorType() && !IsQPXVectorTy(Ty)) {
>>>      uint64_t Size = getContext().getTypeSize(Ty);
>>>      if (Size > 128)
>>> -      return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
>>> +      return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
>>>      else if (Size < 128) {
>>>        llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(),
>>> Size);
>>>        return ABIArgInfo::getDirect(CoerceTy);
>>> @@ -3757,12 +3908,10 @@ PPC64_SVR4_ABIInfo::classifyArgumentType
>>>
>>>    if (isAggregateTypeForABI(Ty)) {
>>>      if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
>>> -      return ABIArgInfo::getIndirect(0, RAA ==
>>> CGCXXABI::RAA_DirectInMemory);
>>> +      return getNaturalAlignIndirect(Ty, RAA ==
>>> CGCXXABI::RAA_DirectInMemory);
>>>
>>> -    bool Align32;
>>> -    uint64_t ABIAlign = isAlignedParamType(Ty, Align32) ?
>>> -                          (Align32 ? 32 : 16) : 8;
>>> -    uint64_t TyAlign = getContext().getTypeAlign(Ty) / 8;
>>> +    uint64_t ABIAlign = getParamTypeAlignment(Ty).getQuantity();
>>> +    uint64_t TyAlign =
>>> getContext().getTypeAlignInChars(Ty).getQuantity();
>>>
>>>      // ELFv2 homogeneous aggregates are passed as array types.
>>>      const Type *Base = nullptr;
>>> @@ -3800,7 +3949,8 @@ PPC64_SVR4_ABIInfo::classifyArgumentType
>>>      }
>>>
>>>      // All other aggregates are passed ByVal.
>>> -    return ABIArgInfo::getIndirect(ABIAlign, /*ByVal=*/true,
>>> +    return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign),
>>> +                                   /*ByVal=*/true,
>>>                                     /*Realign=*/TyAlign > ABIAlign);
>>>    }
>>>
>>> @@ -3821,7 +3971,7 @@ PPC64_SVR4_ABIInfo::classifyReturnType(Q
>>>    if (RetTy->isVectorType() && !IsQPXVectorTy(RetTy)) {
>>>      uint64_t Size = getContext().getTypeSize(RetTy);
>>>      if (Size > 128)
>>> -      return ABIArgInfo::getIndirect(0);
>>> +      return getNaturalAlignIndirect(RetTy);
>>>      else if (Size < 128) {
>>>        llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(),
>>> Size);
>>>        return ABIArgInfo::getDirect(CoerceTy);
>>> @@ -3856,7 +4006,7 @@ PPC64_SVR4_ABIInfo::classifyReturnType(Q
>>>      }
>>>
>>>      // All other aggregates are returned indirectly.
>>> -    return ABIArgInfo::getIndirect(0);
>>> +    return getNaturalAlignIndirect(RetTy);
>>>    }
>>>
>>>    return (isPromotableTypeForABI(RetTy) ?
>>> @@ -3864,47 +4014,12 @@ PPC64_SVR4_ABIInfo::classifyReturnType(Q
>>>  }
>>>
>>>  // Based on ARMABIInfo::EmitVAArg, adjusted for 64-bit machine.
>>> -llvm::Value *PPC64_SVR4_ABIInfo::EmitVAArg(llvm::Value *VAListAddr,
>>> -                                           QualType Ty,
>>> -                                           CodeGenFunction &CGF) const {
>>> -  llvm::Type *BP = CGF.Int8PtrTy;
>>> -  llvm::Type *BPP = CGF.Int8PtrPtrTy;
>>> -
>>> -  CGBuilderTy &Builder = CGF.Builder;
>>> -  llvm::Value *VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP,
>>> "ap");
>>> -  llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur");
>>> -
>>> -  // Handle types that require 16-byte alignment in the parameter save
>>> area.
>>> -  bool Align32;
>>> -  if (isAlignedParamType(Ty, Align32)) {
>>> -    llvm::Value *AddrAsInt = Builder.CreatePtrToInt(Addr, CGF.Int64Ty);
>>> -    AddrAsInt = Builder.CreateAdd(AddrAsInt,
>>> -                                  Builder.getInt64(Align32 ? 31 : 15));
>>> -    AddrAsInt = Builder.CreateAnd(AddrAsInt,
>>> -                                  Builder.getInt64(Align32 ? -32 :
>>> -16));
>>> -    Addr = Builder.CreateIntToPtr(AddrAsInt, BP, "ap.align");
>>> -  }
>>> -
>>> -  // Update the va_list pointer.  The pointer should be bumped by the
>>> -  // size of the object.  We can trust getTypeSize() except for a
>>> complex
>>> -  // type whose base type is smaller than a doubleword.  For these, the
>>> -  // size of the object is 16 bytes; see below for further explanation.
>>> -  unsigned SizeInBytes = CGF.getContext().getTypeSize(Ty) / 8;
>>> -  QualType BaseTy;
>>> -  unsigned CplxBaseSize = 0;
>>> -
>>> -  if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
>>> -    BaseTy = CTy->getElementType();
>>> -    CplxBaseSize = CGF.getContext().getTypeSize(BaseTy) / 8;
>>> -    if (CplxBaseSize < 8)
>>> -      SizeInBytes = 16;
>>> -  }
>>> +Address PPC64_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address
>>> VAListAddr,
>>> +                                      QualType Ty) const {
>>> +  auto TypeInfo = getContext().getTypeInfoInChars(Ty);
>>> +  TypeInfo.second = getParamTypeAlignment(Ty);
>>>
>>> -  unsigned Offset = llvm::RoundUpToAlignment(SizeInBytes, 8);
>>> -  llvm::Value *NextAddr =
>>> -    Builder.CreateGEP(Addr, llvm::ConstantInt::get(CGF.Int64Ty, Offset),
>>> -                      "ap.next");
>>> -  Builder.CreateStore(NextAddr, VAListAddrAsBPP);
>>> +  CharUnits SlotSize = CharUnits::fromQuantity(8);
>>>
>>>    // If we have a complex type and the base type is smaller than 8
>>> bytes,
>>>    // the ABI calls for the real and imaginary parts to be right-adjusted
>>> @@ -3912,44 +4027,40 @@ llvm::Value *PPC64_SVR4_ABIInfo::EmitVAA
>>>    // pointer to a structure with the two parts packed tightly.  So
>>> generate
>>>    // loads of the real and imaginary parts relative to the va_list
>>> pointer,
>>>    // and store them to a temporary structure.
>>> -  if (CplxBaseSize && CplxBaseSize < 8) {
>>> -    llvm::Value *RealAddr = Builder.CreatePtrToInt(Addr, CGF.Int64Ty);
>>> -    llvm::Value *ImagAddr = RealAddr;
>>> -    if (CGF.CGM.getDataLayout().isBigEndian()) {
>>> -      RealAddr =
>>> -          Builder.CreateAdd(RealAddr, Builder.getInt64(8 -
>>> CplxBaseSize));
>>> -      ImagAddr =
>>> -          Builder.CreateAdd(ImagAddr, Builder.getInt64(16 -
>>> CplxBaseSize));
>>> -    } else {
>>> -      ImagAddr = Builder.CreateAdd(ImagAddr, Builder.getInt64(8));
>>> +  if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
>>> +    CharUnits EltSize = TypeInfo.first / 2;
>>> +    if (EltSize < SlotSize) {
>>> +      Address Addr = emitVoidPtrDirectVAArg(CGF, VAListAddr, CGF.Int8Ty,
>>> +                                            SlotSize * 2, SlotSize,
>>> +                                            SlotSize, /*AllowHigher*/
>>> true);
>>> +
>>> +      Address RealAddr = Addr;
>>> +      Address ImagAddr = RealAddr;
>>> +      if (CGF.CGM.getDataLayout().isBigEndian()) {
>>> +        RealAddr = CGF.Builder.CreateConstInBoundsByteGEP(RealAddr,
>>> +                                                          SlotSize -
>>> EltSize);
>>> +        ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(ImagAddr,
>>> +                                                      2 * SlotSize -
>>> EltSize);
>>> +      } else {
>>> +        ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(RealAddr,
>>> SlotSize);
>>> +      }
>>> +
>>> +      llvm::Type *EltTy = CGF.ConvertTypeForMem(CTy->getElementType());
>>> +      RealAddr = CGF.Builder.CreateElementBitCast(RealAddr, EltTy);
>>> +      ImagAddr = CGF.Builder.CreateElementBitCast(ImagAddr, EltTy);
>>> +      llvm::Value *Real = CGF.Builder.CreateLoad(RealAddr, ".vareal");
>>> +      llvm::Value *Imag = CGF.Builder.CreateLoad(ImagAddr, ".vaimag");
>>> +
>>> +      Address Temp = CGF.CreateMemTemp(Ty, "vacplx");
>>> +      CGF.EmitStoreOfComplex({Real, Imag}, CGF.MakeAddrLValue(Temp, Ty),
>>> +                             /*init*/ true);
>>> +      return Temp;
>>>      }
>>> -    llvm::Type *PBaseTy =
>>> llvm::PointerType::getUnqual(CGF.ConvertType(BaseTy));
>>> -    RealAddr = Builder.CreateIntToPtr(RealAddr, PBaseTy);
>>> -    ImagAddr = Builder.CreateIntToPtr(ImagAddr, PBaseTy);
>>> -    llvm::Value *Real = Builder.CreateLoad(RealAddr, false, ".vareal");
>>> -    llvm::Value *Imag = Builder.CreateLoad(ImagAddr, false, ".vaimag");
>>> -    llvm::AllocaInst *Ptr =
>>> -        CGF.CreateTempAlloca(CGT.ConvertTypeForMem(Ty), "vacplx");
>>> -    llvm::Value *RealPtr =
>>> -        Builder.CreateStructGEP(Ptr->getAllocatedType(), Ptr, 0,
>>> ".real");
>>> -    llvm::Value *ImagPtr =
>>> -        Builder.CreateStructGEP(Ptr->getAllocatedType(), Ptr, 1,
>>> ".imag");
>>> -    Builder.CreateStore(Real, RealPtr, false);
>>> -    Builder.CreateStore(Imag, ImagPtr, false);
>>> -    return Ptr;
>>> -  }
>>> -
>>> -  // If the argument is smaller than 8 bytes, it is right-adjusted in
>>> -  // its doubleword slot.  Adjust the pointer to pick it up from the
>>> -  // correct offset.
>>> -  if (SizeInBytes < 8 && CGF.CGM.getDataLayout().isBigEndian()) {
>>> -    llvm::Value *AddrAsInt = Builder.CreatePtrToInt(Addr, CGF.Int64Ty);
>>> -    AddrAsInt = Builder.CreateAdd(AddrAsInt, Builder.getInt64(8 -
>>> SizeInBytes));
>>> -    Addr = Builder.CreateIntToPtr(AddrAsInt, BP);
>>>    }
>>>
>>> -  llvm::Type *PTy = llvm::PointerType::getUnqual(CGF.ConvertType(Ty));
>>> -  return Builder.CreateBitCast(Addr, PTy);
>>> +  // Otherwise, just use the general rule.
>>> +  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false,
>>> +                          TypeInfo, SlotSize, /*AllowHigher*/ true);
>>>  }
>>>
>>>  static bool
>>> @@ -4047,14 +4158,14 @@ private:
>>>        it.info = classifyArgumentType(it.type);
>>>    }
>>>
>>> -  llvm::Value *EmitDarwinVAArg(llvm::Value *VAListAddr, QualType Ty,
>>> -                               CodeGenFunction &CGF) const;
>>> +  Address EmitDarwinVAArg(Address VAListAddr, QualType Ty,
>>> +                          CodeGenFunction &CGF) const;
>>>
>>> -  llvm::Value *EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty,
>>> -                              CodeGenFunction &CGF) const;
>>> +  Address EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
>>> +                         CodeGenFunction &CGF) const;
>>>
>>> -  llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
>>> -                         CodeGenFunction &CGF) const override {
>>> +  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
>>> +                    QualType Ty) const override {
>>>      return isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF)
>>>                           : EmitAAPCSVAArg(VAListAddr, Ty, CGF);
>>>    }
>>> @@ -4097,7 +4208,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgum
>>>            llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()),
>>> 4);
>>>        return ABIArgInfo::getDirect(ResType);
>>>      }
>>> -    return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
>>> +    return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
>>>    }
>>>
>>>    if (!isAggregateTypeForABI(Ty)) {
>>> @@ -4113,8 +4224,8 @@ ABIArgInfo AArch64ABIInfo::classifyArgum
>>>    // Structures with either a non-trivial destructor or a non-trivial
>>>    // copy constructor are always indirect.
>>>    if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
>>> -    return ABIArgInfo::getIndirect(0, /*ByVal=*/RAA ==
>>> -                                   CGCXXABI::RAA_DirectInMemory);
>>> +    return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA ==
>>> +                                     CGCXXABI::RAA_DirectInMemory);
>>>    }
>>>
>>>    // Empty records are always ignored on Darwin, but actually passed in
>>> C++ mode
>>> @@ -4149,7 +4260,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgum
>>>      return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
>>> Size));
>>>    }
>>>
>>> -  return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
>>> +  return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
>>>  }
>>>
>>>  ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const {
>>> @@ -4158,7 +4269,7 @@ ABIArgInfo AArch64ABIInfo::classifyRetur
>>>
>>>    // Large vector types should be returned via memory.
>>>    if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128)
>>> -    return ABIArgInfo::getIndirect(0);
>>> +    return getNaturalAlignIndirect(RetTy);
>>>
>>>    if (!isAggregateTypeForABI(RetTy)) {
>>>      // Treat an enum type as its underlying type.
>>> @@ -4194,7 +4305,7 @@ ABIArgInfo AArch64ABIInfo::classifyRetur
>>>      return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
>>> Size));
>>>    }
>>>
>>> -  return ABIArgInfo::getIndirect(0);
>>> +  return getNaturalAlignIndirect(RetTy);
>>>  }
>>>
>>>  /// isIllegalVectorType - check whether the vector type is legal for
>>> AArch64.
>>> @@ -4232,7 +4343,7 @@ bool AArch64ABIInfo::isHomogeneousAggreg
>>>    return Members <= 4;
>>>  }
>>>
>>> -llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr,
>>> +Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr,
>>>                                              QualType Ty,
>>>                                              CodeGenFunction &CGF) const
>>> {
>>>    ABIArgInfo AI = classifyArgumentType(Ty);
>>> @@ -4266,24 +4377,32 @@ llvm::Value *AArch64ABIInfo::EmitAAPCSVA
>>>    llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
>>>    llvm::BasicBlock *OnStackBlock =
>>> CGF.createBasicBlock("vaarg.on_stack");
>>>    llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
>>> -  auto &Ctx = CGF.getContext();
>>>
>>> -  llvm::Value *reg_offs_p = nullptr, *reg_offs = nullptr;
>>> +  auto TyInfo = getContext().getTypeInfoInChars(Ty);
>>> +  CharUnits TyAlign = TyInfo.second;
>>> +
>>> +  Address reg_offs_p = Address::invalid();
>>> +  llvm::Value *reg_offs = nullptr;
>>>    int reg_top_index;
>>> -  int RegSize = IsIndirect ? 8 : getContext().getTypeSize(Ty) / 8;
>>> +  CharUnits reg_top_offset;
>>> +  int RegSize = IsIndirect ? 8 : TyInfo.first.getQuantity();
>>>    if (!IsFPR) {
>>>      // 3 is the field number of __gr_offs
>>>      reg_offs_p =
>>> -        CGF.Builder.CreateStructGEP(nullptr, VAListAddr, 3,
>>> "gr_offs_p");
>>> +        CGF.Builder.CreateStructGEP(VAListAddr, 3,
>>> CharUnits::fromQuantity(24),
>>> +                                    "gr_offs_p");
>>>      reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs");
>>>      reg_top_index = 1; // field number for __gr_top
>>> +    reg_top_offset = CharUnits::fromQuantity(8);
>>>      RegSize = llvm::RoundUpToAlignment(RegSize, 8);
>>>    } else {
>>>      // 4 is the field number of __vr_offs.
>>>      reg_offs_p =
>>> -        CGF.Builder.CreateStructGEP(nullptr, VAListAddr, 4,
>>> "vr_offs_p");
>>> +        CGF.Builder.CreateStructGEP(VAListAddr, 4,
>>> CharUnits::fromQuantity(28),
>>> +                                    "vr_offs_p");
>>>      reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs");
>>>      reg_top_index = 2; // field number for __vr_top
>>> +    reg_top_offset = CharUnits::fromQuantity(16);
>>>      RegSize = 16 * NumRegs;
>>>    }
>>>
>>> @@ -4308,8 +4427,8 @@ llvm::Value *AArch64ABIInfo::EmitAAPCSVA
>>>    // Integer arguments may need to correct register alignment (for
>>> example a
>>>    // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this
>>> case we
>>>    // align __gr_offs to calculate the potential address.
>>> -  if (!IsFPR && !IsIndirect && Ctx.getTypeAlign(Ty) > 64) {
>>> -    int Align = Ctx.getTypeAlign(Ty) / 8;
>>> +  if (!IsFPR && !IsIndirect && TyAlign.getQuantity() > 8) {
>>> +    int Align = TyAlign.getQuantity();
>>>
>>>      reg_offs = CGF.Builder.CreateAdd(
>>>          reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, Align - 1),
>>> @@ -4320,6 +4439,9 @@ llvm::Value *AArch64ABIInfo::EmitAAPCSVA
>>>    }
>>>
>>>    // Update the gr_offs/vr_offs pointer for next call to va_arg on this
>>> va_list.
>>> +  // The fact that this is done unconditionally reflects the fact that
>>> +  // allocating an argument to the stack also uses up all the remaining
>>> +  // registers of the appropriate kind.
>>>    llvm::Value *NewOffset = nullptr;
>>>    NewOffset = CGF.Builder.CreateAdd(
>>>        reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, RegSize),
>>> "new_reg_offs");
>>> @@ -4341,13 +4463,14 @@ llvm::Value *AArch64ABIInfo::EmitAAPCSVA
>>>    // registers. First start the appropriate block:
>>>    CGF.EmitBlock(InRegBlock);
>>>
>>> -  llvm::Value *reg_top_p = nullptr, *reg_top = nullptr;
>>> -  reg_top_p = CGF.Builder.CreateStructGEP(nullptr, VAListAddr,
>>> reg_top_index,
>>> -                                          "reg_top_p");
>>> +  llvm::Value *reg_top = nullptr;
>>> +  Address reg_top_p = CGF.Builder.CreateStructGEP(VAListAddr,
>>> reg_top_index,
>>> +                                                  reg_top_offset,
>>> "reg_top_p");
>>>    reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top");
>>> -  llvm::Value *BaseAddr = CGF.Builder.CreateGEP(reg_top, reg_offs);
>>> -  llvm::Value *RegAddr = nullptr;
>>> -  llvm::Type *MemTy =
>>> llvm::PointerType::getUnqual(CGF.ConvertTypeForMem(Ty));
>>> +  Address BaseAddr(CGF.Builder.CreateInBoundsGEP(reg_top, reg_offs),
>>> +                   CharUnits::fromQuantity(IsFPR ? 16 : 8));
>>> +  Address RegAddr = Address::invalid();
>>> +  llvm::Type *MemTy = CGF.ConvertTypeForMem(Ty);
>>>
>>>    if (IsIndirect) {
>>>      // If it's been passed indirectly (actually a struct), whatever we
>>> find from
>>> @@ -4364,43 +4487,45 @@ llvm::Value *AArch64ABIInfo::EmitAAPCSVA
>>>      // qN+1, ...). We reload and store into a temporary local variable
>>>      // contiguously.
>>>      assert(!IsIndirect &&
>>
>>
>> _______________________________________________
>> cfe-commits mailing list
>> cfe-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
>>
>>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20150910/09458896/attachment-0001.html>


More information about the cfe-commits mailing list