r246985 - Compute and preserve alignment more faithfully in IR-generation.

Chandler Carruth via cfe-commits cfe-commits at lists.llvm.org
Thu Sep 10 14:39:41 PDT 2015


In case anyone else is chasing the same thing, I wanted to post that we're
seeing crashes in code introduced by this commit. Specifically, the crashes
are inside libjpeg-turbo's assembly implementation, so it's proving very
challenging to track down. It looks like either incorrect va_arg stack
setup, reaching past the red zone due to alignment padding, or something
else weird.
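
For reference, the flavor of reduction we've been trying to construct looks
roughly like the sketch below (hypothetical code, not the actual
libjpeg-turbo source; the struct and function names are made up). It
exercises va_arg of an over-aligned type, which is the path this commit
rewrites: the current argument pointer gets rounded up for the alignment
and the advanced pointer gets stored back for the next argument.

// Hypothetical reduction, not from libjpeg-turbo: va_arg of an
// over-aligned trivially-copyable struct, forcing the new lowering to
// round the argument pointer up and then write back the advanced pointer.
#include <cstdarg>
#include <cstdio>

struct alignas(16) Wide { long long v[2]; };

static long long sum_varargs(int n, ...) {
  va_list ap;
  va_start(ap, n);
  long long total = 0;
  for (int i = 0; i < n; ++i) {
    Wide w = va_arg(ap, Wide);   // slot must be rounded up to 16 bytes
    total += w.v[0] + w.v[1];
  }
  va_end(ap);
  return total;
}

int main() {
  Wide a = {{1, 2}}, b = {{3, 4}};
  std::printf("%lld\n", sum_varargs(2, a, b));  // expect 10
  return 0;
}

If the rounding of the pointer and the amount it is advanced by ever
disagreed, the second va_arg would read a stale or out-of-bounds slot,
which would match the symptoms we're seeing.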

Notably, it is *not* a SIGILL due to an overaligned access to memory.

Anyway, if we get a test case I'll probably file it and revert, but it's
proving *very* hard to track down, so I wanted to see if others have seen
anything similar here.

-Chandler

On Tue, Sep 8, 2015 at 1:07 AM John McCall via cfe-commits <
cfe-commits at lists.llvm.org> wrote:

> Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=246985&r1=246984&r2=246985&view=diff
>
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
> +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Tue Sep  8 03:05:57 2015
> @@ -39,7 +39,7 @@ static void AssignToArrayRange(CodeGen::
>    for (unsigned I = FirstIndex; I <= LastIndex; ++I) {
>      llvm::Value *Cell =
>          Builder.CreateConstInBoundsGEP1_32(Builder.getInt8Ty(), Array, I);
> -    Builder.CreateStore(Value, Cell);
> +    Builder.CreateAlignedStore(Value, Cell, CharUnits::One());
>    }
>  }
>
> @@ -48,6 +48,19 @@ static bool isAggregateTypeForABI(QualTy
>           T->isMemberFunctionPointerType();
>  }
>
> +ABIArgInfo
> +ABIInfo::getNaturalAlignIndirect(QualType Ty, bool ByRef, bool Realign,
> +                                 llvm::Type *Padding) const {
> +  return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty),
> +                                 ByRef, Realign, Padding);
> +}
> +
> +ABIArgInfo
> +ABIInfo::getNaturalAlignIndirectInReg(QualType Ty, bool Realign) const {
> +  return
> ABIArgInfo::getIndirectInReg(getContext().getTypeAlignInChars(Ty),
> +                                      /*ByRef*/ false, Realign);
> +}
> +
>  ABIInfo::~ABIInfo() {}
>
>  static CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT,
> @@ -133,7 +146,7 @@ void ABIArgInfo::dump() const {
>      OS << "InAlloca Offset=" << getInAllocaFieldIndex();
>      break;
>    case Indirect:
> -    OS << "Indirect Align=" << getIndirectAlign()
> +    OS << "Indirect Align=" << getIndirectAlign().getQuantity()
>         << " ByVal=" << getIndirectByVal()
>         << " Realign=" << getIndirectRealign();
>      break;
> @@ -144,6 +157,125 @@ void ABIArgInfo::dump() const {
>    OS << ")\n";
>  }
>
> +/// Emit va_arg for a platform using the common void* representation,
> +/// where arguments are simply emitted in an array of slots on the stack.
> +///
> +/// This version implements the core direct-value passing rules.
> +///
> +/// \param SlotSize - The size and alignment of a stack slot.
> +///   Each argument will be allocated to a multiple of this number of
> +///   slots, and all the slots will be aligned to this value.
> +/// \param AllowHigherAlign - The slot alignment is not a cap;
> +///   an argument type with an alignment greater than the slot size
> +///   will be emitted on a higher-alignment address, potentially
> +///   leaving one or more empty slots behind as padding.  If this
> +///   is false, the returned address might be less-aligned than
> +///   DirectAlign.
> +static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF,
> +                                      Address VAListAddr,
> +                                      llvm::Type *DirectTy,
> +                                      CharUnits DirectSize,
> +                                      CharUnits DirectAlign,
> +                                      CharUnits SlotSize,
> +                                      bool AllowHigherAlign) {
> +  // Cast the element type to i8* if necessary.  Some platforms define
> +  // va_list as a struct containing an i8* instead of just an i8*.
> +  if (VAListAddr.getElementType() != CGF.Int8PtrTy)
> +    VAListAddr = CGF.Builder.CreateElementBitCast(VAListAddr,
> CGF.Int8PtrTy);
> +
> +  llvm::Value *Ptr = CGF.Builder.CreateLoad(VAListAddr, "argp.cur");
> +
> +  // If the CC aligns values higher than the slot size, do so if needed.
> +  Address Addr = Address::invalid();
> +  if (AllowHigherAlign && DirectAlign > SlotSize) {
> +    llvm::Value *PtrAsInt = Ptr;
> +    PtrAsInt = CGF.Builder.CreatePtrToInt(PtrAsInt, CGF.IntPtrTy);
> +    PtrAsInt = CGF.Builder.CreateAdd(PtrAsInt,
> +          llvm::ConstantInt::get(CGF.IntPtrTy, DirectAlign.getQuantity()
> - 1));
> +    PtrAsInt = CGF.Builder.CreateAnd(PtrAsInt,
> +             llvm::ConstantInt::get(CGF.IntPtrTy,
> -DirectAlign.getQuantity()));
> +    Addr = Address(CGF.Builder.CreateIntToPtr(PtrAsInt, Ptr->getType(),
> +                                              "argp.cur.aligned"),
> +                   DirectAlign);
> +  } else {
> +    Addr = Address(Ptr, SlotSize);
> +  }
> +
> +  // Advance the pointer past the argument, then store that back.
> +  CharUnits FullDirectSize = DirectSize.RoundUpToAlignment(SlotSize);
> +  llvm::Value *NextPtr =
> +    CGF.Builder.CreateConstInBoundsByteGEP(Addr.getPointer(),
> FullDirectSize,
> +                                           "argp.next");
> +  CGF.Builder.CreateStore(NextPtr, VAListAddr);
> +
> +  // If the argument is smaller than a slot, and this is a big-endian
> +  // target, the argument will be right-adjusted in its slot.
> +  if (DirectSize < SlotSize && CGF.CGM.getDataLayout().isBigEndian()) {
> +    Addr = CGF.Builder.CreateConstInBoundsByteGEP(Addr, SlotSize -
> DirectSize);
> +  }
> +
> +  Addr = CGF.Builder.CreateElementBitCast(Addr, DirectTy);
> +  return Addr;
> +}
> +
> +/// Emit va_arg for a platform using the common void* representation,
> +/// where arguments are simply emitted in an array of slots on the stack.
> +///
> +/// \param IsIndirect - Values of this type are passed indirectly.
> +/// \param ValueInfo - The size and alignment of this type, generally
> +///   computed with getContext().getTypeInfoInChars(ValueTy).
> +/// \param SlotSizeAndAlign - The size and alignment of a stack slot.
> +///   Each argument will be allocated to a multiple of this number of
> +///   slots, and all the slots will be aligned to this value.
> +/// \param AllowHigherAlign - The slot alignment is not a cap;
> +///   an argument type with an alignment greater than the slot size
> +///   will be emitted on a higher-alignment address, potentially
> +///   leaving one or more empty slots behind as padding.
> +static Address emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr,
> +                                QualType ValueTy, bool IsIndirect,
> +                                std::pair<CharUnits, CharUnits> ValueInfo,
> +                                CharUnits SlotSizeAndAlign,
> +                                bool AllowHigherAlign) {
> +  // The size and alignment of the value that was passed directly.
> +  CharUnits DirectSize, DirectAlign;
> +  if (IsIndirect) {
> +    DirectSize = CGF.getPointerSize();
> +    DirectAlign = CGF.getPointerAlign();
> +  } else {
> +    DirectSize = ValueInfo.first;
> +    DirectAlign = ValueInfo.second;
> +  }
> +
> +  // Cast the address we've calculated to the right type.
> +  llvm::Type *DirectTy = CGF.ConvertTypeForMem(ValueTy);
> +  if (IsIndirect)
> +    DirectTy = DirectTy->getPointerTo(0);
> +
> +  Address Addr = emitVoidPtrDirectVAArg(CGF, VAListAddr, DirectTy,
> +                                        DirectSize, DirectAlign,
> +                                        SlotSizeAndAlign,
> +                                        AllowHigherAlign);
> +
> +  if (IsIndirect) {
> +    Addr = Address(CGF.Builder.CreateLoad(Addr), ValueInfo.second);
> +  }
> +
> +  return Addr;
> +
> +}
> +
> +static Address emitMergePHI(CodeGenFunction &CGF,
> +                            Address Addr1, llvm::BasicBlock *Block1,
> +                            Address Addr2, llvm::BasicBlock *Block2,
> +                            const llvm::Twine &Name = "") {
> +  assert(Addr1.getType() == Addr2.getType());
> +  llvm::PHINode *PHI = CGF.Builder.CreatePHI(Addr1.getType(), 2, Name);
> +  PHI->addIncoming(Addr1.getPointer(), Block1);
> +  PHI->addIncoming(Addr2.getPointer(), Block2);
> +  CharUnits Align = std::min(Addr1.getAlignment(), Addr2.getAlignment());
> +  return Address(PHI, Align);
> +}
> +
>  TargetCodeGenInfo::~TargetCodeGenInfo() { delete Info; }
>
>  // If someone can figure out a general rule for this, that would be great.
> @@ -394,8 +526,8 @@ public:
>        I.info = classifyArgumentType(I.type);
>    }
>
> -  llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
> -                         CodeGenFunction &CGF) const override;
> +  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
> +                    QualType Ty) const override;
>  };
>
>  class DefaultTargetCodeGenInfo : public TargetCodeGenInfo {
> @@ -404,9 +536,9 @@ public:
>      : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
>  };
>
> -llvm::Value *DefaultABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType
> Ty,
> -                                       CodeGenFunction &CGF) const {
> -  return nullptr;
> +Address DefaultABIInfo::EmitVAArg(CodeGenFunction &CGF, Address
> VAListAddr,
> +                                  QualType Ty) const {
> +  return Address::invalid();
>  }
>
>  ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const {
> @@ -416,9 +548,9 @@ ABIArgInfo DefaultABIInfo::classifyArgum
>      // Records with non-trivial destructors/copy-constructors should not
> be
>      // passed by value.
>      if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
> -      return ABIArgInfo::getIndirect(0, RAA ==
> CGCXXABI::RAA_DirectInMemory);
> +      return getNaturalAlignIndirect(Ty, RAA ==
> CGCXXABI::RAA_DirectInMemory);
>
> -    return ABIArgInfo::getIndirect(0);
> +    return getNaturalAlignIndirect(Ty);
>    }
>
>    // Treat an enum type as its underlying type.
> @@ -434,7 +566,7 @@ ABIArgInfo DefaultABIInfo::classifyRetur
>      return ABIArgInfo::getIgnore();
>
>    if (isAggregateTypeForABI(RetTy))
> -    return ABIArgInfo::getIndirect(0);
> +    return getNaturalAlignIndirect(RetTy);
>
>    // Treat an enum type as its underlying type.
>    if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
> @@ -482,10 +614,8 @@ ABIArgInfo WebAssemblyABIInfo::classifyA
>    if (isAggregateTypeForABI(Ty)) {
>      // Records with non-trivial destructors/copy-constructors should not
> be
>      // passed by value.
> -    unsigned TypeAlign =
> getContext().getTypeAlignInChars(Ty).getQuantity();
>      if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
> -      return ABIArgInfo::getIndirect(TypeAlign,
> -                                     RAA == CGCXXABI::RAA_DirectInMemory);
> +      return getNaturalAlignIndirect(Ty, RAA ==
> CGCXXABI::RAA_DirectInMemory);
>      // Ignore empty structs/unions.
>      if (isEmptyRecord(getContext(), Ty, true))
>        return ABIArgInfo::getIgnore();
> @@ -494,7 +624,7 @@ ABIArgInfo WebAssemblyABIInfo::classifyA
>      // though watch out for things like bitfields.
>      if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
>        return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
> -    return ABIArgInfo::getIndirect(TypeAlign);
> +    return getNaturalAlignIndirect(Ty);
>    }
>
>    // Otherwise just do the default thing.
> @@ -536,8 +666,8 @@ class PNaClABIInfo : public ABIInfo {
>    ABIArgInfo classifyArgumentType(QualType RetTy) const;
>
>    void computeInfo(CGFunctionInfo &FI) const override;
> -  llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
> -                         CodeGenFunction &CGF) const override;
> +  Address EmitVAArg(CodeGenFunction &CGF,
> +                    Address VAListAddr, QualType Ty) const override;
>  };
>
>  class PNaClTargetCodeGenInfo : public TargetCodeGenInfo {
> @@ -554,17 +684,17 @@ void PNaClABIInfo::computeInfo(CGFunctio
>      I.info = classifyArgumentType(I.type);
>  }
>
> -llvm::Value *PNaClABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
> -                                       CodeGenFunction &CGF) const {
> -  return nullptr;
> +Address PNaClABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
> +                                QualType Ty) const {
> +  return Address::invalid();
>  }
>
>  /// \brief Classify argument of given type \p Ty.
>  ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty) const {
>    if (isAggregateTypeForABI(Ty)) {
>      if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
> -      return ABIArgInfo::getIndirect(0, RAA ==
> CGCXXABI::RAA_DirectInMemory);
> -    return ABIArgInfo::getIndirect(0);
> +      return getNaturalAlignIndirect(Ty, RAA ==
> CGCXXABI::RAA_DirectInMemory);
> +    return getNaturalAlignIndirect(Ty);
>    } else if (const EnumType *EnumTy = Ty->getAs<EnumType>()) {
>      // Treat an enum type as its underlying type.
>      Ty = EnumTy->getDecl()->getIntegerType();
> @@ -583,7 +713,7 @@ ABIArgInfo PNaClABIInfo::classifyReturnT
>
>    // In the PNaCl ABI we always return records/structures on the stack.
>    if (isAggregateTypeForABI(RetTy))
> -    return ABIArgInfo::getIndirect(0);
> +    return getNaturalAlignIndirect(RetTy);
>
>    // Treat an enum type as its underlying type.
>    if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
> @@ -687,7 +817,7 @@ class X86_32ABIInfo : public ABIInfo {
>    /// such that the argument will be passed in memory.
>    ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State)
> const;
>
> -  ABIArgInfo getIndirectReturnResult(CCState &State) const;
> +  ABIArgInfo getIndirectReturnResult(QualType Ty, CCState &State) const;
>
>    /// \brief Return the alignment to use for the given type on the stack.
>    unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const;
> @@ -702,14 +832,14 @@ class X86_32ABIInfo : public ABIInfo {
>    void rewriteWithInAlloca(CGFunctionInfo &FI) const;
>
>    void addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
> -                           unsigned &StackOffset, ABIArgInfo &Info,
> +                           CharUnits &StackOffset, ABIArgInfo &Info,
>                             QualType Type) const;
>
>  public:
>
>    void computeInfo(CGFunctionInfo &FI) const override;
> -  llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
> -                         CodeGenFunction &CGF) const override;
> +  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
> +                    QualType Ty) const override;
>
>    X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool d, bool p, bool w,
>                  unsigned r)
> @@ -893,14 +1023,14 @@ bool X86_32ABIInfo::shouldReturnTypeInRe
>    return true;
>  }
>
> -ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(CCState &State) const {
> +ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy, CCState
> &State) const {
>    // If the return value is indirect, then the hidden argument is
> consuming one
>    // integer register.
>    if (State.FreeRegs) {
>      --State.FreeRegs;
> -    return ABIArgInfo::getIndirectInReg(/*Align=*/0, /*ByVal=*/false);
> +    return getNaturalAlignIndirectInReg(RetTy);
>    }
> -  return ABIArgInfo::getIndirect(/*Align=*/0, /*ByVal=*/false);
> +  return getNaturalAlignIndirect(RetTy, /*ByVal=*/false);
>  }
>
>  ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy,
> @@ -935,7 +1065,7 @@ ABIArgInfo X86_32ABIInfo::classifyReturn
>          return
> ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
>                                                              Size));
>
> -      return getIndirectReturnResult(State);
> +      return getIndirectReturnResult(RetTy, State);
>      }
>
>      return ABIArgInfo::getDirect();
> @@ -945,12 +1075,12 @@ ABIArgInfo X86_32ABIInfo::classifyReturn
>      if (const RecordType *RT = RetTy->getAs<RecordType>()) {
>        // Structures with flexible arrays are always indirect.
>        if (RT->getDecl()->hasFlexibleArrayMember())
> -        return getIndirectReturnResult(State);
> +        return getIndirectReturnResult(RetTy, State);
>      }
>
>      // If specified, structs and unions are always indirect.
>      if (!IsSmallStructInRegABI && !RetTy->isAnyComplexType())
> -      return getIndirectReturnResult(State);
> +      return getIndirectReturnResult(RetTy, State);
>
>      // Small structures which are register sized are generally returned
>      // in a register.
> @@ -972,7 +1102,7 @@ ABIArgInfo X86_32ABIInfo::classifyReturn
>        return
> ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),Size));
>      }
>
> -    return getIndirectReturnResult(State);
> +    return getIndirectReturnResult(RetTy, State);
>    }
>
>    // Treat an enum type as its underlying type.
> @@ -1038,21 +1168,22 @@ ABIArgInfo X86_32ABIInfo::getIndirectRes
>    if (!ByVal) {
>      if (State.FreeRegs) {
>        --State.FreeRegs; // Non-byval indirects just use one pointer.
> -      return ABIArgInfo::getIndirectInReg(0, false);
> +      return getNaturalAlignIndirectInReg(Ty);
>      }
> -    return ABIArgInfo::getIndirect(0, false);
> +    return getNaturalAlignIndirect(Ty, false);
>    }
>
>    // Compute the byval alignment.
>    unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
>    unsigned StackAlign = getTypeStackAlignInBytes(Ty, TypeAlign);
>    if (StackAlign == 0)
> -    return ABIArgInfo::getIndirect(4, /*ByVal=*/true);
> +    return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4),
> /*ByVal=*/true);
>
>    // If the stack alignment is less than the type alignment, realign the
>    // argument.
>    bool Realign = TypeAlign > StackAlign;
> -  return ABIArgInfo::getIndirect(StackAlign, /*ByVal=*/true, Realign);
> +  return ABIArgInfo::getIndirect(CharUnits::fromQuantity(StackAlign),
> +                                 /*ByVal=*/true, Realign);
>  }
>
>  X86_32ABIInfo::Class X86_32ABIInfo::classify(QualType Ty) const {
> @@ -1259,22 +1390,23 @@ void X86_32ABIInfo::computeInfo(CGFuncti
>
>  void
>  X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6>
> &FrameFields,
> -                                   unsigned &StackOffset,
> -                                   ABIArgInfo &Info, QualType Type) const
> {
> -  assert(StackOffset % 4U == 0 && "unaligned inalloca struct");
> +                                   CharUnits &StackOffset, ABIArgInfo
> &Info,
> +                                   QualType Type) const {
> +  // Arguments are always 4-byte-aligned.
> +  CharUnits FieldAlign = CharUnits::fromQuantity(4);
> +
> +  assert(StackOffset.isMultipleOf(FieldAlign) && "unaligned inalloca
> struct");
>    Info = ABIArgInfo::getInAlloca(FrameFields.size());
>    FrameFields.push_back(CGT.ConvertTypeForMem(Type));
> -  StackOffset += getContext().getTypeSizeInChars(Type).getQuantity();
> +  StackOffset += getContext().getTypeSizeInChars(Type);
>
> -  // Insert padding bytes to respect alignment.  For x86_32, each
> argument is 4
> -  // byte aligned.
> -  if (StackOffset % 4U) {
> -    unsigned OldOffset = StackOffset;
> -    StackOffset = llvm::RoundUpToAlignment(StackOffset, 4U);
> -    unsigned NumBytes = StackOffset - OldOffset;
> -    assert(NumBytes);
> +  // Insert padding bytes to respect alignment.
> +  CharUnits FieldEnd = StackOffset;
> +  StackOffset = FieldEnd.RoundUpToAlignment(FieldAlign);
> +  if (StackOffset != FieldEnd) {
> +    CharUnits NumBytes = StackOffset - FieldEnd;
>      llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext());
> -    Ty = llvm::ArrayType::get(Ty, NumBytes);
> +    Ty = llvm::ArrayType::get(Ty, NumBytes.getQuantity());
>      FrameFields.push_back(Ty);
>    }
>  }
> @@ -1305,7 +1437,10 @@ void X86_32ABIInfo::rewriteWithInAlloca(
>    // Build a packed struct type for all of the arguments in memory.
>    SmallVector<llvm::Type *, 6> FrameFields;
>
> -  unsigned StackOffset = 0;
> +  // The stack alignment is always 4.
> +  CharUnits StackAlign = CharUnits::fromQuantity(4);
> +
> +  CharUnits StackOffset;
>    CGFunctionInfo::arg_iterator I = FI.arg_begin(), E = FI.arg_end();
>
>    // Put 'this' into the struct before 'sret', if necessary.
> @@ -1337,47 +1472,25 @@ void X86_32ABIInfo::rewriteWithInAlloca(
>    }
>
>    FI.setArgStruct(llvm::StructType::get(getVMContext(), FrameFields,
> -                                        /*isPacked=*/true));
> +                                        /*isPacked=*/true),
> +                  StackAlign);
>  }
>
> -llvm::Value *X86_32ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType
> Ty,
> -                                      CodeGenFunction &CGF) const {
> -  llvm::Type *BPP = CGF.Int8PtrPtrTy;
> +Address X86_32ABIInfo::EmitVAArg(CodeGenFunction &CGF,
> +                                 Address VAListAddr, QualType Ty) const {
>
> -  CGBuilderTy &Builder = CGF.Builder;
> -  llvm::Value *VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP,
> -                                                       "ap");
> -  llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur");
> -
> -  // Compute if the address needs to be aligned
> -  unsigned Align = CGF.getContext().getTypeAlignInChars(Ty).getQuantity();
> -  Align = getTypeStackAlignInBytes(Ty, Align);
> -  Align = std::max(Align, 4U);
> -  if (Align > 4) {
> -    // addr = (addr + align - 1) & -align;
> -    llvm::Value *Offset =
> -      llvm::ConstantInt::get(CGF.Int32Ty, Align - 1);
> -    Addr = CGF.Builder.CreateGEP(Addr, Offset);
> -    llvm::Value *AsInt = CGF.Builder.CreatePtrToInt(Addr,
> -                                                    CGF.Int32Ty);
> -    llvm::Value *Mask = llvm::ConstantInt::get(CGF.Int32Ty, -Align);
> -    Addr = CGF.Builder.CreateIntToPtr(CGF.Builder.CreateAnd(AsInt, Mask),
> -                                      Addr->getType(),
> -                                      "ap.cur.aligned");
> -  }
> -
> -  llvm::Type *PTy =
> -    llvm::PointerType::getUnqual(CGF.ConvertType(Ty));
> -  llvm::Value *AddrTyped = Builder.CreateBitCast(Addr, PTy);
> -
> -  uint64_t Offset =
> -    llvm::RoundUpToAlignment(CGF.getContext().getTypeSize(Ty) / 8, Align);
> -  llvm::Value *NextAddr =
> -    Builder.CreateGEP(Addr, llvm::ConstantInt::get(CGF.Int32Ty, Offset),
> -                      "ap.next");
> -  Builder.CreateStore(NextAddr, VAListAddrAsBPP);
> +  auto TypeInfo = getContext().getTypeInfoInChars(Ty);
>
> -  return AddrTyped;
> +  // x86-32 changes the alignment of certain arguments on the stack.
> +  //
> +  // Just messing with TypeInfo like this works because we never pass
> +  // anything indirectly.
> +  TypeInfo.second = CharUnits::fromQuantity(
> +                getTypeStackAlignInBytes(Ty,
> TypeInfo.second.getQuantity()));
> +
> +  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false,
> +                          TypeInfo, CharUnits::fromQuantity(4),
> +                          /*AllowHigherAlign*/ true);
>  }
>
>  bool X86_32TargetCodeGenInfo::isStructReturnInRegABI(
> @@ -1449,8 +1562,9 @@ bool X86_32TargetCodeGenInfo::initDwarfE
>    } else {
>      // 9 is %eflags, which doesn't get a size on Darwin for some
>      // reason.
> -    Builder.CreateStore(
> -        Four8, Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, Address,
> 9));
> +    Builder.CreateAlignedStore(
> +        Four8, Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, Address, 9),
> +                               CharUnits::One());
>
>      // 11-16 are st(0..5).  Not sure why we stop at 5.
>      // These have size 12, which is sizeof(long double) on
> @@ -1619,8 +1733,8 @@ public:
>
>    void computeInfo(CGFunctionInfo &FI) const override;
>
> -  llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
> -                         CodeGenFunction &CGF) const override;
> +  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
> +                    QualType Ty) const override;
>
>    bool has64BitPointers() const {
>      return Has64BitPointers;
> @@ -1638,8 +1752,8 @@ public:
>
>    void computeInfo(CGFunctionInfo &FI) const override;
>
> -  llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
> -                         CodeGenFunction &CGF) const override;
> +  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
> +                    QualType Ty) const override;
>
>    bool isHomogeneousAggregateBaseType(QualType Ty) const override {
>      // FIXME: Assumes vectorcall is in use.
> @@ -2257,7 +2371,7 @@ ABIArgInfo X86_64ABIInfo::getIndirectRet
>              ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
>    }
>
> -  return ABIArgInfo::getIndirect(0);
> +  return getNaturalAlignIndirect(Ty);
>  }
>
>  bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const {
> @@ -2291,7 +2405,7 @@ ABIArgInfo X86_64ABIInfo::getIndirectRes
>    }
>
>    if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
> -    return ABIArgInfo::getIndirect(0, RAA ==
> CGCXXABI::RAA_DirectInMemory);
> +    return getNaturalAlignIndirect(Ty, RAA ==
> CGCXXABI::RAA_DirectInMemory);
>
>    // Compute the byval alignment. We specify the alignment of the byval
> in all
>    // cases so that the mid-level optimizer knows the alignment of the
> byval.
> @@ -2328,7 +2442,7 @@ ABIArgInfo X86_64ABIInfo::getIndirectRes
>                                                            Size));
>    }
>
> -  return ABIArgInfo::getIndirect(Align);
> +  return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align));
>  }
>
>  /// The ABI specifies that a value should be passed in a full vector
> XMM/YMM
> @@ -2912,11 +3026,10 @@ void X86_64ABIInfo::computeInfo(CGFuncti
>    }
>  }
>
> -static llvm::Value *EmitVAArgFromMemory(llvm::Value *VAListAddr,
> -                                        QualType Ty,
> -                                        CodeGenFunction &CGF) {
> -  llvm::Value *overflow_arg_area_p = CGF.Builder.CreateStructGEP(
> -      nullptr, VAListAddr, 2, "overflow_arg_area_p");
> +static Address EmitX86_64VAArgFromMemory(CodeGenFunction &CGF,
> +                                         Address VAListAddr, QualType Ty)
> {
> +  Address overflow_arg_area_p = CGF.Builder.CreateStructGEP(
> +      VAListAddr, 2, CharUnits::fromQuantity(8), "overflow_arg_area_p");
>    llvm::Value *overflow_arg_area =
>      CGF.Builder.CreateLoad(overflow_arg_area_p, "overflow_arg_area");
>
> @@ -2924,7 +3037,7 @@ static llvm::Value *EmitVAArgFromMemory(
>    // byte boundary if alignment needed by type exceeds 8 byte boundary.
>    // It isn't stated explicitly in the standard, but in practice we use
>    // alignment greater than 16 where necessary.
> -  uint64_t Align = CGF.getContext().getTypeAlign(Ty) / 8;
> +  uint64_t Align = CGF.getContext().getTypeAlignInChars(Ty).getQuantity();
>    if (Align > 8) {
>      // overflow_arg_area = (overflow_arg_area + align - 1) & -align;
>      llvm::Value *Offset =
> @@ -2958,11 +3071,11 @@ static llvm::Value *EmitVAArgFromMemory(
>    CGF.Builder.CreateStore(overflow_arg_area, overflow_arg_area_p);
>
>    // AMD64-ABI 3.5.7p5: Step 11. Return the fetched type.
> -  return Res;
> +  return Address(Res, CharUnits::fromQuantity(Align));
>  }
>
> -llvm::Value *X86_64ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType
> Ty,
> -                                      CodeGenFunction &CGF) const {
> +Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
> +                                 QualType Ty) const {
>    // Assume that va_list type is correct; should be pointer to LLVM type:
>    // struct {
>    //   i32 gp_offset;
> @@ -2972,14 +3085,14 @@ llvm::Value *X86_64ABIInfo::EmitVAArg(ll
>    // };
>    unsigned neededInt, neededSSE;
>
> -  Ty = CGF.getContext().getCanonicalType(Ty);
> +  Ty = getContext().getCanonicalType(Ty);
>    ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE,
>                                         /*isNamedArg*/false);
>
>    // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
>    // in the registers. If not go to step 7.
>    if (!neededInt && !neededSSE)
> -    return EmitVAArgFromMemory(VAListAddr, Ty, CGF);
> +    return EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty);
>
>    // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of
>    // general purpose registers needed to pass type and num_fp to hold
> @@ -2993,11 +3106,12 @@ llvm::Value *X86_64ABIInfo::EmitVAArg(ll
>    // register save space).
>
>    llvm::Value *InRegs = nullptr;
> -  llvm::Value *gp_offset_p = nullptr, *gp_offset = nullptr;
> -  llvm::Value *fp_offset_p = nullptr, *fp_offset = nullptr;
> +  Address gp_offset_p = Address::invalid(), fp_offset_p =
> Address::invalid();
> +  llvm::Value *gp_offset = nullptr, *fp_offset = nullptr;
>    if (neededInt) {
>      gp_offset_p =
> -        CGF.Builder.CreateStructGEP(nullptr, VAListAddr, 0,
> "gp_offset_p");
> +        CGF.Builder.CreateStructGEP(VAListAddr, 0, CharUnits::Zero(),
> +                                    "gp_offset_p");
>      gp_offset = CGF.Builder.CreateLoad(gp_offset_p, "gp_offset");
>      InRegs = llvm::ConstantInt::get(CGF.Int32Ty, 48 - neededInt * 8);
>      InRegs = CGF.Builder.CreateICmpULE(gp_offset, InRegs, "fits_in_gp");
> @@ -3005,7 +3119,8 @@ llvm::Value *X86_64ABIInfo::EmitVAArg(ll
>
>    if (neededSSE) {
>      fp_offset_p =
> -        CGF.Builder.CreateStructGEP(nullptr, VAListAddr, 1,
> "fp_offset_p");
> +        CGF.Builder.CreateStructGEP(VAListAddr, 1,
> CharUnits::fromQuantity(4),
> +                                    "fp_offset_p");
>      fp_offset = CGF.Builder.CreateLoad(fp_offset_p, "fp_offset");
>      llvm::Value *FitsInFP =
>        llvm::ConstantInt::get(CGF.Int32Ty, 176 - neededSSE * 16);
> @@ -3033,14 +3148,17 @@ llvm::Value *X86_64ABIInfo::EmitVAArg(ll
>    // simple assembling of a structure from scattered addresses has many
> more
>    // loads than necessary. Can we clean this up?
>    llvm::Type *LTy = CGF.ConvertTypeForMem(Ty);
> -  llvm::Value *RegAddr = CGF.Builder.CreateLoad(
> -      CGF.Builder.CreateStructGEP(nullptr, VAListAddr, 3),
> "reg_save_area");
> +  llvm::Value *RegSaveArea = CGF.Builder.CreateLoad(
> +      CGF.Builder.CreateStructGEP(VAListAddr, 3,
> CharUnits::fromQuantity(16)),
> +                                  "reg_save_area");
> +
> +  Address RegAddr = Address::invalid();
>    if (neededInt && neededSSE) {
>      // FIXME: Cleanup.
>      assert(AI.isDirect() && "Unexpected ABI info for mixed regs");
>      llvm::StructType *ST = cast<llvm::StructType>(AI.getCoerceToType());
> -    llvm::Value *Tmp = CGF.CreateMemTemp(Ty);
> -    Tmp = CGF.Builder.CreateBitCast(Tmp, ST->getPointerTo());
> +    Address Tmp = CGF.CreateMemTemp(Ty);
> +    Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST);
>      assert(ST->getNumElements() == 2 && "Unexpected ABI info for mixed
> regs");
>      llvm::Type *TyLo = ST->getElementType(0);
>      llvm::Type *TyHi = ST->getElementType(1);
> @@ -3048,57 +3166,77 @@ llvm::Value *X86_64ABIInfo::EmitVAArg(ll
>             "Unexpected ABI info for mixed regs");
>      llvm::Type *PTyLo = llvm::PointerType::getUnqual(TyLo);
>      llvm::Type *PTyHi = llvm::PointerType::getUnqual(TyHi);
> -    llvm::Value *GPAddr = CGF.Builder.CreateGEP(RegAddr, gp_offset);
> -    llvm::Value *FPAddr = CGF.Builder.CreateGEP(RegAddr, fp_offset);
> +    llvm::Value *GPAddr = CGF.Builder.CreateGEP(RegSaveArea, gp_offset);
> +    llvm::Value *FPAddr = CGF.Builder.CreateGEP(RegSaveArea, fp_offset);
>      llvm::Value *RegLoAddr = TyLo->isFPOrFPVectorTy() ? FPAddr : GPAddr;
>      llvm::Value *RegHiAddr = TyLo->isFPOrFPVectorTy() ? GPAddr : FPAddr;
> +
> +    // Copy the first element.
>      llvm::Value *V =
> -      CGF.Builder.CreateLoad(CGF.Builder.CreateBitCast(RegLoAddr, PTyLo));
> -    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(ST, Tmp, 0));
> -    V = CGF.Builder.CreateLoad(CGF.Builder.CreateBitCast(RegHiAddr,
> PTyHi));
> -    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(ST, Tmp, 1));
> +      CGF.Builder.CreateDefaultAlignedLoad(
> +                               CGF.Builder.CreateBitCast(RegLoAddr,
> PTyLo));
> +    CGF.Builder.CreateStore(V,
> +                    CGF.Builder.CreateStructGEP(Tmp, 0,
> CharUnits::Zero()));
> +
> +    // Copy the second element.
> +    V = CGF.Builder.CreateDefaultAlignedLoad(
> +                               CGF.Builder.CreateBitCast(RegHiAddr,
> PTyHi));
> +    CharUnits Offset = CharUnits::fromQuantity(
> +
>  getDataLayout().getStructLayout(ST)->getElementOffset(1));
> +    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1,
> Offset));
>
> -    RegAddr = CGF.Builder.CreateBitCast(Tmp,
> -
> llvm::PointerType::getUnqual(LTy));
> +    RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy);
>    } else if (neededInt) {
> -    RegAddr = CGF.Builder.CreateGEP(RegAddr, gp_offset);
> -    RegAddr = CGF.Builder.CreateBitCast(RegAddr,
> -
> llvm::PointerType::getUnqual(LTy));
> +    RegAddr = Address(CGF.Builder.CreateGEP(RegSaveArea, gp_offset),
> +                      CharUnits::fromQuantity(8));
> +    RegAddr = CGF.Builder.CreateElementBitCast(RegAddr, LTy);
>
>      // Copy to a temporary if necessary to ensure the appropriate
> alignment.
>      std::pair<CharUnits, CharUnits> SizeAlign =
> -        CGF.getContext().getTypeInfoInChars(Ty);
> +        getContext().getTypeInfoInChars(Ty);
>      uint64_t TySize = SizeAlign.first.getQuantity();
> -    unsigned TyAlign = SizeAlign.second.getQuantity();
> -    if (TyAlign > 8) {
> -      llvm::Value *Tmp = CGF.CreateMemTemp(Ty);
> -      CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, 8, false);
> +    CharUnits TyAlign = SizeAlign.second;
> +
> +    // Copy into a temporary if the type is more aligned than the
> +    // register save area.
> +    if (TyAlign.getQuantity() > 8) {
> +      Address Tmp = CGF.CreateMemTemp(Ty);
> +      CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, false);
>        RegAddr = Tmp;
>      }
> +
>    } else if (neededSSE == 1) {
> -    RegAddr = CGF.Builder.CreateGEP(RegAddr, fp_offset);
> -    RegAddr = CGF.Builder.CreateBitCast(RegAddr,
> -
> llvm::PointerType::getUnqual(LTy));
> +    RegAddr = Address(CGF.Builder.CreateGEP(RegSaveArea, fp_offset),
> +                      CharUnits::fromQuantity(16));
> +    RegAddr = CGF.Builder.CreateElementBitCast(RegAddr, LTy);
>    } else {
>      assert(neededSSE == 2 && "Invalid number of needed registers!");
>      // SSE registers are spaced 16 bytes apart in the register save
>      // area, we need to collect the two eightbytes together.
> -    llvm::Value *RegAddrLo = CGF.Builder.CreateGEP(RegAddr, fp_offset);
> -    llvm::Value *RegAddrHi = CGF.Builder.CreateConstGEP1_32(RegAddrLo,
> 16);
> +    // The ABI isn't explicit about this, but it seems reasonable
> +    // to assume that the slots are 16-byte aligned, since the stack is
> +    // naturally 16-byte aligned and the prologue is expected to store
> +    // all the SSE registers to the RSA.
> +    Address RegAddrLo = Address(CGF.Builder.CreateGEP(RegSaveArea,
> fp_offset),
> +                                CharUnits::fromQuantity(16));
> +    Address RegAddrHi =
> +      CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo,
> +                                             CharUnits::fromQuantity(16));
>      llvm::Type *DoubleTy = CGF.DoubleTy;
> -    llvm::Type *DblPtrTy =
> -      llvm::PointerType::getUnqual(DoubleTy);
>      llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy,
> nullptr);
> -    llvm::Value *V, *Tmp = CGF.CreateMemTemp(Ty);
> -    Tmp = CGF.Builder.CreateBitCast(Tmp, ST->getPointerTo());
> -    V = CGF.Builder.CreateLoad(CGF.Builder.CreateBitCast(RegAddrLo,
> -                                                         DblPtrTy));
> -    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(ST, Tmp, 0));
> -    V = CGF.Builder.CreateLoad(CGF.Builder.CreateBitCast(RegAddrHi,
> -                                                         DblPtrTy));
> -    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(ST, Tmp, 1));
> -    RegAddr = CGF.Builder.CreateBitCast(Tmp,
> -
> llvm::PointerType::getUnqual(LTy));
> +    llvm::Value *V;
> +    Address Tmp = CGF.CreateMemTemp(Ty);
> +    Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST);
> +    V = CGF.Builder.CreateLoad(
> +                   CGF.Builder.CreateElementBitCast(RegAddrLo, DoubleTy));
> +    CGF.Builder.CreateStore(V,
> +                   CGF.Builder.CreateStructGEP(Tmp, 0,
> CharUnits::Zero()));
> +    V = CGF.Builder.CreateLoad(
> +                   CGF.Builder.CreateElementBitCast(RegAddrHi, DoubleTy));
> +    CGF.Builder.CreateStore(V,
> +          CGF.Builder.CreateStructGEP(Tmp, 1,
> CharUnits::fromQuantity(8)));
> +
> +    RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy);
>    }
>
>    // AMD64-ABI 3.5.7p5: Step 5. Set:
> @@ -3119,15 +3257,13 @@ llvm::Value *X86_64ABIInfo::EmitVAArg(ll
>    // Emit code to load the value if it was passed in memory.
>
>    CGF.EmitBlock(InMemBlock);
> -  llvm::Value *MemAddr = EmitVAArgFromMemory(VAListAddr, Ty, CGF);
> +  Address MemAddr = EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty);
>
>    // Return the appropriate result.
>
>    CGF.EmitBlock(ContBlock);
> -  llvm::PHINode *ResAddr = CGF.Builder.CreatePHI(RegAddr->getType(), 2,
> -                                                 "vaarg.addr");
> -  ResAddr->addIncoming(RegAddr, InRegBlock);
> -  ResAddr->addIncoming(MemAddr, InMemBlock);
> +  Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr,
> InMemBlock,
> +                                 "vaarg.addr");
>    return ResAddr;
>  }
>
> @@ -3148,11 +3284,11 @@ ABIArgInfo WinX86_64ABIInfo::classify(Qu
>    if (RT) {
>      if (!IsReturnType) {
>        if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()))
> -        return ABIArgInfo::getIndirect(0, RAA ==
> CGCXXABI::RAA_DirectInMemory);
> +        return getNaturalAlignIndirect(Ty, RAA ==
> CGCXXABI::RAA_DirectInMemory);
>      }
>
>      if (RT->getDecl()->hasFlexibleArrayMember())
> -      return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
> +      return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
>
>      // FIXME: mingw-w64-gcc emits 128-bit struct as i128
>      if (Width == 128 && getTarget().getTriple().isWindowsGNUEnvironment())
> @@ -3171,7 +3307,8 @@ ABIArgInfo WinX86_64ABIInfo::classify(Qu
>          return ABIArgInfo::getDirect();
>        return ABIArgInfo::getExpand();
>      }
> -    return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
> +    return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align),
> +                                   /*ByVal=*/false);
>    }
>
>
> @@ -3187,7 +3324,7 @@ ABIArgInfo WinX86_64ABIInfo::classify(Qu
>      // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes,
> or is
>      // not 1, 2, 4, or 8 bytes, must be passed by reference."
>      if (Width > 64 || !llvm::isPowerOf2_64(Width))
> -      return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
> +      return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
>
>      // Otherwise, coerce it to a small integer.
>      return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
> Width));
> @@ -3217,26 +3354,12 @@ void WinX86_64ABIInfo::computeInfo(CGFun
>      I.info = classify(I.type, FreeSSERegs, false);
>  }
>
> -llvm::Value *WinX86_64ABIInfo::EmitVAArg(llvm::Value *VAListAddr,
> QualType Ty,
> -                                      CodeGenFunction &CGF) const {
> -  llvm::Type *BPP = CGF.Int8PtrPtrTy;
> -
> -  CGBuilderTy &Builder = CGF.Builder;
> -  llvm::Value *VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP,
> -                                                       "ap");
> -  llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur");
> -  llvm::Type *PTy =
> -    llvm::PointerType::getUnqual(CGF.ConvertType(Ty));
> -  llvm::Value *AddrTyped = Builder.CreateBitCast(Addr, PTy);
> -
> -  uint64_t Offset =
> -    llvm::RoundUpToAlignment(CGF.getContext().getTypeSize(Ty) / 8, 8);
> -  llvm::Value *NextAddr =
> -    Builder.CreateGEP(Addr, llvm::ConstantInt::get(CGF.Int32Ty, Offset),
> -                      "ap.next");
> -  Builder.CreateStore(NextAddr, VAListAddrAsBPP);
> -
> -  return AddrTyped;
> +Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address
> VAListAddr,
> +                                    QualType Ty) const {
> +  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false,
> +                          CGF.getContext().getTypeInfoInChars(Ty),
> +                          CharUnits::fromQuantity(8),
> +                          /*allowHigherAlign*/ false);
>  }
>
>  // PowerPC-32
> @@ -3246,8 +3369,8 @@ class PPC32_SVR4_ABIInfo : public Defaul
>  public:
>    PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
>
> -  llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
> -                         CodeGenFunction &CGF) const override;
> +  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
> +                    QualType Ty) const override;
>  };
>
>  class PPC32TargetCodeGenInfo : public TargetCodeGenInfo {
> @@ -3266,64 +3389,50 @@ public:
>
>  }
>
> -llvm::Value *PPC32_SVR4_ABIInfo::EmitVAArg(llvm::Value *VAListAddr,
> -                                           QualType Ty,
> -                                           CodeGenFunction &CGF) const {
> +Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address
> VAList,
> +                                      QualType Ty) const {
>    if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
>      // TODO: Implement this. For now ignore.
>      (void)CTy;
> -    return nullptr;
> +    return Address::invalid();
>    }
>
> +  // struct __va_list_tag {
> +  //   unsigned char gpr;
> +  //   unsigned char fpr;
> +  //   unsigned short reserved;
> +  //   void *overflow_arg_area;
> +  //   void *reg_save_area;
> +  // };
> +
>    bool isI64 = Ty->isIntegerType() && getContext().getTypeSize(Ty) == 64;
>    bool isInt =
>        Ty->isIntegerType() || Ty->isPointerType() || Ty->isAggregateType();
> -  llvm::Type *CharPtr = CGF.Int8PtrTy;
> -  llvm::Type *CharPtrPtr = CGF.Int8PtrPtrTy;
> +
> +  // All aggregates are passed indirectly?  That doesn't seem consistent
> +  // with the argument-lowering code.
> +  bool isIndirect = Ty->isAggregateType();
>
>    CGBuilderTy &Builder = CGF.Builder;
> -  llvm::Value *GPRPtr = Builder.CreateBitCast(VAListAddr, CharPtr,
> "gprptr");
> -  llvm::Value *GPRPtrAsInt = Builder.CreatePtrToInt(GPRPtr, CGF.Int32Ty);
> -  llvm::Value *FPRPtrAsInt =
> -      Builder.CreateAdd(GPRPtrAsInt, Builder.getInt32(1));
> -  llvm::Value *FPRPtr = Builder.CreateIntToPtr(FPRPtrAsInt, CharPtr);
> -  llvm::Value *OverflowAreaPtrAsInt =
> -      Builder.CreateAdd(FPRPtrAsInt, Builder.getInt32(3));
> -  llvm::Value *OverflowAreaPtr =
> -      Builder.CreateIntToPtr(OverflowAreaPtrAsInt, CharPtrPtr);
> -  llvm::Value *RegsaveAreaPtrAsInt =
> -      Builder.CreateAdd(OverflowAreaPtrAsInt, Builder.getInt32(4));
> -  llvm::Value *RegsaveAreaPtr =
> -      Builder.CreateIntToPtr(RegsaveAreaPtrAsInt, CharPtrPtr);
> -  llvm::Value *GPR = Builder.CreateLoad(GPRPtr, false, "gpr");
> -  // Align GPR when TY is i64.
> -  if (isI64) {
> -    llvm::Value *GPRAnd = Builder.CreateAnd(GPR, Builder.getInt8(1));
> -    llvm::Value *CC64 = Builder.CreateICmpEQ(GPRAnd, Builder.getInt8(1));
> -    llvm::Value *GPRPlusOne = Builder.CreateAdd(GPR, Builder.getInt8(1));
> -    GPR = Builder.CreateSelect(CC64, GPRPlusOne, GPR);
> -  }
> -  llvm::Value *FPR = Builder.CreateLoad(FPRPtr, false, "fpr");
> -  llvm::Value *OverflowArea =
> -      Builder.CreateLoad(OverflowAreaPtr, false, "overflow_area");
> -  llvm::Value *OverflowAreaAsInt =
> -      Builder.CreatePtrToInt(OverflowArea, CGF.Int32Ty);
> -  llvm::Value *RegsaveArea =
> -      Builder.CreateLoad(RegsaveAreaPtr, false, "regsave_area");
> -  llvm::Value *RegsaveAreaAsInt =
> -      Builder.CreatePtrToInt(RegsaveArea, CGF.Int32Ty);
>
> -  llvm::Value *CC =
> -      Builder.CreateICmpULT(isInt ? GPR : FPR, Builder.getInt8(8),
> "cond");
> +  // The calling convention either uses 1-2 GPRs or 1 FPR.
> +  Address NumRegsAddr = Address::invalid();
> +  if (isInt) {
> +    NumRegsAddr = Builder.CreateStructGEP(VAList, 0, CharUnits::Zero(),
> "gpr");
> +  } else {
> +    NumRegsAddr = Builder.CreateStructGEP(VAList, 1, CharUnits::One(),
> "fpr");
> +  }
>
> -  llvm::Value *RegConstant =
> -      Builder.CreateMul(isInt ? GPR : FPR, Builder.getInt8(isInt ? 4 :
> 8));
> +  llvm::Value *NumRegs = Builder.CreateLoad(NumRegsAddr, "numUsedRegs");
>
> -  llvm::Value *OurReg = Builder.CreateAdd(
> -      RegsaveAreaAsInt, Builder.CreateSExt(RegConstant, CGF.Int32Ty));
> +  // "Align" the register count when TY is i64.
> +  if (isI64) {
> +    NumRegs = Builder.CreateAdd(NumRegs, Builder.getInt8(1));
> +    NumRegs = Builder.CreateAnd(NumRegs, Builder.getInt8((uint8_t) ~1U));
> +  }
>
> -  if (Ty->isFloatingType())
> -    OurReg = Builder.CreateAdd(OurReg, Builder.getInt32(32));
> +  llvm::Value *CC =
> +      Builder.CreateICmpULT(NumRegs, Builder.getInt8(8), "cond");
>
>    llvm::BasicBlock *UsingRegs = CGF.createBasicBlock("using_regs");
>    llvm::BasicBlock *UsingOverflow =
> CGF.createBasicBlock("using_overflow");
> @@ -3331,39 +3440,84 @@ llvm::Value *PPC32_SVR4_ABIInfo::EmitVAA
>
>    Builder.CreateCondBr(CC, UsingRegs, UsingOverflow);
>
> -  CGF.EmitBlock(UsingRegs);
> +  llvm::Type *DirectTy = CGF.ConvertType(Ty);
> +  if (isIndirect) DirectTy = DirectTy->getPointerTo(0);
>
> -  llvm::Type *PTy = llvm::PointerType::getUnqual(CGF.ConvertType(Ty));
> -  llvm::Value *Result1 = Builder.CreateIntToPtr(OurReg, PTy);
> -  // Increase the GPR/FPR indexes.
> -  if (isInt) {
> -    GPR = Builder.CreateAdd(GPR, Builder.getInt8(isI64 ? 2 : 1));
> -    Builder.CreateStore(GPR, GPRPtr);
> -  } else {
> -    FPR = Builder.CreateAdd(FPR, Builder.getInt8(1));
> -    Builder.CreateStore(FPR, FPRPtr);
> -  }
> -  CGF.EmitBranch(Cont);
> -
> -  CGF.EmitBlock(UsingOverflow);
> +  // Case 1: consume registers.
> +  Address RegAddr = Address::invalid();
> +  {
> +    CGF.EmitBlock(UsingRegs);
> +
> +    Address RegSaveAreaPtr =
> +      Builder.CreateStructGEP(VAList, 4, CharUnits::fromQuantity(8));
> +    RegAddr = Address(Builder.CreateLoad(RegSaveAreaPtr),
> +                      CharUnits::fromQuantity(8));
> +    assert(RegAddr.getElementType() == CGF.Int8Ty);
> +
> +    // Floating-point registers start after the general-purpose registers.
> +    if (!isInt) {
> +      RegAddr = Builder.CreateConstInBoundsByteGEP(RegAddr,
> +
>  CharUnits::fromQuantity(32));
> +    }
> +
> +    // Get the address of the saved value by scaling the number of
> +    // registers we've used by the number of
> +    CharUnits RegSize = CharUnits::fromQuantity(isInt ? 4 : 8);
> +    llvm::Value *RegOffset =
> +      Builder.CreateMul(NumRegs, Builder.getInt8(RegSize.getQuantity()));
> +    RegAddr = Address(Builder.CreateInBoundsGEP(CGF.Int8Ty,
> +                                            RegAddr.getPointer(),
> RegOffset),
> +
> RegAddr.getAlignment().alignmentOfArrayElement(RegSize));
> +    RegAddr = Builder.CreateElementBitCast(RegAddr, DirectTy);
> +
> +    // Increase the used-register count.
> +    NumRegs = Builder.CreateAdd(NumRegs, Builder.getInt8(isI64 ? 2 : 1));
> +    Builder.CreateStore(NumRegs, NumRegsAddr);
> +
> +    CGF.EmitBranch(Cont);
> +  }
> +
> +  // Case 2: consume space in the overflow area.
> +  Address MemAddr = Address::invalid();
> +  {
> +    CGF.EmitBlock(UsingOverflow);
> +
> +    // Everything in the overflow area is rounded up to a size of at
> least 4.
> +    CharUnits OverflowAreaAlign = CharUnits::fromQuantity(4);
> +
> +    CharUnits Size;
> +    if (!isIndirect) {
> +      auto TypeInfo = CGF.getContext().getTypeInfoInChars(Ty);
> +      Size = TypeInfo.first.RoundUpToAlignment(OverflowAreaAlign);
> +    } else {
> +      Size = CGF.getPointerSize();
> +    }
>
> -  // Increase the overflow area.
> -  llvm::Value *Result2 = Builder.CreateIntToPtr(OverflowAreaAsInt, PTy);
> -  OverflowAreaAsInt =
> -      Builder.CreateAdd(OverflowAreaAsInt, Builder.getInt32(isInt ? 4 :
> 8));
> -  Builder.CreateStore(Builder.CreateIntToPtr(OverflowAreaAsInt, CharPtr),
> -                      OverflowAreaPtr);
> -  CGF.EmitBranch(Cont);
> +    Address OverflowAreaAddr =
> +      Builder.CreateStructGEP(VAList, 3, CharUnits::fromQuantity(4));
> +    Address OverflowArea(Builder.CreateLoad(OverflowAreaAddr),
> +                         OverflowAreaAlign);
> +
> +    // The current address is the address of the varargs element.
> +    // FIXME: do we not need to round up to alignment?
> +    MemAddr = Builder.CreateElementBitCast(OverflowArea, DirectTy);
> +
> +    // Increase the overflow area.
> +    OverflowArea = Builder.CreateConstInBoundsByteGEP(OverflowArea, Size);
> +    Builder.CreateStore(OverflowArea.getPointer(), OverflowAreaAddr);
> +    CGF.EmitBranch(Cont);
> +  }
>
>    CGF.EmitBlock(Cont);
>
> -  llvm::PHINode *Result = CGF.Builder.CreatePHI(PTy, 2, "vaarg.addr");
> -  Result->addIncoming(Result1, UsingRegs);
> -  Result->addIncoming(Result2, UsingOverflow);
> -
> -  if (Ty->isAggregateType()) {
> -    llvm::Value *AGGPtr = Builder.CreateBitCast(Result, CharPtrPtr,
> "aggrptr");
> -    return Builder.CreateLoad(AGGPtr, false, "aggr");
> +  // Merge the cases with a phi.
> +  Address Result = emitMergePHI(CGF, RegAddr, UsingRegs, MemAddr,
> UsingOverflow,
> +                                "vaarg.addr");
> +
> +  // Load the pointer if the argument was passed indirectly.
> +  if (isIndirect) {
> +    Result = Address(Builder.CreateLoad(Result, "aggr"),
> +                     getContext().getTypeAlignInChars(Ty));
>    }
>
>    return Result;
> @@ -3459,7 +3613,7 @@ public:
>      : DefaultABIInfo(CGT), Kind(Kind), HasQPX(HasQPX) {}
>
>    bool isPromotableTypeForABI(QualType Ty) const;
> -  bool isAlignedParamType(QualType Ty, bool &Align32) const;
> +  CharUnits getParamTypeAlignment(QualType Ty) const;
>
>    ABIArgInfo classifyReturnType(QualType RetTy) const;
>    ABIArgInfo classifyArgumentType(QualType Ty) const;
> @@ -3496,8 +3650,8 @@ public:
>      }
>    }
>
> -  llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
> -                         CodeGenFunction &CGF) const override;
> +  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
> +                    QualType Ty) const override;
>  };
>
>  class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo {
> @@ -3557,12 +3711,9 @@ PPC64_SVR4_ABIInfo::isPromotableTypeForA
>    return false;
>  }
>
> -/// isAlignedParamType - Determine whether a type requires 16-byte
> -/// alignment in the parameter area.
> -bool
> -PPC64_SVR4_ABIInfo::isAlignedParamType(QualType Ty, bool &Align32) const {
> -  Align32 = false;
> -
> +/// isAlignedParamType - Determine whether a type requires 16-byte or
> +/// higher alignment in the parameter area.  Always returns at least 8.
> +CharUnits PPC64_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const {
>    // Complex types are passed just like their elements.
>    if (const ComplexType *CTy = Ty->getAs<ComplexType>())
>      Ty = CTy->getElementType();
> @@ -3571,11 +3722,11 @@ PPC64_SVR4_ABIInfo::isAlignedParamType(Q
>    // passed via reference, smaller types are not aligned).
>    if (IsQPXVectorTy(Ty)) {
>      if (getContext().getTypeSize(Ty) > 128)
> -      Align32 = true;
> +      return CharUnits::fromQuantity(32);
>
> -    return true;
> +    return CharUnits::fromQuantity(16);
>    } else if (Ty->isVectorType()) {
> -    return getContext().getTypeSize(Ty) == 128;
> +    return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ?
> 16 : 8);
>    }
>
>    // For single-element float/vector structs, we consider the whole type
> @@ -3600,22 +3751,22 @@ PPC64_SVR4_ABIInfo::isAlignedParamType(Q
>    // With special case aggregates, only vector base types need alignment.
>    if (AlignAsType && IsQPXVectorTy(AlignAsType)) {
>      if (getContext().getTypeSize(AlignAsType) > 128)
> -      Align32 = true;
> +      return CharUnits::fromQuantity(32);
>
> -    return true;
> +    return CharUnits::fromQuantity(16);
>    } else if (AlignAsType) {
> -    return AlignAsType->isVectorType();
> +    return CharUnits::fromQuantity(AlignAsType->isVectorType() ? 16 : 8);
>    }
>
>    // Otherwise, we only need alignment for any aggregate type that
>    // has an alignment requirement of >= 16 bytes.
>    if (isAggregateTypeForABI(Ty) && getContext().getTypeAlign(Ty) >= 128) {
>      if (HasQPX && getContext().getTypeAlign(Ty) >= 256)
> -      Align32 = true;
> -    return true;
> +      return CharUnits::fromQuantity(32);
> +    return CharUnits::fromQuantity(16);
>    }
>
> -  return false;
> +  return CharUnits::fromQuantity(8);
>  }
>
>  /// isHomogeneousAggregate - Return true if a type is an ELFv2 homogeneous
> @@ -3748,7 +3899,7 @@ PPC64_SVR4_ABIInfo::classifyArgumentType
>    if (Ty->isVectorType() && !IsQPXVectorTy(Ty)) {
>      uint64_t Size = getContext().getTypeSize(Ty);
>      if (Size > 128)
> -      return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
> +      return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
>      else if (Size < 128) {
>        llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size);
>        return ABIArgInfo::getDirect(CoerceTy);
> @@ -3757,12 +3908,10 @@ PPC64_SVR4_ABIInfo::classifyArgumentType
>
>    if (isAggregateTypeForABI(Ty)) {
>      if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
> -      return ABIArgInfo::getIndirect(0, RAA ==
> CGCXXABI::RAA_DirectInMemory);
> +      return getNaturalAlignIndirect(Ty, RAA ==
> CGCXXABI::RAA_DirectInMemory);
>
> -    bool Align32;
> -    uint64_t ABIAlign = isAlignedParamType(Ty, Align32) ?
> -                          (Align32 ? 32 : 16) : 8;
> -    uint64_t TyAlign = getContext().getTypeAlign(Ty) / 8;
> +    uint64_t ABIAlign = getParamTypeAlignment(Ty).getQuantity();
> +    uint64_t TyAlign = getContext().getTypeAlignInChars(Ty).getQuantity();
>
>      // ELFv2 homogeneous aggregates are passed as array types.
>      const Type *Base = nullptr;
> @@ -3800,7 +3949,8 @@ PPC64_SVR4_ABIInfo::classifyArgumentType
>      }
>
>      // All other aggregates are passed ByVal.
> -    return ABIArgInfo::getIndirect(ABIAlign, /*ByVal=*/true,
> +    return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign),
> +                                   /*ByVal=*/true,
>                                     /*Realign=*/TyAlign > ABIAlign);
>    }
>
> @@ -3821,7 +3971,7 @@ PPC64_SVR4_ABIInfo::classifyReturnType(Q
>    if (RetTy->isVectorType() && !IsQPXVectorTy(RetTy)) {
>      uint64_t Size = getContext().getTypeSize(RetTy);
>      if (Size > 128)
> -      return ABIArgInfo::getIndirect(0);
> +      return getNaturalAlignIndirect(RetTy);
>      else if (Size < 128) {
>        llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size);
>        return ABIArgInfo::getDirect(CoerceTy);
> @@ -3856,7 +4006,7 @@ PPC64_SVR4_ABIInfo::classifyReturnType(Q
>      }
>
>      // All other aggregates are returned indirectly.
> -    return ABIArgInfo::getIndirect(0);
> +    return getNaturalAlignIndirect(RetTy);
>    }
>
>    return (isPromotableTypeForABI(RetTy) ?
> @@ -3864,47 +4014,12 @@ PPC64_SVR4_ABIInfo::classifyReturnType(Q
>  }
>
>  // Based on ARMABIInfo::EmitVAArg, adjusted for 64-bit machine.
> -llvm::Value *PPC64_SVR4_ABIInfo::EmitVAArg(llvm::Value *VAListAddr,
> -                                           QualType Ty,
> -                                           CodeGenFunction &CGF) const {
> -  llvm::Type *BP = CGF.Int8PtrTy;
> -  llvm::Type *BPP = CGF.Int8PtrPtrTy;
> -
> -  CGBuilderTy &Builder = CGF.Builder;
> -  llvm::Value *VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP,
> "ap");
> -  llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur");
> -
> -  // Handle types that require 16-byte alignment in the parameter save
> area.
> -  bool Align32;
> -  if (isAlignedParamType(Ty, Align32)) {
> -    llvm::Value *AddrAsInt = Builder.CreatePtrToInt(Addr, CGF.Int64Ty);
> -    AddrAsInt = Builder.CreateAdd(AddrAsInt,
> -                                  Builder.getInt64(Align32 ? 31 : 15));
> -    AddrAsInt = Builder.CreateAnd(AddrAsInt,
> -                                  Builder.getInt64(Align32 ? -32 : -16));
> -    Addr = Builder.CreateIntToPtr(AddrAsInt, BP, "ap.align");
> -  }
> -
> -  // Update the va_list pointer.  The pointer should be bumped by the
> -  // size of the object.  We can trust getTypeSize() except for a complex
> -  // type whose base type is smaller than a doubleword.  For these, the
> -  // size of the object is 16 bytes; see below for further explanation.
> -  unsigned SizeInBytes = CGF.getContext().getTypeSize(Ty) / 8;
> -  QualType BaseTy;
> -  unsigned CplxBaseSize = 0;
> -
> -  if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
> -    BaseTy = CTy->getElementType();
> -    CplxBaseSize = CGF.getContext().getTypeSize(BaseTy) / 8;
> -    if (CplxBaseSize < 8)
> -      SizeInBytes = 16;
> -  }
> +Address PPC64_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
> +                                      QualType Ty) const {
> +  auto TypeInfo = getContext().getTypeInfoInChars(Ty);
> +  TypeInfo.second = getParamTypeAlignment(Ty);
>
> -  unsigned Offset = llvm::RoundUpToAlignment(SizeInBytes, 8);
> -  llvm::Value *NextAddr =
> -    Builder.CreateGEP(Addr, llvm::ConstantInt::get(CGF.Int64Ty, Offset),
> -                      "ap.next");
> -  Builder.CreateStore(NextAddr, VAListAddrAsBPP);
> +  CharUnits SlotSize = CharUnits::fromQuantity(8);
>
>    // If we have a complex type and the base type is smaller than 8 bytes,
>    // the ABI calls for the real and imaginary parts to be right-adjusted
> @@ -3912,44 +4027,40 @@ llvm::Value *PPC64_SVR4_ABIInfo::EmitVAA
>    // pointer to a structure with the two parts packed tightly.  So generate
>    // loads of the real and imaginary parts relative to the va_list pointer,
>    // and store them to a temporary structure.
> -  if (CplxBaseSize && CplxBaseSize < 8) {
> -    llvm::Value *RealAddr = Builder.CreatePtrToInt(Addr, CGF.Int64Ty);
> -    llvm::Value *ImagAddr = RealAddr;
> -    if (CGF.CGM.getDataLayout().isBigEndian()) {
> -      RealAddr =
> -          Builder.CreateAdd(RealAddr, Builder.getInt64(8 - CplxBaseSize));
> -      ImagAddr =
> -          Builder.CreateAdd(ImagAddr, Builder.getInt64(16 - CplxBaseSize));
> -    } else {
> -      ImagAddr = Builder.CreateAdd(ImagAddr, Builder.getInt64(8));
> +  if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
> +    CharUnits EltSize = TypeInfo.first / 2;
> +    if (EltSize < SlotSize) {
> +      Address Addr = emitVoidPtrDirectVAArg(CGF, VAListAddr, CGF.Int8Ty,
> +                                            SlotSize * 2, SlotSize,
> +                                            SlotSize, /*AllowHigher*/ true);
> +
> +      Address RealAddr = Addr;
> +      Address ImagAddr = RealAddr;
> +      if (CGF.CGM.getDataLayout().isBigEndian()) {
> +        RealAddr = CGF.Builder.CreateConstInBoundsByteGEP(RealAddr,
> +                                                          SlotSize - EltSize);
> +        ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(ImagAddr,
> +                                                      2 * SlotSize - EltSize);
> +      } else {
> +        ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, SlotSize);
> +      }
> +
> +      llvm::Type *EltTy = CGF.ConvertTypeForMem(CTy->getElementType());
> +      RealAddr = CGF.Builder.CreateElementBitCast(RealAddr, EltTy);
> +      ImagAddr = CGF.Builder.CreateElementBitCast(ImagAddr, EltTy);
> +      llvm::Value *Real = CGF.Builder.CreateLoad(RealAddr, ".vareal");
> +      llvm::Value *Imag = CGF.Builder.CreateLoad(ImagAddr, ".vaimag");
> +
> +      Address Temp = CGF.CreateMemTemp(Ty, "vacplx");
> +      CGF.EmitStoreOfComplex({Real, Imag}, CGF.MakeAddrLValue(Temp, Ty),
> +                             /*init*/ true);
> +      return Temp;
>      }
> -    llvm::Type *PBaseTy = llvm::PointerType::getUnqual(CGF.ConvertType(BaseTy));
> -    RealAddr = Builder.CreateIntToPtr(RealAddr, PBaseTy);
> -    ImagAddr = Builder.CreateIntToPtr(ImagAddr, PBaseTy);
> -    llvm::Value *Real = Builder.CreateLoad(RealAddr, false, ".vareal");
> -    llvm::Value *Imag = Builder.CreateLoad(ImagAddr, false, ".vaimag");
> -    llvm::AllocaInst *Ptr =
> -        CGF.CreateTempAlloca(CGT.ConvertTypeForMem(Ty), "vacplx");
> -    llvm::Value *RealPtr =
> -        Builder.CreateStructGEP(Ptr->getAllocatedType(), Ptr, 0, ".real");
> -    llvm::Value *ImagPtr =
> -        Builder.CreateStructGEP(Ptr->getAllocatedType(), Ptr, 1, ".imag");
> -    Builder.CreateStore(Real, RealPtr, false);
> -    Builder.CreateStore(Imag, ImagPtr, false);
> -    return Ptr;
> -  }
> -
> -  // If the argument is smaller than 8 bytes, it is right-adjusted in
> -  // its doubleword slot.  Adjust the pointer to pick it up from the
> -  // correct offset.
> -  if (SizeInBytes < 8 && CGF.CGM.getDataLayout().isBigEndian()) {
> -    llvm::Value *AddrAsInt = Builder.CreatePtrToInt(Addr, CGF.Int64Ty);
> -    AddrAsInt = Builder.CreateAdd(AddrAsInt, Builder.getInt64(8 - SizeInBytes));
> -    Addr = Builder.CreateIntToPtr(AddrAsInt, BP);
>    }
>
> -  llvm::Type *PTy = llvm::PointerType::getUnqual(CGF.ConvertType(Ty));
> -  return Builder.CreateBitCast(Addr, PTy);
> +  // Otherwise, just use the general rule.
> +  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false,
> +                          TypeInfo, SlotSize, /*AllowHigher*/ true);
>  }
>
>  static bool
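
[Inline note, not part of the patch: the arithmetic in the new complex-type va_arg path is easier to see with concrete numbers. A standalone C++ sketch (plain arithmetic, not the clang builders) of where the real and imaginary parts land inside the two 8-byte slots under the right-adjustment rule quoted above:]

#include <cstdint>
#include <cstdio>

// Offsets of the real/imaginary parts inside a 2*SlotSize va_arg area,
// mirroring SlotSize - EltSize and 2*SlotSize - EltSize from the patch.
struct CplxOffsets { uint64_t Real, Imag; };

static CplxOffsets complexPartOffsets(uint64_t SlotSize, uint64_t EltSize,
                                      bool BigEndian) {
  if (BigEndian) // each part is right-adjusted within its own slot
    return {SlotSize - EltSize, 2 * SlotSize - EltSize};
  return {0, SlotSize};  // little-endian: left-adjusted
}

int main() {
  // _Complex float on 64-bit PPC: 4-byte elements in 8-byte slots.
  CplxOffsets BE = complexPartOffsets(8, 4, /*BigEndian=*/true);
  CplxOffsets LE = complexPartOffsets(8, 4, /*BigEndian=*/false);
  std::printf("big-endian:    real at +%llu, imag at +%llu\n",
              (unsigned long long)BE.Real, (unsigned long long)BE.Imag);
  std::printf("little-endian: real at +%llu, imag at +%llu\n",
              (unsigned long long)LE.Real, (unsigned long long)LE.Imag);
}
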
> @@ -4047,14 +4158,14 @@ private:
>        it.info = classifyArgumentType(it.type);
>    }
>
> -  llvm::Value *EmitDarwinVAArg(llvm::Value *VAListAddr, QualType Ty,
> -                               CodeGenFunction &CGF) const;
> +  Address EmitDarwinVAArg(Address VAListAddr, QualType Ty,
> +                          CodeGenFunction &CGF) const;
>
> -  llvm::Value *EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty,
> -                              CodeGenFunction &CGF) const;
> +  Address EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
> +                         CodeGenFunction &CGF) const;
>
> -  llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
> -                         CodeGenFunction &CGF) const override {
> +  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
> +                    QualType Ty) const override {
>      return isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF)
>                           : EmitAAPCSVAArg(VAListAddr, Ty, CGF);
>    }
> @@ -4097,7 +4208,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgum
>            llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4);
>        return ABIArgInfo::getDirect(ResType);
>      }
> -    return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
> +    return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
>    }
>
>    if (!isAggregateTypeForABI(Ty)) {
> @@ -4113,8 +4224,8 @@ ABIArgInfo AArch64ABIInfo::classifyArgum
>    // Structures with either a non-trivial destructor or a non-trivial
>    // copy constructor are always indirect.
>    if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
> -    return ABIArgInfo::getIndirect(0, /*ByVal=*/RAA ==
> -                                   CGCXXABI::RAA_DirectInMemory);
> +    return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA ==
> +                                     CGCXXABI::RAA_DirectInMemory);
>    }
>
>    // Empty records are always ignored on Darwin, but actually passed in C++ mode
> @@ -4149,7 +4260,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgum
>      return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
>    }
>
> -  return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
> +  return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
>  }
>
>  ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const {
> @@ -4158,7 +4269,7 @@ ABIArgInfo AArch64ABIInfo::classifyRetur
>
>    // Large vector types should be returned via memory.
>    if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128)
> -    return ABIArgInfo::getIndirect(0);
> +    return getNaturalAlignIndirect(RetTy);
>
>    if (!isAggregateTypeForABI(RetTy)) {
>      // Treat an enum type as its underlying type.
> @@ -4194,7 +4305,7 @@ ABIArgInfo AArch64ABIInfo::classifyRetur
>      return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
>    }
>
> -  return ABIArgInfo::getIndirect(0);
> +  return getNaturalAlignIndirect(RetTy);
>  }
>
>  /// isIllegalVectorType - check whether the vector type is legal for AArch64.
> @@ -4232,7 +4343,7 @@ bool AArch64ABIInfo::isHomogeneousAggreg
>    return Members <= 4;
>  }
>
> -llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr,
> +Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr,
>                                              QualType Ty,
>                                              CodeGenFunction &CGF) const {
>    ABIArgInfo AI = classifyArgumentType(Ty);
> @@ -4266,24 +4377,32 @@ llvm::Value *AArch64ABIInfo::EmitAAPCSVA
>    llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
>    llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack");
>    llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
> -  auto &Ctx = CGF.getContext();
>
> -  llvm::Value *reg_offs_p = nullptr, *reg_offs = nullptr;
> +  auto TyInfo = getContext().getTypeInfoInChars(Ty);
> +  CharUnits TyAlign = TyInfo.second;
> +
> +  Address reg_offs_p = Address::invalid();
> +  llvm::Value *reg_offs = nullptr;
>    int reg_top_index;
> -  int RegSize = IsIndirect ? 8 : getContext().getTypeSize(Ty) / 8;
> +  CharUnits reg_top_offset;
> +  int RegSize = IsIndirect ? 8 : TyInfo.first.getQuantity();
>    if (!IsFPR) {
>      // 3 is the field number of __gr_offs
>      reg_offs_p =
> -        CGF.Builder.CreateStructGEP(nullptr, VAListAddr, 3, "gr_offs_p");
> +        CGF.Builder.CreateStructGEP(VAListAddr, 3, CharUnits::fromQuantity(24),
> +                                    "gr_offs_p");
>      reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs");
>      reg_top_index = 1; // field number for __gr_top
> +    reg_top_offset = CharUnits::fromQuantity(8);
>      RegSize = llvm::RoundUpToAlignment(RegSize, 8);
>    } else {
>      // 4 is the field number of __vr_offs.
>      reg_offs_p =
> -        CGF.Builder.CreateStructGEP(nullptr, VAListAddr, 4, "vr_offs_p");
> +        CGF.Builder.CreateStructGEP(VAListAddr, 4, CharUnits::fromQuantity(28),
> +                                    "vr_offs_p");
>      reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs");
>      reg_top_index = 2; // field number for __vr_top
> +    reg_top_offset = CharUnits::fromQuantity(16);
>      RegSize = 16 * NumRegs;
>    }
>
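
[Inline note, not part of the patch: the CharUnits values above fall out of the AAPCS64 va_list layout. Writing it as a plain struct makes the 24/28 offsets for the *_offs fields (and the 8/16 for the *_top fields) obvious. This is a sketch assuming the standard AAPCS64 definition on an LP64 target, with field names paraphrased:]

#include <cstddef>
#include <cstdio>

// The AAPCS64 va_list modeled as a plain C++ struct (LP64), matching the
// field numbers and byte offsets used by EmitAAPCSVAArg above.
struct AAPCS64VaList {
  void *stack;    // field 0, offset 0  (__stack)
  void *gr_top;   // field 1, offset 8  (__gr_top)
  void *vr_top;   // field 2, offset 16 (__vr_top)
  int gr_offs;    // field 3, offset 24 (__gr_offs)
  int vr_offs;    // field 4, offset 28 (__vr_offs)
};

int main() {
  std::printf("gr_top %zu, vr_top %zu, gr_offs %zu, vr_offs %zu\n",
              offsetof(AAPCS64VaList, gr_top),
              offsetof(AAPCS64VaList, vr_top),
              offsetof(AAPCS64VaList, gr_offs),
              offsetof(AAPCS64VaList, vr_offs));
}
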
> @@ -4308,8 +4427,8 @@ llvm::Value *AArch64ABIInfo::EmitAAPCSVA
>    // Integer arguments may need to correct register alignment (for example a
>    // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we
>    // align __gr_offs to calculate the potential address.
> -  if (!IsFPR && !IsIndirect && Ctx.getTypeAlign(Ty) > 64) {
> -    int Align = Ctx.getTypeAlign(Ty) / 8;
> +  if (!IsFPR && !IsIndirect && TyAlign.getQuantity() > 8) {
> +    int Align = TyAlign.getQuantity();
>
>      reg_offs = CGF.Builder.CreateAdd(
>          reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, Align - 1),
> @@ -4320,6 +4439,9 @@ llvm::Value *AArch64ABIInfo::EmitAAPCSVA
>    }
>
>    // Update the gr_offs/vr_offs pointer for next call to va_arg on this va_list.
> +  // The fact that this is done unconditionally reflects the fact that
> +  // allocating an argument to the stack also uses up all the remaining
> +  // registers of the appropriate kind.
>    llvm::Value *NewOffset = nullptr;
>    NewOffset = CGF.Builder.CreateAdd(
>        reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, RegSize), "new_reg_offs");
> @@ -4341,13 +4463,14 @@ llvm::Value *AArch64ABIInfo::EmitAAPCSVA
>    // registers. First start the appropriate block:
>    CGF.EmitBlock(InRegBlock);
>
> -  llvm::Value *reg_top_p = nullptr, *reg_top = nullptr;
> -  reg_top_p = CGF.Builder.CreateStructGEP(nullptr, VAListAddr, reg_top_index,
> -                                          "reg_top_p");
> +  llvm::Value *reg_top = nullptr;
> +  Address reg_top_p = CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index,
> +                                                  reg_top_offset, "reg_top_p");
>    reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top");
> -  llvm::Value *BaseAddr = CGF.Builder.CreateGEP(reg_top, reg_offs);
> -  llvm::Value *RegAddr = nullptr;
> -  llvm::Type *MemTy = llvm::PointerType::getUnqual(CGF.ConvertTypeForMem(Ty));
> +  Address BaseAddr(CGF.Builder.CreateInBoundsGEP(reg_top, reg_offs),
> +                   CharUnits::fromQuantity(IsFPR ? 16 : 8));
> +  Address RegAddr = Address::invalid();
> +  llvm::Type *MemTy = CGF.ConvertTypeForMem(Ty);
>
>    if (IsIndirect) {
>      // If it's been passed indirectly (actually a struct), whatever we find from
> @@ -4364,43 +4487,45 @@ llvm::Value *AArch64ABIInfo::EmitAAPCSVA
>      // qN+1, ...). We reload and store into a temporary local variable
>      // contiguously.
>      assert(!IsIndirect &&