[llvm] 7fee4fe - Add support for Linux/Musl ABI

Sid Manning via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 20 08:00:14 PST 2020


Author: Sid Manning
Date: 2020-01-20T09:59:56-06:00
New Revision: 7fee4fed4c75c13d0cec7ff3a043e0313a3abc55

URL: https://github.com/llvm/llvm-project/commit/7fee4fed4c75c13d0cec7ff3a043e0313a3abc55
DIFF: https://github.com/llvm/llvm-project/commit/7fee4fed4c75c13d0cec7ff3a043e0313a3abc55.diff

LOG: Add support for Linux/Musl ABI

Differential revision: https://reviews.llvm.org/D72701

The patch adds a new ABI option for Hexagon. It primarily deals with
the way variable arguments are passed and is used in the Hexagon Linux/Musl
environment.

If a callee function has a variable argument list, it must perform the
following operations to set up its function prologue:

  1. Determine the number of registers which could have been used for passing
     unnamed arguments. This can be calculated by counting the number of
     registers used for passing named arguments. For example, if the callee
     function is as follows:

         int foo(int a, ...){ ... }

     ... then register R0 is used to access the argument 'a'. The registers
     available for passing unnamed arguments are R1, R2, R3, R4, and R5.

  2. Determine the number and size of the named arguments on the stack.

  3. If the callee has named arguments on the stack, it should copy all of these
     arguments to a location below the current position on the stack, and the
     difference should be the size of the register-saved area plus padding
     (if any is necessary).

     The register-saved area constitutes all the registers that could have
     been used to pass unnamed arguments. If the number of registers forming
     the register-saved area is odd, it requires 4 bytes of padding; if the
     number is even, no padding is required. This ensures 8-byte alignment
     of the stack.  For example, if the callee is as follows:

       int foo(int a, ...){ ... }

     ... then the named arguments should be copied to the following location:

       current_position - 5 (for R1-R5) * 4 (bytes) - 4 (bytes of padding)

     If the callee is as follows:

        int foo(int a, int b, ...){ ... }

     ... then the named arguments should be copied to the following location:

        current_position - 4 (for R2-R5) * 4 (bytes) - 0 (bytes of padding)

  4. After any named arguments have been copied, copy onto the stack all the
     registers that could have been used to pass unnamed arguments. If the
     number of registers is odd, leave 4 bytes of padding and then start
     copying them onto the stack; if the number is even, no padding is
     required. These copies constitute the register-saved area. If padding
     is required, ensure that the start location of the padding is 8-byte
     aligned.  If no padding is required, ensure that the start location of
     the on-stack copy of the first register which might hold a variable
     argument is 8-byte aligned.

  5. Decrement the stack pointer by the size of the register-saved area plus
     the padding.  For example, if the callee is as follows:

        int foo(int a, ...){ ... }

     ... then the decrement value should be the following:

        5 (for R1-R5) * 4 (bytes) + 4 (bytes of padding) = 24 bytes

     The decrement should be performed before the allocframe instruction.
     Increment the stack pointer back by the same amount before returning
     from the function. A sketch of this size computation is shown after
     this list.
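
For reference, the register-saved-area size used in steps 3-5 above boils
down to a small calculation. The following standalone C++ sketch mirrors the
logic the patch adds to HexagonFrameLowering::insertPrologueInBlock; the
helper name and free-function form are illustrative only, not part of the
patch:

    // Size of the register-saved area plus alignment padding, given the
    // first register in R0..R5 that could hold an unnamed argument.
    int regSavedAreaSizePlusPadding(int FirstVarArgSavedReg) {
      int NumVarArgRegs = 6 - FirstVarArgSavedReg;  // registers R<First>..R5
      // An odd register count needs 4 bytes of padding so that the
      // register-saved area stays 8-byte aligned.
      return (NumVarArgRegs % 2 == 0) ? NumVarArgRegs * 4
                                      : NumVarArgRegs * 4 + 4;
    }

    // int foo(int a, ...)        -> FirstVarArgSavedReg = 1: 5*4 + 4 = 24 bytes
    // int foo(int a, int b, ...) -> FirstVarArgSavedReg = 2: 4*4 + 0 = 16 bytes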

Added: 
    llvm/test/CodeGen/Hexagon/vacopy.ll
    llvm/test/CodeGen/Hexagon/vararg-deallocate-sp.ll
    llvm/test/CodeGen/Hexagon/vararg-linux-abi.ll
    llvm/test/CodeGen/Hexagon/vararg.ll
    llvm/test/CodeGen/Hexagon/vararg_align_check.ll
    llvm/test/CodeGen/Hexagon/vararg_double_onstack.ll
    llvm/test/CodeGen/Hexagon/vararg_named.ll

Modified: 
    llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
    llvm/lib/Target/Hexagon/HexagonFrameLowering.h
    llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
    llvm/lib/Target/Hexagon/HexagonISelLowering.h
    llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
    llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
    llvm/lib/Target/Hexagon/HexagonSubtarget.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index aff8e57b0a94..c8948ecc9052 100644
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -395,6 +395,9 @@ void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,
       MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const {
   static unsigned ShrinkCounter = 0;
 
+  if (MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl() &&
+      MF.getFunction().isVarArg())
+    return;
   if (ShrinkLimit.getPosition()) {
     if (ShrinkCounter >= ShrinkLimit)
       return;
@@ -622,6 +625,118 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
 
   DebugLoc dl = MBB.findDebugLoc(InsertPt);
 
+  if (MF.getFunction().isVarArg() &&
+      MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl()) {
+    // Calculate the size of register saved area.
+    int NumVarArgRegs = 6 - FirstVarArgSavedReg;
+    int RegisterSavedAreaSizePlusPadding = (NumVarArgRegs % 2 == 0)
+                                              ? NumVarArgRegs * 4
+                                              : NumVarArgRegs * 4 + 4;
+    if (RegisterSavedAreaSizePlusPadding > 0) {
+      // Decrement the stack pointer by size of register saved area plus
+      // padding if any.
+      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
+        .addReg(SP)
+        .addImm(-RegisterSavedAreaSizePlusPadding)
+        .setMIFlag(MachineInstr::FrameSetup);
+
+      int NumBytes = 0;
+      // Copy all the named arguments below register saved area.
+      auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
+      for (int i = HMFI.getFirstNamedArgFrameIndex(),
+               e = HMFI.getLastNamedArgFrameIndex(); i >= e; --i) {
+        int ObjSize = MFI.getObjectSize(i);
+        int ObjAlign = MFI.getObjectAlignment(i);
+
+        // Determine the kind of load/store that should be used.
+        unsigned LDOpc, STOpc;
+        int OpcodeChecker = ObjAlign;
+
+        // Handle cases where alignment of an object is > its size.
+        if (ObjSize < ObjAlign) {
+          if (ObjSize <= 1)
+            OpcodeChecker = 1;
+          else if (ObjSize <= 2)
+            OpcodeChecker = 2;
+          else if (ObjSize <= 4)
+            OpcodeChecker = 4;
+          else if (ObjSize > 4)
+            OpcodeChecker = 8;
+        }
+
+        switch (OpcodeChecker) {
+          case 1:
+            LDOpc = Hexagon::L2_loadrb_io;
+            STOpc = Hexagon::S2_storerb_io;
+            break;
+          case 2:
+            LDOpc = Hexagon::L2_loadrh_io;
+            STOpc = Hexagon::S2_storerh_io;
+            break;
+          case 4:
+            LDOpc = Hexagon::L2_loadri_io;
+            STOpc = Hexagon::S2_storeri_io;
+            break;
+          case 8:
+          default:
+            LDOpc = Hexagon::L2_loadrd_io;
+            STOpc = Hexagon::S2_storerd_io;
+            break;
+        }
+
+        unsigned RegUsed = LDOpc == Hexagon::L2_loadrd_io ? Hexagon::D3
+                                                          : Hexagon::R6;
+        int LoadStoreCount = ObjSize / OpcodeChecker;
+
+        if (ObjSize % OpcodeChecker)
+          ++LoadStoreCount;
+
+        // Get the start location of the load. NumBytes is basically the
+        // offset from the stack pointer of previous function, which would be
+        // the caller in this case, as this function has variable argument
+        // list.
+        if (NumBytes != 0)
+          NumBytes = alignTo(NumBytes, ObjAlign);
+
+        int Count = 0;
+        while (Count < LoadStoreCount) {
+          // Load the value of the named argument on stack.
+          BuildMI(MBB, InsertPt, dl, HII.get(LDOpc), RegUsed)
+            .addReg(SP)
+            .addImm(RegisterSavedAreaSizePlusPadding +
+                    ObjAlign * Count + NumBytes)
+            .setMIFlag(MachineInstr::FrameSetup);
+
+          // Store it below the register saved area plus padding.
+          BuildMI(MBB, InsertPt, dl, HII.get(STOpc))
+            .addReg(SP)
+            .addImm(ObjAlign * Count + NumBytes)
+            .addReg(RegUsed)
+            .setMIFlag(MachineInstr::FrameSetup);
+
+          Count++;
+        }
+        NumBytes += MFI.getObjectSize(i);
+      }
+
+      // Make NumBytes 8 byte aligned
+      NumBytes = alignTo(NumBytes, 8);
+
+      // If the number of registers having variable arguments is odd,
+      // leave 4 bytes of padding to get to the location where first
+      // variable argument which was passed through register was copied.
+      NumBytes = (NumVarArgRegs % 2 == 0) ? NumBytes : NumBytes + 4;
+
+      for (int j = FirstVarArgSavedReg, i = 0; j < 6; ++j, ++i) {
+        BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_storeri_io))
+          .addReg(SP)
+          .addImm(NumBytes + 4 * i)
+          .addReg(Hexagon::R0 + j)
+          .setMIFlag(MachineInstr::FrameSetup);
+      }
+    }
+  }
+
   if (hasFP(MF)) {
     insertAllocframe(MBB, InsertPt, NumBytes);
     if (AlignStack) {
@@ -655,7 +770,16 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
 
   if (!hasFP(MF)) {
     MachineFrameInfo &MFI = MF.getFrameInfo();
-    if (unsigned NumBytes = MFI.getStackSize()) {
+    unsigned NumBytes = MFI.getStackSize();
+    if (MF.getFunction().isVarArg() &&
+        MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl()) {
+      // On Hexagon Linux, deallocate the stack for the register saved area.
+      int NumVarArgRegs = 6 - FirstVarArgSavedReg;
+      int RegisterSavedAreaSizePlusPadding = (NumVarArgRegs % 2 == 0) ?
+        (NumVarArgRegs * 4) : (NumVarArgRegs * 4 + 4);
+      NumBytes += RegisterSavedAreaSizePlusPadding;
+    }
+    if (NumBytes) {
       BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
         .addReg(SP)
         .addImm(NumBytes);
@@ -710,24 +834,49 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
       NeedsDeallocframe = false;
   }
 
-  if (!NeedsDeallocframe)
-    return;
-  // If the returning instruction is PS_jmpret, replace it with dealloc_return,
-  // otherwise just add deallocframe. The function could be returning via a
-  // tail call.
-  if (RetOpc != Hexagon::PS_jmpret || DisableDeallocRet) {
-    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
+  if (!MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl() ||
+      !MF.getFunction().isVarArg()) {
+    if (!NeedsDeallocframe)
+      return;
+    // If the returning instruction is PS_jmpret, replace it with
+    // dealloc_return, otherwise just add deallocframe. The function
+    // could be returning via a tail call.
+    if (RetOpc != Hexagon::PS_jmpret || DisableDeallocRet) {
+      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
       .addDef(Hexagon::D15)
       .addReg(Hexagon::R30);
-    return;
-  }
-  unsigned NewOpc = Hexagon::L4_return;
-  MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc))
+      return;
+    }
+    unsigned NewOpc = Hexagon::L4_return;
+    MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc))
       .addDef(Hexagon::D15)
       .addReg(Hexagon::R30);
-  // Transfer the function live-out registers.
-  NewI->copyImplicitOps(MF, *RetI);
-  MBB.erase(RetI);
+    // Transfer the function live-out registers.
+    NewI->copyImplicitOps(MF, *RetI);
+    MBB.erase(RetI);
+  } else {
+    // L2_deallocframe instruction after it.
+    // Calculate the size of register saved area.
+    int NumVarArgRegs = 6 - FirstVarArgSavedReg;
+    int RegisterSavedAreaSizePlusPadding = (NumVarArgRegs % 2 == 0) ?
+      (NumVarArgRegs * 4) : (NumVarArgRegs * 4 + 4);
+
+    MachineBasicBlock::iterator Term = MBB.getFirstTerminator();
+    MachineBasicBlock::iterator I = (Term == MBB.begin()) ? MBB.end()
+                                                          : std::prev(Term);
+    if (I == MBB.end() ||
+       (I->getOpcode() != Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT &&
+        I->getOpcode() != Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC &&
+        I->getOpcode() != Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4 &&
+        I->getOpcode() != Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC))
+      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
+        .addDef(Hexagon::D15)
+        .addReg(Hexagon::R30);
+    if (RegisterSavedAreaSizePlusPadding != 0)
+      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
+        .addReg(SP)
+        .addImm(RegisterSavedAreaSizePlusPadding);
+  }
 }
 
 void HexagonFrameLowering::insertAllocframe(MachineBasicBlock &MBB,
@@ -2473,6 +2622,8 @@ void HexagonFrameLowering::addCalleeSaveRegistersAsImpOperand(MachineInstr *MI,
 /// checks are performed, which may still lead to the inline code.
 bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF,
       const CSIVect &CSI) const {
+  if (MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl())
+    return true;
   if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
     return true;
   if (!hasFP(MF))

diff  --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
index 27265dd53794..65e6185ac559 100644
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -29,6 +29,8 @@ class TargetRegisterClass;
 
 class HexagonFrameLowering : public TargetFrameLowering {
 public:
+  // First register which could possibly hold a variable argument.
+  int FirstVarArgSavedReg;
   explicit HexagonFrameLowering()
       : TargetFrameLowering(StackGrowsDown, Align(8), 0, Align::None(), true) {}
 

diff  --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index e11ecdc7d035..f356537abc32 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -393,9 +393,12 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee))
     Callee = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, MVT::i32);
 
+  // Linux ABI treats var-arg calls the same way as regular ones.
+  bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;
+
   // Analyze operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
-  HexagonCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext(),
+  HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs, *DAG.getContext(),
                         NumParams);
 
   if (Subtarget.useHVXOps())
@@ -750,9 +753,13 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
   MachineFrameInfo &MFI = MF.getFrameInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
 
+  // Linux ABI treats var-arg calls the same way as regular ones.
+  bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;
+
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
-  HexagonCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext(),
+  HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs,
+                        *DAG.getContext(),
                         MF.getFunction().getFunctionType()->getNumParams());
 
   if (Subtarget.useHVXOps())
@@ -766,8 +773,24 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
   // caller's stack is passed only when the struct size is smaller than (and
   // equal to) 8 bytes. If not, no address will be passed into callee and
   // callee return the result direclty through R0/R1.
+  auto NextSingleReg = [] (const TargetRegisterClass &RC, unsigned Reg) {
+    switch (RC.getID()) {
+    case Hexagon::IntRegsRegClassID:
+      return Reg - Hexagon::R0 + 1;
+    case Hexagon::DoubleRegsRegClassID:
+      return (Reg - Hexagon::D0 + 1) * 2;
+    case Hexagon::HvxVRRegClassID:
+      return Reg - Hexagon::V0 + 1;
+    case Hexagon::HvxWRRegClassID:
+      return (Reg - Hexagon::W0 + 1) * 2;
+    }
+    llvm_unreachable("Unexpected register class");
+  };
 
+  auto &HFL = const_cast<HexagonFrameLowering&>(*Subtarget.getFrameLowering());
   auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
+  HFL.FirstVarArgSavedReg = 0;
+  HMFI.setFirstNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));
 
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
@@ -811,6 +834,7 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
       }
       InVals.push_back(Copy);
       MRI.addLiveIn(VA.getLocReg(), VReg);
+      HFL.FirstVarArgSavedReg = NextSingleReg(*RC, VA.getLocReg());
     } else {
       assert(VA.isMemLoc() && "Argument should be passed in memory");
 
@@ -838,8 +862,48 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
     }
   }
 
+  if (IsVarArg && Subtarget.isEnvironmentMusl()) {
+    for (int i = HFL.FirstVarArgSavedReg; i < 6; i++)
+      MRI.addLiveIn(Hexagon::R0+i);
+  }
+
+  if (IsVarArg && Subtarget.isEnvironmentMusl()) {
+    HMFI.setFirstNamedArgFrameIndex(HMFI.getFirstNamedArgFrameIndex() - 1);
+    HMFI.setLastNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));
+
+    // Create Frame index for the start of register saved area.
+    int NumVarArgRegs = 6 - HFL.FirstVarArgSavedReg;
+    bool RequiresPadding = (NumVarArgRegs & 1);
+    int RegSaveAreaSizePlusPadding = RequiresPadding
+                                        ? (NumVarArgRegs + 1) * 4
+                                        : NumVarArgRegs * 4;
+
+    if (RegSaveAreaSizePlusPadding > 0) {
+      // The offset to saved register area should be 8 byte aligned.
+      int RegAreaStart = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
+      if (!(RegAreaStart % 8))
+        RegAreaStart = (RegAreaStart + 7) & -8;
 
-  if (IsVarArg) {
+      int RegSaveAreaFrameIndex =
+        MFI.CreateFixedObject(RegSaveAreaSizePlusPadding, RegAreaStart, true);
+      HMFI.setRegSavedAreaStartFrameIndex(RegSaveAreaFrameIndex);
+
+      // This will point to the next argument passed via stack.
+      int Offset = RegAreaStart + RegSaveAreaSizePlusPadding;
+      int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
+      HMFI.setVarArgsFrameIndex(FI);
+    } else {
+      // This will point to the next argument passed via stack, when
+      // there is no saved register area.
+      int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
+      int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
+      HMFI.setRegSavedAreaStartFrameIndex(FI);
+      HMFI.setVarArgsFrameIndex(FI);
+    }
+  }
+
+
+  if (IsVarArg && !Subtarget.isEnvironmentMusl()) {
     // This will point to the next argument passed via stack.
     int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
     int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
@@ -857,8 +921,82 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
   HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
   SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32);
   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
-  return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, Op.getOperand(1),
-                      MachinePointerInfo(SV));
+
+  if (!Subtarget.isEnvironmentMusl()) {
+    return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, Op.getOperand(1),
+                        MachinePointerInfo(SV));
+  }
+  auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
+  auto &HFL = *Subtarget.getFrameLowering();
+  SDLoc DL(Op);
+  SmallVector<SDValue, 8> MemOps;
+
+  // Get frame index of va_list.
+  SDValue FIN = Op.getOperand(1);
+
+  // If first Vararg register is odd, add 4 bytes to start of
+  // saved register area to point to the first register location.
+  // This is because the saved register area has to be 8 byte aligned.
+  // Incase of an odd start register, there will be 4 bytes of padding in
+  // the beginning of saved register area. If all registers area used up,
+  // the following condition will handle it correctly.
+  SDValue SavedRegAreaStartFrameIndex =
+    DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(), MVT::i32);
+
+  auto PtrVT = getPointerTy(DAG.getDataLayout());
+
+  if (HFL.FirstVarArgSavedReg & 1)
+    SavedRegAreaStartFrameIndex =
+      DAG.getNode(ISD::ADD, DL, PtrVT,
+                  DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(),
+                                    MVT::i32),
+                  DAG.getIntPtrConstant(4, DL));
+
+  // Store the saved register area start pointer.
+  SDValue Store =
+    DAG.getStore(Op.getOperand(0), DL,
+                 SavedRegAreaStartFrameIndex,
+                 FIN, MachinePointerInfo(SV));
+  MemOps.push_back(Store);
+
+  // Store saved register area end pointer.
+  FIN = DAG.getNode(ISD::ADD, DL, PtrVT,
+                    FIN, DAG.getIntPtrConstant(4, DL));
+  Store = DAG.getStore(Op.getOperand(0), DL,
+                       DAG.getFrameIndex(FuncInfo.getVarArgsFrameIndex(),
+                                         PtrVT),
+                       FIN, MachinePointerInfo(SV, 4));
+  MemOps.push_back(Store);
+
+  // Store overflow area pointer.
+  FIN = DAG.getNode(ISD::ADD, DL, PtrVT,
+                    FIN, DAG.getIntPtrConstant(4, DL));
+  Store = DAG.getStore(Op.getOperand(0), DL,
+                       DAG.getFrameIndex(FuncInfo.getVarArgsFrameIndex(),
+                                         PtrVT),
+                       FIN, MachinePointerInfo(SV, 8));
+  MemOps.push_back(Store);
+
+  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
+}
+
+SDValue
+HexagonTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
+  // Assert that the linux ABI is enabled for the current compilation.
+  assert(Subtarget.isEnvironmentMusl() && "Linux ABI should be enabled");
+  SDValue Chain = Op.getOperand(0);
+  SDValue DestPtr = Op.getOperand(1);
+  SDValue SrcPtr = Op.getOperand(2);
+  const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+  SDLoc DL(Op);
+  // Size of the va_list is 12 bytes as it has 3 pointers. Therefore,
+  // we need to memcopy 12 bytes from va_list to another similar list.
+  return DAG.getMemcpy(Chain, DL, DestPtr, SrcPtr,
+                       DAG.getIntPtrConstant(12, DL), 4, /*isVolatile*/false,
+                       false, false,
+                       MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
+
 }
 
 SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
@@ -1375,7 +1513,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::VASTART, MVT::Other, Custom);
   setOperationAction(ISD::VAEND,   MVT::Other, Expand);
   setOperationAction(ISD::VAARG,   MVT::Other, Expand);
-  setOperationAction(ISD::VACOPY,  MVT::Other, Expand);
+  if (Subtarget.isEnvironmentMusl())
+    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
+  else
+    setOperationAction(ISD::VACOPY,  MVT::Other, Expand);
 
   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
@@ -2928,6 +3069,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     case ISD::GlobalAddress:        return LowerGLOBALADDRESS(Op, DAG);
     case ISD::BlockAddress:         return LowerBlockAddress(Op, DAG);
     case ISD::GLOBAL_OFFSET_TABLE:  return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
+    case ISD::VACOPY:              return LowerVACOPY(Op, DAG);
     case ISD::VASTART:              return LowerVASTART(Op, DAG);
     case ISD::DYNAMIC_STACKALLOC:   return LowerDYNAMIC_STACKALLOC(Op, DAG);
     case ISD::SETCC:                return LowerSETCC(Op, DAG);

diff  --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index e79646de6287..d40b889b3aff 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -248,6 +248,7 @@ namespace HexagonISD {
     }
 
     SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
 

diff  --git a/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
index 2961e16cc9dc..89ef5c2a891d 100644
--- a/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -30,6 +30,9 @@ class HexagonMachineFunctionInfo : public MachineFunctionInfo {
   unsigned StackAlignBaseVReg = 0;    // Aligned-stack base register (virtual)
   unsigned StackAlignBasePhysReg = 0; //                             (physical)
   int VarArgsFrameIndex;
+  int RegSavedAreaStartFrameIndex;
+  int FirstNamedArgFrameIndex;
+  int LastNamedArgFrameIndex;
   bool HasClobberLR = false;
   bool HasEHReturn = false;
   std::map<const MachineInstr*, unsigned> PacketInfo;
@@ -46,6 +49,15 @@ class HexagonMachineFunctionInfo : public MachineFunctionInfo {
   void setVarArgsFrameIndex(int v) { VarArgsFrameIndex = v; }
   int getVarArgsFrameIndex() { return VarArgsFrameIndex; }
 
+  void setRegSavedAreaStartFrameIndex(int v) { RegSavedAreaStartFrameIndex = v;}
+  int getRegSavedAreaStartFrameIndex() { return RegSavedAreaStartFrameIndex; }
+
+  void setFirstNamedArgFrameIndex(int v) { FirstNamedArgFrameIndex = v; }
+  int getFirstNamedArgFrameIndex() { return FirstNamedArgFrameIndex; }
+
+  void setLastNamedArgFrameIndex(int v) { LastNamedArgFrameIndex = v; }
+  int getLastNamedArgFrameIndex() { return LastNamedArgFrameIndex; }
+
   void setStartPacket(MachineInstr* MI) {
     PacketInfo[MI] |= Hexagon::StartPacket;
   }

diff  --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index 6c706fea096b..097a0bea9baa 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -79,7 +79,7 @@ static cl::opt<bool> EnableCheckBankConflict("hexagon-check-bank-conflict",
 HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
                                    StringRef FS, const TargetMachine &TM)
     : HexagonGenSubtargetInfo(TT, CPU, FS), OptLevel(TM.getOptLevel()),
-      CPUString(Hexagon_MC::selectHexagonCPU(CPU)),
+      CPUString(Hexagon_MC::selectHexagonCPU(CPU)), TargetTriple(TT),
       InstrInfo(initializeSubtargetDependencies(CPU, FS)),
       RegInfo(getHwMode()), TLInfo(TM, *this),
       InstrItins(getInstrItineraryForCPU(CPUString)) {

diff  --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index f81d668b3a1c..d19709fa5e34 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -86,6 +86,7 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo {
 
 private:
   std::string CPUString;
+  Triple TargetTriple;
   HexagonInstrInfo InstrInfo;
   HexagonRegisterInfo RegInfo;
   HexagonTargetLowering TLInfo;
@@ -97,6 +98,11 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo {
   HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
                    const TargetMachine &TM);
 
+  const Triple &getTargetTriple() const { return TargetTriple; }
+  bool isEnvironmentMusl() const {
+    return TargetTriple.getEnvironment() == Triple::Musl;
+  }
+
   /// getInstrItins - Return the instruction itineraries based on subtarget
   /// selection.
   const InstrItineraryData *getInstrItineraryData() const override {

diff  --git a/llvm/test/CodeGen/Hexagon/vacopy.ll b/llvm/test/CodeGen/Hexagon/vacopy.ll
new file mode 100644
index 000000000000..7b9dcd3ab45a
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/vacopy.ll
@@ -0,0 +1,39 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv62 -mtriple=hexagon-unknown-linux-musl < %s | FileCheck %s
+; CHECK-LABEL: PrintInts:
+; CHECK-DAG: memw{{.*}} = r{{[0-9]+}}
+; CHECK-DAG: memw{{.*}} = r{{[0-9]+}}
+; CHECK-DAG: r{{[0-9]+}}:{{[0-9]+}} = memd{{.*}}
+; CHECK-DAG: memd{{.*}} = r{{[0-9]+}}:{{[0-9]+}}
+
+%struct.__va_list_tag = type { i8*, i8*, i8* }
+
+; Function Attrs: nounwind
+define void @PrintInts(i32 %first, ...) #0 {
+entry:
+  %vl = alloca [1 x %struct.__va_list_tag], align 8
+  %vl_count = alloca [1 x %struct.__va_list_tag], align 8
+  %arraydecay1 = bitcast [1 x %struct.__va_list_tag]* %vl to i8*
+  call void @llvm.va_start(i8* %arraydecay1)
+  %0 = bitcast [1 x %struct.__va_list_tag]* %vl_count to i8*
+  call void @llvm.va_copy(i8* %0, i8* %arraydecay1)
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.va_start(i8*) #1
+
+; Function Attrs: nounwind
+declare void @llvm.va_copy(i8*, i8*) #1
+
+; Function Attrs: nounwind
+define i32 @main() #0 {
+entry:
+  tail call void (i32, ...) @PrintInts(i32 undef, i32 20, i32 30, i32 40, i32 50, i32 0)
+  ret i32 0
+}
+
+attributes #0 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"Clang 3.1"}

diff  --git a/llvm/test/CodeGen/Hexagon/vararg-deallocate-sp.ll b/llvm/test/CodeGen/Hexagon/vararg-deallocate-sp.ll
new file mode 100644
index 000000000000..fbdfb65c57d7
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/vararg-deallocate-sp.ll
@@ -0,0 +1,13 @@
+; RUN: llc -march=hexagon -mtriple=hexagon-unknown-linux-musl < %s | FileCheck %s
+
+; Test that the compiler deallocates the register saved area on Linux
+; for functions that do not need a frame pointer.
+
+; CHECK: r29 = add(r29,#-[[SIZE:[0-9]+]]
+; CHECK: r29 = add(r29,#[[SIZE]])
+
+define void @test(...) {
+entry:
+  ret void
+}
+

diff  --git a/llvm/test/CodeGen/Hexagon/vararg-linux-abi.ll b/llvm/test/CodeGen/Hexagon/vararg-linux-abi.ll
new file mode 100644
index 000000000000..a4523313893f
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/vararg-linux-abi.ll
@@ -0,0 +1,93 @@
+; RUN: llc -march=hexagon -mtriple=hexagon-unknown-linux-musl < %s | FileCheck %s
+
+; Check that we update the stack pointer before we do allocframe, so that
+; the LR/FP are stored in the location required by the Linux ABI.
+; CHECK: r29 = add(r29,#-24)
+; CHECK: allocframe
+
+target triple = "hexagon-unknown-linux"
+
+%s.0 = type { i8*, i8*, i8* }
+
+define dso_local i32 @f0(i32 %a0, ...) local_unnamed_addr #0 {
+b0:
+  %v0 = alloca [1 x %s.0], align 8
+  %v1 = bitcast [1 x %s.0]* %v0 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 12, i8* nonnull %v1) #2
+  call void @llvm.va_start(i8* nonnull %v1)
+  %v2 = getelementptr inbounds [1 x %s.0], [1 x %s.0]* %v0, i32 0, i32 0, i32 0
+  %v3 = load i8*, i8** %v2, align 8
+  %v4 = getelementptr inbounds [1 x %s.0], [1 x %s.0]* %v0, i32 0, i32 0, i32 1
+  %v5 = load i8*, i8** %v4, align 4
+  %v6 = getelementptr i8, i8* %v3, i32 4
+  %v7 = icmp sgt i8* %v6, %v5
+  br i1 %v7, label %b1, label %b2
+
+b1:                                               ; preds = %b0
+  %v8 = getelementptr inbounds [1 x %s.0], [1 x %s.0]* %v0, i32 0, i32 0, i32 2
+  %v9 = load i8*, i8** %v8, align 8
+  %v10 = getelementptr i8, i8* %v9, i32 4
+  store i8* %v10, i8** %v8, align 8
+  br label %b2
+
+b2:                                               ; preds = %b1, %b0
+  %v11 = phi i8* [ %v10, %b1 ], [ %v6, %b0 ]
+  %v12 = phi i8* [ %v9, %b1 ], [ %v3, %b0 ]
+  %v13 = bitcast i8* %v12 to i32*
+  store i8* %v11, i8** %v2, align 8
+  %v14 = load i32, i32* %v13, align 4
+  %v15 = icmp eq i32 %v14, 0
+  br i1 %v15, label %b7, label %b3
+
+b3:                                               ; preds = %b2
+  %v16 = getelementptr inbounds [1 x %s.0], [1 x %s.0]* %v0, i32 0, i32 0, i32 2
+  br label %b4
+
+b4:                                               ; preds = %b6, %b3
+  %v17 = phi i32 [ %v14, %b3 ], [ %v28, %b6 ]
+  %v18 = phi i32 [ %a0, %b3 ], [ %v20, %b6 ]
+  %v19 = phi i8* [ %v11, %b3 ], [ %v25, %b6 ]
+  %v20 = add nsw i32 %v17, %v18
+  %v21 = getelementptr i8, i8* %v19, i32 4
+  %v22 = icmp sgt i8* %v21, %v5
+  br i1 %v22, label %b5, label %b6
+
+b5:                                               ; preds = %b4
+  %v23 = load i8*, i8** %v16, align 8
+  %v24 = getelementptr i8, i8* %v23, i32 4
+  store i8* %v24, i8** %v16, align 8
+  br label %b6
+
+b6:                                               ; preds = %b5, %b4
+  %v25 = phi i8* [ %v24, %b5 ], [ %v21, %b4 ]
+  %v26 = phi i8* [ %v23, %b5 ], [ %v19, %b4 ]
+  %v27 = bitcast i8* %v26 to i32*
+  store i8* %v25, i8** %v2, align 8
+  %v28 = load i32, i32* %v27, align 4
+  %v29 = icmp eq i32 %v28, 0
+  br i1 %v29, label %b7, label %b4
+
+b7:                                               ; preds = %b6, %b2
+  %v30 = phi i32 [ %a0, %b2 ], [ %v20, %b6 ]
+  call void @llvm.va_end(i8* nonnull %v1)
+  call void @llvm.lifetime.end.p0i8(i64 12, i8* nonnull %v1) #2
+  ret i32 %v30
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind
+declare void @llvm.va_start(i8*) #2
+
+; Function Attrs: nounwind
+declare void @llvm.va_end(i8*) #2
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+attributes #0 = { argmemonly nounwind "frame-pointer"="all" }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"wchar_size", i32 4}

diff  --git a/llvm/test/CodeGen/Hexagon/vararg.ll b/llvm/test/CodeGen/Hexagon/vararg.ll
new file mode 100644
index 000000000000..ed9029016af5
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/vararg.ll
@@ -0,0 +1,97 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv62 -mtriple=hexagon-unknown-linux-musl  -O0 < %s | FileCheck %s
+
+; CHECK-LABEL: foo:
+
+; Check function prologue generation
+; CHECK: r29 = add(r29,#-24)
+; CHECK: memw(r29+#4) = r1
+; CHECK: memw(r29+#8) = r2
+; CHECK: memw(r29+#12) = r3
+; CHECK: memw(r29+#16) = r4
+; CHECK: memw(r29+#20) = r5
+; CHECK: r29 = add(r29,#24)
+
+
+%struct.AAA = type { i32, i32, i32, i32 }
+%struct.__va_list_tag = type { i8*, i8*, i8* }
+
+ at aaa = global %struct.AAA { i32 100, i32 200, i32 300, i32 400 }, align 4
+ at .str = private unnamed_addr constant [13 x i8] c"result = %d\0A\00", align 1
+
+; Function Attrs: nounwind
+define i32 @foo(i32 %xx, ...) #0 {
+entry:
+  %ap = alloca [1 x %struct.__va_list_tag], align 8
+  %arraydecay1 = bitcast [1 x %struct.__va_list_tag]* %ap to i8*
+  call void @llvm.va_start(i8* %arraydecay1)
+  %__current_saved_reg_area_pointer_p = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0, i32 0
+  %__current_saved_reg_area_pointer = load i8*, i8** %__current_saved_reg_area_pointer_p, align 8
+  %__saved_reg_area_end_pointer_p = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0, i32 1
+  %__saved_reg_area_end_pointer = load i8*, i8** %__saved_reg_area_end_pointer_p, align 4
+  %__new_saved_reg_area_pointer = getelementptr i8, i8* %__current_saved_reg_area_pointer, i32 4
+  %0 = icmp sgt i8* %__new_saved_reg_area_pointer, %__saved_reg_area_end_pointer
+  %__overflow_area_pointer_p = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0, i32 2
+  %__overflow_area_pointer = load i8*, i8** %__overflow_area_pointer_p, align 8
+  br i1 %0, label %vaarg.on_stack, label %vaarg.end
+
+vaarg.on_stack:                                   ; preds = %entry
+  %__overflow_area_pointer.next = getelementptr i8, i8* %__overflow_area_pointer, i32 4
+  store i8* %__overflow_area_pointer.next, i8** %__overflow_area_pointer_p, align 8
+  br label %vaarg.end
+
+vaarg.end:                                        ; preds = %entry, %vaarg.on_stack
+  %__overflow_area_pointer5 = phi i8* [ %__overflow_area_pointer.next, %vaarg.on_stack ], [ %__overflow_area_pointer, %entry ]
+  %storemerge32 = phi i8* [ %__overflow_area_pointer.next, %vaarg.on_stack ], [ %__new_saved_reg_area_pointer, %entry ]
+  %vaarg.addr.in = phi i8* [ %__overflow_area_pointer, %vaarg.on_stack ], [ %__current_saved_reg_area_pointer, %entry ]
+  store i8* %storemerge32, i8** %__current_saved_reg_area_pointer_p, align 8
+  %vaarg.addr = bitcast i8* %vaarg.addr.in to i32*
+  %1 = load i32, i32* %vaarg.addr, align 4
+  %__overflow_area_pointer_p4 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0, i32 2
+  %__overflow_area_pointer.next6 = getelementptr i8, i8* %__overflow_area_pointer5, i32 16
+  store i8* %__overflow_area_pointer.next6, i8** %__overflow_area_pointer_p4, align 8
+  %bbb.sroa.1.0.idx27 = getelementptr inbounds i8, i8* %__overflow_area_pointer5, i32 12
+  %2 = bitcast i8* %bbb.sroa.1.0.idx27 to i32*
+  %bbb.sroa.1.0.copyload = load i32, i32* %2, align 4
+  %add8 = add nsw i32 %bbb.sroa.1.0.copyload, %1
+  %__new_saved_reg_area_pointer15 = getelementptr i8, i8* %storemerge32, i32 4
+  %3 = icmp sgt i8* %__new_saved_reg_area_pointer15, %__saved_reg_area_end_pointer
+  br i1 %3, label %vaarg.on_stack17, label %vaarg.end21
+
+vaarg.on_stack17:                                 ; preds = %vaarg.end
+  %__overflow_area_pointer.next20 = getelementptr i8, i8* %__overflow_area_pointer5, i32 20
+  store i8* %__overflow_area_pointer.next20, i8** %__overflow_area_pointer_p4, align 8
+  br label %vaarg.end21
+
+vaarg.end21:                                      ; preds = %vaarg.end, %vaarg.on_stack17
+  %storemerge = phi i8* [ %__overflow_area_pointer.next20, %vaarg.on_stack17 ], [ %__new_saved_reg_area_pointer15, %vaarg.end ]
+  %vaarg.addr22.in = phi i8* [ %__overflow_area_pointer.next6, %vaarg.on_stack17 ], [ %storemerge32, %vaarg.end ]
+  store i8* %storemerge, i8** %__current_saved_reg_area_pointer_p, align 8
+  %vaarg.addr22 = bitcast i8* %vaarg.addr22.in to i32*
+  %4 = load i32, i32* %vaarg.addr22, align 4
+  %add23 = add nsw i32 %add8, %4
+  call void @llvm.va_end(i8* %arraydecay1)
+  ret i32 %add23
+}
+
+; Function Attrs: nounwind
+declare void @llvm.va_start(i8*) #1
+
+; Function Attrs: nounwind
+declare void @llvm.va_end(i8*) #1
+
+; Function Attrs: nounwind
+define i32 @main() #0 {
+entry:
+  %call = tail call i32 (i32, ...) @foo(i32 undef, i32 2, %struct.AAA* byval align 4 @aaa, i32 4)
+  %call1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), i32 %call) #1
+  ret i32 %call
+}
+
+; Function Attrs: nounwind
+declare i32 @printf(i8* nocapture readonly, ...) #0
+
+attributes #0 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"Clang 3.1"}

diff  --git a/llvm/test/CodeGen/Hexagon/vararg_align_check.ll b/llvm/test/CodeGen/Hexagon/vararg_align_check.ll
new file mode 100644
index 000000000000..0152eec7020d
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/vararg_align_check.ll
@@ -0,0 +1,186 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv62  -mtriple=hexagon-unknown-linux-musl -O0 < %s | FileCheck %s
+
+; CHECK-LABEL: foo:
+
+; Check Function prologue.
+; Note. All register numbers and offset are fixed.
+; Hence, no need of regular expression.
+
+; CHECK: r29 = add(r29,#-24)
+; CHECK: r7:6 = memd(r29+#24)
+; CHECK: memd(r29+#0) = r7:6
+; CHECK: r7:6 = memd(r29+#32)
+; CHECK: memd(r29+#8) = r7:6
+; CHECK: r7:6 = memd(r29+#40)
+; CHECK: memd(r29+#16) = r7:6
+; CHECK: memw(r29+#28) = r1
+; CHECK: memw(r29+#32) = r2
+; CHECK: memw(r29+#36) = r3
+; CHECK: memw(r29+#40) = r4
+; CHECK: memw(r29+#44) = r5
+; CHECK: r29 = add(r29,#24)
+
+%struct.AAA = type { i32, i32, i32, i32 }
+%struct.BBB = type { i8, i64, i32 }
+%struct.__va_list_tag = type { i8*, i8*, i8* }
+
+ at aaa = global %struct.AAA { i32 100, i32 200, i32 300, i32 400 }, align 4
+ at ddd = global { i8, i64, i32, [4 x i8] } { i8 1, i64 1000000, i32 5, [4 x i8] undef }, align 8
+ at .str = private unnamed_addr constant [13 x i8] c"result = %d\0A\00", align 1
+
+; Function Attrs: nounwind
+define i32 @foo(i32 %xx, %struct.BBB* byval align 8 %eee, ...) #0 {
+entry:
+  %xx.addr = alloca i32, align 4
+  %ap = alloca [1 x %struct.__va_list_tag], align 8
+  %d = alloca i32, align 4
+  %k = alloca i64, align 8
+  %ret = alloca i32, align 4
+  %bbb = alloca %struct.AAA, align 4
+  store i32 %xx, i32* %xx.addr, align 4
+  store i32 0, i32* %ret, align 4
+  %x = getelementptr inbounds %struct.BBB, %struct.BBB* %eee, i32 0, i32 0
+  %0 = load i8, i8* %x, align 1
+  %tobool = trunc i8 %0 to i1
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  store i32 1, i32* %ret, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  %arraydecay = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+  %arraydecay1 = bitcast %struct.__va_list_tag* %arraydecay to i8*
+  call void @llvm.va_start(i8* %arraydecay1)
+  %arraydecay2 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+  br label %vaarg.maybe_reg
+
+vaarg.maybe_reg:                                  ; preds = %if.end
+  %__current_saved_reg_area_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay2, i32 0, i32 0
+  %__current_saved_reg_area_pointer = load i8*, i8** %__current_saved_reg_area_pointer_p
+  %__saved_reg_area_end_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay2, i32 0, i32 1
+  %__saved_reg_area_end_pointer = load i8*, i8** %__saved_reg_area_end_pointer_p
+  %1 = ptrtoint i8* %__current_saved_reg_area_pointer to i32
+  %align_current_saved_reg_area_pointer = add i32 %1, 7
+  %align_current_saved_reg_area_pointer3 = and i32 %align_current_saved_reg_area_pointer, -8
+  %align_current_saved_reg_area_pointer4 = inttoptr i32 %align_current_saved_reg_area_pointer3 to i8*
+  %__new_saved_reg_area_pointer = getelementptr i8, i8* %align_current_saved_reg_area_pointer4, i32 8
+  %2 = icmp sgt i8* %__new_saved_reg_area_pointer, %__saved_reg_area_end_pointer
+  br i1 %2, label %vaarg.on_stack, label %vaarg.in_reg
+
+vaarg.in_reg:                                     ; preds = %vaarg.maybe_reg
+  %3 = bitcast i8* %align_current_saved_reg_area_pointer4 to i64*
+  store i8* %__new_saved_reg_area_pointer, i8** %__current_saved_reg_area_pointer_p
+  br label %vaarg.end
+
+vaarg.on_stack:                                   ; preds = %vaarg.maybe_reg
+  %__overflow_area_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay2, i32 0, i32 2
+  %__overflow_area_pointer = load i8*, i8** %__overflow_area_pointer_p
+  %4 = ptrtoint i8* %__overflow_area_pointer to i32
+  %align_overflow_area_pointer = add i32 %4, 7
+  %align_overflow_area_pointer5 = and i32 %align_overflow_area_pointer, -8
+  %align_overflow_area_pointer6 = inttoptr i32 %align_overflow_area_pointer5 to i8*
+  %__overflow_area_pointer.next = getelementptr i8, i8* %align_overflow_area_pointer6, i32 8
+  store i8* %__overflow_area_pointer.next, i8** %__overflow_area_pointer_p
+  store i8* %__overflow_area_pointer.next, i8** %__current_saved_reg_area_pointer_p
+  %5 = bitcast i8* %align_overflow_area_pointer6 to i64*
+  br label %vaarg.end
+
+vaarg.end:                                        ; preds = %vaarg.on_stack, %vaarg.in_reg
+  %vaarg.addr = phi i64* [ %3, %vaarg.in_reg ], [ %5, %vaarg.on_stack ]
+  %6 = load i64, i64* %vaarg.addr
+  store i64 %6, i64* %k, align 8
+  %7 = load i64, i64* %k, align 8
+  %conv = trunc i64 %7 to i32
+  %div = sdiv i32 %conv, 1000
+  %8 = load i32, i32* %ret, align 4
+  %add = add nsw i32 %8, %div
+  store i32 %add, i32* %ret, align 4
+  %arraydecay7 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+  %__overflow_area_pointer_p8 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay7, i32 0, i32 2
+  %__overflow_area_pointer9 = load i8*, i8** %__overflow_area_pointer_p8
+  %9 = bitcast i8* %__overflow_area_pointer9 to %struct.AAA*
+  %__overflow_area_pointer.next10 = getelementptr i8, i8* %__overflow_area_pointer9, i32 16
+  store i8* %__overflow_area_pointer.next10, i8** %__overflow_area_pointer_p8
+  %10 = bitcast %struct.AAA* %bbb to i8*
+  %11 = bitcast %struct.AAA* %9 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %10, i8* %11, i32 16, i32 4, i1 false)
+  %d11 = getelementptr inbounds %struct.AAA, %struct.AAA* %bbb, i32 0, i32 3
+  %12 = load i32, i32* %d11, align 4
+  %13 = load i32, i32* %ret, align 4
+  %add12 = add nsw i32 %13, %12
+  store i32 %add12, i32* %ret, align 4
+  %arraydecay13 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+  br label %vaarg.maybe_reg14
+
+vaarg.maybe_reg14:                                ; preds = %vaarg.end
+  %__current_saved_reg_area_pointer_p15 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay13, i32 0, i32 0
+  %__current_saved_reg_area_pointer16 = load i8*, i8** %__current_saved_reg_area_pointer_p15
+  %__saved_reg_area_end_pointer_p17 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay13, i32 0, i32 1
+  %__saved_reg_area_end_pointer18 = load i8*, i8** %__saved_reg_area_end_pointer_p17
+  %__new_saved_reg_area_pointer19 = getelementptr i8, i8* %__current_saved_reg_area_pointer16, i32 4
+  %14 = icmp sgt i8* %__new_saved_reg_area_pointer19, %__saved_reg_area_end_pointer18
+  br i1 %14, label %vaarg.on_stack21, label %vaarg.in_reg20
+
+vaarg.in_reg20:                                   ; preds = %vaarg.maybe_reg14
+  %15 = bitcast i8* %__current_saved_reg_area_pointer16 to i32*
+  store i8* %__new_saved_reg_area_pointer19, i8** %__current_saved_reg_area_pointer_p15
+  br label %vaarg.end25
+
+vaarg.on_stack21:                                 ; preds = %vaarg.maybe_reg14
+  %__overflow_area_pointer_p22 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay13, i32 0, i32 2
+  %__overflow_area_pointer23 = load i8*, i8** %__overflow_area_pointer_p22
+  %__overflow_area_pointer.next24 = getelementptr i8, i8* %__overflow_area_pointer23, i32 4
+  store i8* %__overflow_area_pointer.next24, i8** %__overflow_area_pointer_p22
+  store i8* %__overflow_area_pointer.next24, i8** %__current_saved_reg_area_pointer_p15
+  %16 = bitcast i8* %__overflow_area_pointer23 to i32*
+  br label %vaarg.end25
+
+vaarg.end25:                                      ; preds = %vaarg.on_stack21, %vaarg.in_reg20
+  %vaarg.addr26 = phi i32* [ %15, %vaarg.in_reg20 ], [ %16, %vaarg.on_stack21 ]
+  %17 = load i32, i32* %vaarg.addr26
+  store i32 %17, i32* %d, align 4
+  %18 = load i32, i32* %d, align 4
+  %19 = load i32, i32* %ret, align 4
+  %add27 = add nsw i32 %19, %18
+  store i32 %add27, i32* %ret, align 4
+  %arraydecay28 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+  %arraydecay2829 = bitcast %struct.__va_list_tag* %arraydecay28 to i8*
+  call void @llvm.va_end(i8* %arraydecay2829)
+  %20 = load i32, i32* %ret, align 4
+  ret i32 %20
+}
+
+; Function Attrs: nounwind
+declare void @llvm.va_start(i8*) #1
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
+
+; Function Attrs: nounwind
+declare void @llvm.va_end(i8*) #1
+
+; Function Attrs: nounwind
+define i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  %x = alloca i32, align 4
+  %m = alloca i64, align 8
+  store i32 0, i32* %retval
+  store i64 1000000, i64* %m, align 8
+  %0 = load i64, i64* %m, align 8
+  %call = call i32 (i32, %struct.BBB*, ...) @foo(i32 1, %struct.BBB* byval align 8 bitcast ({ i8, i64, i32, [4 x i8] }* @ddd to %struct.BBB*), i64 %0, %struct.AAA* byval align 4 @aaa, i32 4)
+  store i32 %call, i32* %x, align 4
+  %1 = load i32, i32* %x, align 4
+  %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), i32 %1)
+  %2 = load i32, i32* %x, align 4
+  ret i32 %2
+}
+
+declare i32 @printf(i8*, ...) #2
+
+attributes #1 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"Clang 3.1"}

diff  --git a/llvm/test/CodeGen/Hexagon/vararg_double_onstack.ll b/llvm/test/CodeGen/Hexagon/vararg_double_onstack.ll
new file mode 100644
index 000000000000..0a755e57fe0c
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/vararg_double_onstack.ll
@@ -0,0 +1,214 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv62  -mtriple=hexagon-unknown-linux-musl -O0 < %s | FileCheck %s
+
+; CHECK-LABEL: foo:
+
+; Check Function prologue.
+; Note. All register numbers and offset are fixed.
+; Hence, no need of regular expression.
+
+; CHECK: r29 = add(r29,#-8)
+; CHECK: memw(r29+#4) = r5
+; CHECK: r29 = add(r29,#8)
+
+%struct.AAA = type { i32, i32, i32, i32 }
+%struct.__va_list_tag = type { i8*, i8*, i8* }
+
+ at aaa = global %struct.AAA { i32 100, i32 200, i32 300, i32 400 }, align 4
+ at .str = private unnamed_addr constant [13 x i8] c"result = %d\0A\00", align 1
+
+; Function Attrs: nounwind
+define i32 @foo(i32 %xx, i32 %a, i32 %b, i32 %c, i32 %x, ...) #0 {
+entry:
+  %xx.addr = alloca i32, align 4
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca i32, align 4
+  %c.addr = alloca i32, align 4
+  %x.addr = alloca i32, align 4
+  %ap = alloca [1 x %struct.__va_list_tag], align 8
+  %d = alloca i32, align 4
+  %ret = alloca i32, align 4
+  %bbb = alloca %struct.AAA, align 4
+  store i32 %xx, i32* %xx.addr, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 %b, i32* %b.addr, align 4
+  store i32 %c, i32* %c.addr, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 0, i32* %ret, align 4
+  %arraydecay = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+  %arraydecay1 = bitcast %struct.__va_list_tag* %arraydecay to i8*
+  call void @llvm.va_start(i8* %arraydecay1)
+  %arraydecay2 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+  br label %vaarg.maybe_reg
+
+vaarg.maybe_reg:                                  ; preds = %entry
+  %__current_saved_reg_area_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay2, i32 0, i32 0
+  %__current_saved_reg_area_pointer = load i8*, i8** %__current_saved_reg_area_pointer_p
+  %__saved_reg_area_end_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay2, i32 0, i32 1
+  %__saved_reg_area_end_pointer = load i8*, i8** %__saved_reg_area_end_pointer_p
+  %0 = ptrtoint i8* %__current_saved_reg_area_pointer to i32
+  %align_current_saved_reg_area_pointer = add i32 %0, 7
+  %align_current_saved_reg_area_pointer3 = and i32 %align_current_saved_reg_area_pointer, -8
+  %align_current_saved_reg_area_pointer4 = inttoptr i32 %align_current_saved_reg_area_pointer3 to i8*
+  %__new_saved_reg_area_pointer = getelementptr i8, i8* %align_current_saved_reg_area_pointer4, i32 8
+  %1 = icmp sgt i8* %__new_saved_reg_area_pointer, %__saved_reg_area_end_pointer
+  br i1 %1, label %vaarg.on_stack, label %vaarg.in_reg
+
+vaarg.in_reg:                                     ; preds = %vaarg.maybe_reg
+  %2 = bitcast i8* %align_current_saved_reg_area_pointer4 to i64*
+  store i8* %__new_saved_reg_area_pointer, i8** %__current_saved_reg_area_pointer_p
+  br label %vaarg.end
+
+vaarg.on_stack:                                   ; preds = %vaarg.maybe_reg
+  %__overflow_area_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay2, i32 0, i32 2
+  %__overflow_area_pointer = load i8*, i8** %__overflow_area_pointer_p
+  %3 = ptrtoint i8* %__overflow_area_pointer to i32
+  %align_overflow_area_pointer = add i32 %3, 7
+  %align_overflow_area_pointer5 = and i32 %align_overflow_area_pointer, -8
+  %align_overflow_area_pointer6 = inttoptr i32 %align_overflow_area_pointer5 to i8*
+  %__overflow_area_pointer.next = getelementptr i8, i8* %align_overflow_area_pointer6, i32 8
+  store i8* %__overflow_area_pointer.next, i8** %__overflow_area_pointer_p
+  store i8* %__overflow_area_pointer.next, i8** %__current_saved_reg_area_pointer_p
+  %4 = bitcast i8* %align_overflow_area_pointer6 to i64*
+  br label %vaarg.end
+
+vaarg.end:                                        ; preds = %vaarg.on_stack, %vaarg.in_reg
+  %vaarg.addr = phi i64* [ %2, %vaarg.in_reg ], [ %4, %vaarg.on_stack ]
+  %5 = load i64, i64* %vaarg.addr
+  %conv = trunc i64 %5 to i32
+  store i32 %conv, i32* %d, align 4
+  %6 = load i32, i32* %d, align 4
+  %7 = load i32, i32* %ret, align 4
+  %add = add nsw i32 %7, %6
+  store i32 %add, i32* %ret, align 4
+  %arraydecay7 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+  %__overflow_area_pointer_p8 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay7, i32 0, i32 2
+  %__overflow_area_pointer9 = load i8*, i8** %__overflow_area_pointer_p8
+  %8 = bitcast i8* %__overflow_area_pointer9 to %struct.AAA*
+  %__overflow_area_pointer.next10 = getelementptr i8, i8* %__overflow_area_pointer9, i32 16
+  store i8* %__overflow_area_pointer.next10, i8** %__overflow_area_pointer_p8
+  %9 = bitcast %struct.AAA* %bbb to i8*
+  %10 = bitcast %struct.AAA* %8 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %9, i8* %10, i32 16, i32 4, i1 false)
+  %d11 = getelementptr inbounds %struct.AAA, %struct.AAA* %bbb, i32 0, i32 3
+  %11 = load i32, i32* %d11, align 4
+  %12 = load i32, i32* %ret, align 4
+  %add12 = add nsw i32 %12, %11
+  store i32 %add12, i32* %ret, align 4
+  %arraydecay13 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+  br label %vaarg.maybe_reg14
+
+vaarg.maybe_reg14:                                ; preds = %vaarg.end
+  %__current_saved_reg_area_pointer_p15 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay13, i32 0, i32 0
+  %__current_saved_reg_area_pointer16 = load i8*, i8** %__current_saved_reg_area_pointer_p15
+  %__saved_reg_area_end_pointer_p17 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay13, i32 0, i32 1
+  %__saved_reg_area_end_pointer18 = load i8*, i8** %__saved_reg_area_end_pointer_p17
+  %__new_saved_reg_area_pointer19 = getelementptr i8, i8* %__current_saved_reg_area_pointer16, i32 4
+  %13 = icmp sgt i8* %__new_saved_reg_area_pointer19, %__saved_reg_area_end_pointer18
+  br i1 %13, label %vaarg.on_stack21, label %vaarg.in_reg20
+
+vaarg.in_reg20:                                   ; preds = %vaarg.maybe_reg14
+  %14 = bitcast i8* %__current_saved_reg_area_pointer16 to i32*
+  store i8* %__new_saved_reg_area_pointer19, i8** %__current_saved_reg_area_pointer_p15
+  br label %vaarg.end25
+
+vaarg.on_stack21:                                 ; preds = %vaarg.maybe_reg14
+  %__overflow_area_pointer_p22 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay13, i32 0, i32 2
+  %__overflow_area_pointer23 = load i8*, i8** %__overflow_area_pointer_p22
+  %__overflow_area_pointer.next24 = getelementptr i8, i8* %__overflow_area_pointer23, i32 4
+  store i8* %__overflow_area_pointer.next24, i8** %__overflow_area_pointer_p22
+  store i8* %__overflow_area_pointer.next24, i8** %__current_saved_reg_area_pointer_p15
+  %15 = bitcast i8* %__overflow_area_pointer23 to i32*
+  br label %vaarg.end25
+
+vaarg.end25:                                      ; preds = %vaarg.on_stack21, %vaarg.in_reg20
+  %vaarg.addr26 = phi i32* [ %14, %vaarg.in_reg20 ], [ %15, %vaarg.on_stack21 ]
+  %16 = load i32, i32* %vaarg.addr26
+  store i32 %16, i32* %d, align 4
+  %17 = load i32, i32* %d, align 4
+  %18 = load i32, i32* %ret, align 4
+  %add27 = add nsw i32 %18, %17
+  store i32 %add27, i32* %ret, align 4
+  %arraydecay28 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+  br label %vaarg.maybe_reg29
+
+vaarg.maybe_reg29:                                ; preds = %vaarg.end25
+  %__current_saved_reg_area_pointer_p30 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay28, i32 0, i32 0
+  %__current_saved_reg_area_pointer31 = load i8*, i8** %__current_saved_reg_area_pointer_p30
+  %__saved_reg_area_end_pointer_p32 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay28, i32 0, i32 1
+  %__saved_reg_area_end_pointer33 = load i8*, i8** %__saved_reg_area_end_pointer_p32
+  %19 = ptrtoint i8* %__current_saved_reg_area_pointer31 to i32
+  %align_current_saved_reg_area_pointer34 = add i32 %19, 7
+  %align_current_saved_reg_area_pointer35 = and i32 %align_current_saved_reg_area_pointer34, -8
+  %align_current_saved_reg_area_pointer36 = inttoptr i32 %align_current_saved_reg_area_pointer35 to i8*
+  %__new_saved_reg_area_pointer37 = getelementptr i8, i8* %align_current_saved_reg_area_pointer36, i32 8
+  %20 = icmp sgt i8* %__new_saved_reg_area_pointer37, %__saved_reg_area_end_pointer33
+  br i1 %20, label %vaarg.on_stack39, label %vaarg.in_reg38
+
+vaarg.in_reg38:                                   ; preds = %vaarg.maybe_reg29
+  %21 = bitcast i8* %align_current_saved_reg_area_pointer36 to i64*
+  store i8* %__new_saved_reg_area_pointer37, i8** %__current_saved_reg_area_pointer_p30
+  br label %vaarg.end46
+
+vaarg.on_stack39:                                 ; preds = %vaarg.maybe_reg29
+  %__overflow_area_pointer_p40 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay28, i32 0, i32 2
+  %__overflow_area_pointer41 = load i8*, i8** %__overflow_area_pointer_p40
+  %22 = ptrtoint i8* %__overflow_area_pointer41 to i32
+  %align_overflow_area_pointer42 = add i32 %22, 7
+  %align_overflow_area_pointer43 = and i32 %align_overflow_area_pointer42, -8
+  %align_overflow_area_pointer44 = inttoptr i32 %align_overflow_area_pointer43 to i8*
+  %__overflow_area_pointer.next45 = getelementptr i8, i8* %align_overflow_area_pointer44, i32 8
+  store i8* %__overflow_area_pointer.next45, i8** %__overflow_area_pointer_p40
+  store i8* %__overflow_area_pointer.next45, i8** %__current_saved_reg_area_pointer_p30
+  %23 = bitcast i8* %align_overflow_area_pointer44 to i64*
+  br label %vaarg.end46
+
+vaarg.end46:                                      ; preds = %vaarg.on_stack39, %vaarg.in_reg38
+  %vaarg.addr47 = phi i64* [ %21, %vaarg.in_reg38 ], [ %23, %vaarg.on_stack39 ]
+  %24 = load i64, i64* %vaarg.addr47
+  %conv48 = trunc i64 %24 to i32
+  store i32 %conv48, i32* %d, align 4
+  %25 = load i32, i32* %d, align 4
+  %26 = load i32, i32* %ret, align 4
+  %add49 = add nsw i32 %26, %25
+  store i32 %add49, i32* %ret, align 4
+  %arraydecay50 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+  %arraydecay5051 = bitcast %struct.__va_list_tag* %arraydecay50 to i8*
+  call void @llvm.va_end(i8* %arraydecay5051)
+  %27 = load i32, i32* %ret, align 4
+  ret i32 %27
+}
+
+; Function Attrs: nounwind
+declare void @llvm.va_start(i8*) #1
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
+
+; Function Attrs: nounwind
+declare void @llvm.va_end(i8*) #1
+
+; Function Attrs: nounwind
+define i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  %x = alloca i32, align 4
+  %y = alloca i64, align 8
+  store i32 0, i32* %retval
+  store i64 1000000, i64* %y, align 8
+  %0 = load i64, i64* %y, align 8
+  %1 = load i64, i64* %y, align 8
+  %call = call i32 (i32, i32, i32, i32, i32, ...) @foo(i32 1, i32 2, i32 3, i32 4, i32 5, i64 %0, %struct.AAA* byval align 4 @aaa, i32 4, i64 %1)
+  store i32 %call, i32* %x, align 4
+  %2 = load i32, i32* %x, align 4
+  %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), i32 %2)
+  %3 = load i32, i32* %x, align 4
+  ret i32 %3
+}
+
+declare i32 @printf(i8*, ...) #2
+
+attributes #0 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"Clang 3.1"}

diff  --git a/llvm/test/CodeGen/Hexagon/vararg_named.ll b/llvm/test/CodeGen/Hexagon/vararg_named.ll
new file mode 100644
index 000000000000..5a357284caf8
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/vararg_named.ll
@@ -0,0 +1,211 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv62 -mtriple=hexagon-unknown-linux-musl -O0 < %s | FileCheck %s
+
+; CHECK-LABEL: foo:
+
+; Check the function prologue.
+; Note: all register numbers and offsets are fixed,
+; so there is no need for regular expressions.
+
+; CHECK: r29 = add(r29,#-16)
+; CHECK: r7:6 = memd(r29+#16)
+; CHECK: memd(r29+#0) = r7:6
+; CHECK: r7:6 = memd(r29+#24)
+; CHECK: memd(r29+#8) = r7:6
+; CHECK: r7:6 = memd(r29+#32)
+; CHECK: memd(r29+#16) = r7:6
+; CHECK: r7:6 = memd(r29+#40)
+; CHECK: memd(r29+#24) = r7:6
+; CHECK: memw(r29+#36) = r3
+; CHECK: memw(r29+#40) = r4
+; CHECK: memw(r29+#44) = r5
+; CHECK: r29 = add(r29,#16)
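+;
+; A sketch of the arithmetic behind these checks (assuming the vararg ABI
+; summarized in the commit message): r3-r5 are the registers that could have
+; carried unnamed arguments (the named i32 arguments take r0-r2 and the byval
+; structs are passed on the stack).  3 registers * 4 bytes = 12 bytes, padded
+; to 16 for 8-byte stack alignment, which matches the r29 adjustments of -16
+; and +16.  The memd copies move the named on-stack byval arguments down by
+; those same 16 bytes, and r3-r5 are then spilled at r29+#36, #40 and #44
+; after 4 bytes of padding.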
+
+%struct.AAA = type { i32, i32, i32, i32 }
+%struct.__va_list_tag = type { i8*, i8*, i8* }
+
+@aaa = global %struct.AAA { i32 100, i32 200, i32 300, i32 400 }, align 4
+@xxx = global %struct.AAA { i32 100, i32 200, i32 300, i32 400 }, align 4
+@yyy = global %struct.AAA { i32 100, i32 200, i32 300, i32 400 }, align 4
+@ccc = global %struct.AAA { i32 10, i32 20, i32 30, i32 40 }, align 4
+@fff = global %struct.AAA { i32 1, i32 2, i32 3, i32 4 }, align 4
+@.str = private unnamed_addr constant [13 x i8] c"result = %d\0A\00", align 1
+
+; Function Attrs: nounwind
+define i32 @foo(i32 %xx, i32 %z, i32 %m, %struct.AAA* byval align 4 %bbb, %struct.AAA* byval align 4 %GGG, ...) #0 {
+entry:
+  %xx.addr = alloca i32, align 4
+  %z.addr = alloca i32, align 4
+  %m.addr = alloca i32, align 4
+  %ap = alloca [1 x %struct.__va_list_tag], align 8
+  %d = alloca i32, align 4
+  %ret = alloca i32, align 4
+  %ddd = alloca %struct.AAA, align 4
+  %ggg = alloca %struct.AAA, align 4
+  %nnn = alloca %struct.AAA, align 4
+  store i32 %xx, i32* %xx.addr, align 4
+  store i32 %z, i32* %z.addr, align 4
+  store i32 %m, i32* %m.addr, align 4
+  store i32 0, i32* %ret, align 4
+  %arraydecay = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+  %arraydecay1 = bitcast %struct.__va_list_tag* %arraydecay to i8*
+  call void @llvm.va_start(i8* %arraydecay1)
+  %d2 = getelementptr inbounds %struct.AAA, %struct.AAA* %bbb, i32 0, i32 3
+  %0 = load i32, i32* %d2, align 4
+  %1 = load i32, i32* %ret, align 4
+  %add = add nsw i32 %1, %0
+  store i32 %add, i32* %ret, align 4
+  %2 = load i32, i32* %z.addr, align 4
+  %3 = load i32, i32* %ret, align 4
+  %add3 = add nsw i32 %3, %2
+  store i32 %add3, i32* %ret, align 4
+  %arraydecay4 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+  br label %vaarg.maybe_reg
+
+vaarg.maybe_reg:                                  ; preds = %entry
+  %__current_saved_reg_area_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay4, i32 0, i32 0
+  %__current_saved_reg_area_pointer = load i8*, i8** %__current_saved_reg_area_pointer_p
+  %__saved_reg_area_end_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay4, i32 0, i32 1
+  %__saved_reg_area_end_pointer = load i8*, i8** %__saved_reg_area_end_pointer_p
+  %__new_saved_reg_area_pointer = getelementptr i8, i8* %__current_saved_reg_area_pointer, i32 4
+  %4 = icmp sgt i8* %__new_saved_reg_area_pointer, %__saved_reg_area_end_pointer
+  br i1 %4, label %vaarg.on_stack, label %vaarg.in_reg
+
+vaarg.in_reg:                                     ; preds = %vaarg.maybe_reg
+  %5 = bitcast i8* %__current_saved_reg_area_pointer to i32*
+  store i8* %__new_saved_reg_area_pointer, i8** %__current_saved_reg_area_pointer_p
+  br label %vaarg.end
+
+vaarg.on_stack:                                   ; preds = %vaarg.maybe_reg
+  %__overflow_area_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay4, i32 0, i32 2
+  %__overflow_area_pointer = load i8*, i8** %__overflow_area_pointer_p
+  %__overflow_area_pointer.next = getelementptr i8, i8* %__overflow_area_pointer, i32 4
+  store i8* %__overflow_area_pointer.next, i8** %__overflow_area_pointer_p
+  store i8* %__overflow_area_pointer.next, i8** %__current_saved_reg_area_pointer_p
+  %6 = bitcast i8* %__overflow_area_pointer to i32*
+  br label %vaarg.end
+
+vaarg.end:                                        ; preds = %vaarg.on_stack, %vaarg.in_reg
+  %vaarg.addr = phi i32* [ %5, %vaarg.in_reg ], [ %6, %vaarg.on_stack ]
+  %7 = load i32, i32* %vaarg.addr
+  store i32 %7, i32* %d, align 4
+  %8 = load i32, i32* %d, align 4
+  %9 = load i32, i32* %ret, align 4
+  %add5 = add nsw i32 %9, %8
+  store i32 %add5, i32* %ret, align 4
+  %arraydecay6 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+  %__overflow_area_pointer_p7 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay6, i32 0, i32 2
+  %__overflow_area_pointer8 = load i8*, i8** %__overflow_area_pointer_p7
+  %10 = bitcast i8* %__overflow_area_pointer8 to %struct.AAA*
+  %__overflow_area_pointer.next9 = getelementptr i8, i8* %__overflow_area_pointer8, i32 16
+  store i8* %__overflow_area_pointer.next9, i8** %__overflow_area_pointer_p7
+  %11 = bitcast %struct.AAA* %ddd to i8*
+  %12 = bitcast %struct.AAA* %10 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %11, i8* %12, i32 16, i32 4, i1 false)
+  %d10 = getelementptr inbounds %struct.AAA, %struct.AAA* %ddd, i32 0, i32 3
+  %13 = load i32, i32* %d10, align 4
+  %14 = load i32, i32* %ret, align 4
+  %add11 = add nsw i32 %14, %13
+  store i32 %add11, i32* %ret, align 4
+  %arraydecay12 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+  %__overflow_area_pointer_p13 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay12, i32 0, i32 2
+  %__overflow_area_pointer14 = load i8*, i8** %__overflow_area_pointer_p13
+  %15 = bitcast i8* %__overflow_area_pointer14 to %struct.AAA*
+  %__overflow_area_pointer.next15 = getelementptr i8, i8* %__overflow_area_pointer14, i32 16
+  store i8* %__overflow_area_pointer.next15, i8** %__overflow_area_pointer_p13
+  %16 = bitcast %struct.AAA* %ggg to i8*
+  %17 = bitcast %struct.AAA* %15 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %16, i8* %17, i32 16, i32 4, i1 false)
+  %d16 = getelementptr inbounds %struct.AAA, %struct.AAA* %ggg, i32 0, i32 3
+  %18 = load i32, i32* %d16, align 4
+  %19 = load i32, i32* %ret, align 4
+  %add17 = add nsw i32 %19, %18
+  store i32 %add17, i32* %ret, align 4
+  %arraydecay18 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+  %__overflow_area_pointer_p19 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay18, i32 0, i32 2
+  %__overflow_area_pointer20 = load i8*, i8** %__overflow_area_pointer_p19
+  %20 = bitcast i8* %__overflow_area_pointer20 to %struct.AAA*
+  %__overflow_area_pointer.next21 = getelementptr i8, i8* %__overflow_area_pointer20, i32 16
+  store i8* %__overflow_area_pointer.next21, i8** %__overflow_area_pointer_p19
+  %21 = bitcast %struct.AAA* %nnn to i8*
+  %22 = bitcast %struct.AAA* %20 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %21, i8* %22, i32 16, i32 4, i1 false)
+  %d22 = getelementptr inbounds %struct.AAA, %struct.AAA* %nnn, i32 0, i32 3
+  %23 = load i32, i32* %d22, align 4
+  %24 = load i32, i32* %ret, align 4
+  %add23 = add nsw i32 %24, %23
+  store i32 %add23, i32* %ret, align 4
+  %arraydecay24 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+  br label %vaarg.maybe_reg25
+
+vaarg.maybe_reg25:                                ; preds = %vaarg.end
+  %__current_saved_reg_area_pointer_p26 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay24, i32 0, i32 0
+  %__current_saved_reg_area_pointer27 = load i8*, i8** %__current_saved_reg_area_pointer_p26
+  %__saved_reg_area_end_pointer_p28 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay24, i32 0, i32 1
+  %__saved_reg_area_end_pointer29 = load i8*, i8** %__saved_reg_area_end_pointer_p28
+  %__new_saved_reg_area_pointer30 = getelementptr i8, i8* %__current_saved_reg_area_pointer27, i32 4
+  %25 = icmp sgt i8* %__new_saved_reg_area_pointer30, %__saved_reg_area_end_pointer29
+  br i1 %25, label %vaarg.on_stack32, label %vaarg.in_reg31
+
+vaarg.in_reg31:                                   ; preds = %vaarg.maybe_reg25
+  %26 = bitcast i8* %__current_saved_reg_area_pointer27 to i32*
+  store i8* %__new_saved_reg_area_pointer30, i8** %__current_saved_reg_area_pointer_p26
+  br label %vaarg.end36
+
+vaarg.on_stack32:                                 ; preds = %vaarg.maybe_reg25
+  %__overflow_area_pointer_p33 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay24, i32 0, i32 2
+  %__overflow_area_pointer34 = load i8*, i8** %__overflow_area_pointer_p33
+  %__overflow_area_pointer.next35 = getelementptr i8, i8* %__overflow_area_pointer34, i32 4
+  store i8* %__overflow_area_pointer.next35, i8** %__overflow_area_pointer_p33
+  store i8* %__overflow_area_pointer.next35, i8** %__current_saved_reg_area_pointer_p26
+  %27 = bitcast i8* %__overflow_area_pointer34 to i32*
+  br label %vaarg.end36
+
+vaarg.end36:                                      ; preds = %vaarg.on_stack32, %vaarg.in_reg31
+  %vaarg.addr37 = phi i32* [ %26, %vaarg.in_reg31 ], [ %27, %vaarg.on_stack32 ]
+  %28 = load i32, i32* %vaarg.addr37
+  store i32 %28, i32* %d, align 4
+  %29 = load i32, i32* %d, align 4
+  %30 = load i32, i32* %ret, align 4
+  %add38 = add nsw i32 %30, %29
+  store i32 %add38, i32* %ret, align 4
+  %31 = load i32, i32* %m.addr, align 4
+  %32 = load i32, i32* %ret, align 4
+  %add39 = add nsw i32 %32, %31
+  store i32 %add39, i32* %ret, align 4
+  %arraydecay40 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+  %arraydecay4041 = bitcast %struct.__va_list_tag* %arraydecay40 to i8*
+  call void @llvm.va_end(i8* %arraydecay4041)
+  %33 = load i32, i32* %ret, align 4
+  ret i32 %33
+}
+
+; Function Attrs: nounwind
+declare void @llvm.va_start(i8*) #1
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
+
+; Function Attrs: nounwind
+declare void @llvm.va_end(i8*) #1
+
+; Function Attrs: nounwind
+define i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  %x = alloca i32, align 4
+  store i32 0, i32* %retval
+  %call = call i32 (i32, i32, i32, %struct.AAA*, %struct.AAA*, ...) @foo(i32 1, i32 3, i32 5, %struct.AAA* byval align 4 @aaa, %struct.AAA* byval align 4 @fff, i32 2, %struct.AAA* byval align 4 @xxx, %struct.AAA* byval align 4 @yyy, %struct.AAA* byval align 4 @ccc, i32 4)
+  store i32 %call, i32* %x, align 4
+  %0 = load i32, i32* %x, align 4
+  %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), i32 %0)
+  %1 = load i32, i32* %x, align 4
+  ret i32 %1
+}
+
+declare i32 @printf(i8*, ...) #2
+
+attributes #0 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"Clang 3.1"}

More information about the llvm-commits mailing list