[llvm] r332449 - [GlobalISel][IRTranslator] Split aggregates during IR translation.

Wed May 16 03:32:02 PDT 2018

Author: aemerson
Date: Wed May 16 03:32:02 2018
New Revision: 332449

URL: http://llvm.org/viewvc/llvm-project?rev=332449&view=rev
Log:
[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

Added:
    llvm/trunk/test/CodeGen/AArch64/GlobalISel/translate-constant-dag.ll
Modified:
    llvm/trunk/include/llvm/CodeGen/GlobalISel/IRTranslator.h
    llvm/trunk/lib/CodeGen/GlobalISel/IRTranslator.cpp
    llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp
    llvm/trunk/lib/Target/ARM/ARMCallLowering.cpp
    llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
    llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
    llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll
    llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator.ll
    llvm/trunk/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll
    llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll
    llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
    llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll

Modified: llvm/trunk/include/llvm/CodeGen/GlobalISel/IRTranslator.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/GlobalISel/IRTranslator.h?rev=332449&r1=332448&r2=332449&view=diff
==============================================================================

--- llvm/trunk/include/llvm/CodeGen/GlobalISel/IRTranslator.h (original)
+++ llvm/trunk/include/llvm/CodeGen/GlobalISel/IRTranslator.h Wed May 16 03:32:02 2018
@@ -24,6 +24,7 @@
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Types.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/Allocator.h"
 #include "llvm/IR/Intrinsics.h"
 #include <memory>
 #include <utility>
@@ -63,9 +64,83 @@ private:
   /// Interface used to lower the everything related to calls.
   const CallLowering *CLI;
 
-  /// Mapping of the values of the current LLVM IR function
-  /// to the related virtual registers.
-  ValueToVReg ValToVReg;
+  /// This class contains the mapping between the Values to vreg related data.
+  class ValueToVRegInfo {
+  public:
+    ValueToVRegInfo() = default;
+
+    using VRegListT = SmallVector<unsigned, 1>;
+    using OffsetListT = SmallVector<uint64_t, 1>;
+
+    using const_vreg_iterator =
+        DenseMap<const Value *, VRegListT *>::const_iterator;
+    using const_offset_iterator =
+        DenseMap<const Value *, OffsetListT *>::const_iterator;
+
+    inline const_vreg_iterator vregs_end() const { return ValToVRegs.end(); }
+
+    VRegListT *getVRegs(const Value &V) {
+      auto It = ValToVRegs.find(&V);
+      if (It != ValToVRegs.end())
+        return It->second;
+
+      return insertVRegs(V);
+    }
+
+    OffsetListT *getOffsets(const Value &V) {
+      auto It = TypeToOffsets.find(V.getType());
+      if (It != TypeToOffsets.end())
+        return It->second;
+
+      return insertOffsets(V);
+    }
+
+    const_vreg_iterator findVRegs(const Value &V) const {
+      return ValToVRegs.find(&V);
+    }
+
+    bool contains(const Value &V) const {
+      return ValToVRegs.find(&V) != ValToVRegs.end();
+    }
+
+    void reset() {
+      ValToVRegs.clear();
+      TypeToOffsets.clear();
+      VRegAlloc.DestroyAll();
+      OffsetAlloc.DestroyAll();
+    }
+
+  private:
+    VRegListT *insertVRegs(const Value &V) {
+      assert(ValToVRegs.find(&V) == ValToVRegs.end() && "Value already exists");
+
+      // We placement new using our fast allocator since we never try to free
+      // the vectors until translation is finished.
+      auto *VRegList = new (VRegAlloc.Allocate()) VRegListT();
+      ValToVRegs[&V] = VRegList;
+      return VRegList;
+    }
+
+    OffsetListT *insertOffsets(const Value &V) {
+      assert(TypeToOffsets.find(V.getType()) == TypeToOffsets.end() &&
+             "Type already exists");
+
+      auto *OffsetList = new (OffsetAlloc.Allocate()) OffsetListT();
+      TypeToOffsets[V.getType()] = OffsetList;
+      return OffsetList;
+    }
+    SpecificBumpPtrAllocator<VRegListT> VRegAlloc;
+    SpecificBumpPtrAllocator<OffsetListT> OffsetAlloc;
+
+    // We store pointers to vectors here since references may be invalidated
+    // while we hold them if we stored the vectors directly.
+    DenseMap<const Value *, VRegListT*> ValToVRegs;
+    DenseMap<const Type *, OffsetListT*> TypeToOffsets;
+  };
+
+  /// Mapping of the values of the current LLVM IR function to the related
+  /// virtual registers and offsets.
+  ValueToVRegInfo VMap;
 
   // N.b. it's not completely obvious that this will be sufficient for every
   // LLVM IR construct (with "invoke" being the obvious candidate to mess up our
@@ -82,7 +157,8 @@ private:
 
   // List of stubbed PHI instructions, for values and basic blocks to be filled
   // in once all MachineBasicBlocks have been created.
-  SmallVector<std::pair<const PHINode *, MachineInstr *>, 4> PendingPHIs;
+  SmallVector<std::pair<const PHINode *, SmallVector<MachineInstr *, 1>>, 4>
+      PendingPHIs;
 
   /// Record of what frame index has been allocated to specified allocas for
   /// this function.
@@ -99,7 +175,7 @@ private:
   /// The general algorithm is:
   /// 1. Look for a virtual register for each operand or
   ///    create one.
-  /// 2 Update the ValToVReg accordingly.
+  /// 2 Update the VMap accordingly.
   /// 2.alt. For constant arguments, if they are compile time constants,
   ///   produce an immediate in the right operand and do not touch
   ///   ValToReg. Actually we will go with a virtual register for each
@@ -146,6 +222,19 @@ private:
 
   bool translateInlineAsm(const CallInst &CI, MachineIRBuilder &MIRBuilder);
 
+  // FIXME: temporary function to expose previous interface to call lowering
+  // until it is refactored.
+  /// Combines all component registers of \p V into a single scalar with size
+  /// "max(Offsets) + last size".
+  unsigned packRegs(const Value &V, MachineIRBuilder &MIRBuilder);
+
+  void unpackRegs(const Value &V, unsigned Src, MachineIRBuilder &MIRBuilder);
+
+  /// Returns true if the value should be split into multiple LLTs.
+  /// If \p Offsets is given then the split type's offsets will be stored in it.
+  bool valueIsSplit(const Value &V,
+                    SmallVectorImpl<uint64_t> *Offsets = nullptr);
+
   /// Translate call instruction.
   /// \pre \p U is a call instruction.
   bool translateCall(const User &U, MachineIRBuilder &MIRBuilder);
@@ -381,9 +470,24 @@ private:
   // * Clear the different maps.
   void finalizeFunction();
 
-  /// Get the VReg that represents \p Val.
-  /// If such VReg does not exist, it is created.
-  unsigned getOrCreateVReg(const Value &Val);
+  /// Get the VRegs that represent \p Val.
+  /// Non-aggregate types have just one corresponding VReg and the list can be
+  /// used as a single "unsigned". Aggregates get flattened. If such VRegs do
+  /// not exist, they are created.
+  ArrayRef<unsigned> getOrCreateVRegs(const Value &Val);
+
+  unsigned getOrCreateVReg(const Value &Val) {
+    auto Regs = getOrCreateVRegs(Val);
+    if (Regs.empty())
+      return 0;
+    assert(Regs.size() == 1 &&
+           "attempt to get single VReg for aggregate or void");
+    return Regs[0];
+  }
+
+  /// Allocate some vregs and offsets in the VMap. Then populate just the
+  /// offsets while leaving the vregs empty.
+  ValueToVRegInfo::VRegListT &allocateVRegs(const Value &Val);
 
   /// Get the frame index that represents \p Val.
   /// If such VReg does not exist, it is created.

Modified: llvm/trunk/lib/CodeGen/GlobalISel/IRTranslator.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/GlobalISel/IRTranslator.cpp?rev=332449&r1=332448&r2=332449&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/GlobalISel/IRTranslator.cpp (original)
+++ llvm/trunk/lib/CodeGen/GlobalISel/IRTranslator.cpp Wed May 16 03:32:02 2018
@@ -106,33 +106,97 @@ void IRTranslator::getAnalysisUsage(Anal
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
-unsigned IRTranslator::getOrCreateVReg(const Value &Val) {
-  unsigned &ValReg = ValToVReg[&Val];
-
-  if (ValReg)
-    return ValReg;
+static void computeValueLLTs(const DataLayout &DL, Type &Ty,
+                             SmallVectorImpl<LLT> &ValueTys,
+                             SmallVectorImpl<uint64_t> *Offsets = nullptr,
+                             uint64_t StartingOffset = 0) {
+  // Given a struct type, recursively traverse the elements.
+  if (StructType *STy = dyn_cast<StructType>(&Ty)) {
+    const StructLayout *SL = DL.getStructLayout(STy);
+    for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I)
+      computeValueLLTs(DL, *STy->getElementType(I), ValueTys, Offsets,
+                       StartingOffset + SL->getElementOffset(I));
+    return;
+  }
+  // Given an array type, recursively traverse the elements.
+  if (ArrayType *ATy = dyn_cast<ArrayType>(&Ty)) {
+    Type *EltTy = ATy->getElementType();
+    uint64_t EltSize = DL.getTypeAllocSize(EltTy);
+    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
+      computeValueLLTs(DL, *EltTy, ValueTys, Offsets,
+                       StartingOffset + i * EltSize);
+    return;
+  }
+  // Interpret void as zero return values.
+  if (Ty.isVoidTy())
+    return;
+  // Base case: we can get an LLT for this LLVM IR type.
+  ValueTys.push_back(getLLTForType(Ty, DL));
+  if (Offsets != nullptr)
+    Offsets->push_back(StartingOffset * 8);
+}
+
+IRTranslator::ValueToVRegInfo::VRegListT &
+IRTranslator::allocateVRegs(const Value &Val) {
+  assert(!VMap.contains(Val) && "Value already allocated in VMap");
+  auto *Regs = VMap.getVRegs(Val);
+  auto *Offsets = VMap.getOffsets(Val);
+  SmallVector<LLT, 4> SplitTys;
+  computeValueLLTs(*DL, *Val.getType(), SplitTys,
+                   Offsets->empty() ? Offsets : nullptr);
+  for (unsigned i = 0; i < SplitTys.size(); ++i)
+    Regs->push_back(0);
+  return *Regs;
+}
+
+ArrayRef<unsigned> IRTranslator::getOrCreateVRegs(const Value &Val) {
+  auto VRegsIt = VMap.findVRegs(Val);
+  if (VRegsIt != VMap.vregs_end())
+    return *VRegsIt->second;
+
+  if (Val.getType()->isVoidTy())
+    return *VMap.getVRegs(Val);
+
+  // Create entry for this type.
+  auto *VRegs = VMap.getVRegs(Val);
+  auto *Offsets = VMap.getOffsets(Val);
 
-  // Fill ValRegsSequence with the sequence of registers
-  // we need to concat together to produce the value.
   assert(Val.getType()->isSized() &&
          "Don't know how to create an empty vreg");
-  unsigned VReg =
-      MRI->createGenericVirtualRegister(getLLTForType(*Val.getType(), *DL));
-  ValReg = VReg;
 
-  if (auto CV = dyn_cast<Constant>(&Val)) {
-    bool Success = translate(*CV, VReg);
+  SmallVector<LLT, 4> SplitTys;
+  computeValueLLTs(*DL, *Val.getType(), SplitTys,
+                   Offsets->empty() ? Offsets : nullptr);
+
+  if (!isa<Constant>(Val)) {
+    for (auto Ty : SplitTys)
+      VRegs->push_back(MRI->createGenericVirtualRegister(Ty));
+    return *VRegs;
+  }
+
+  if (Val.getType()->isAggregateType()) {
+    // UndefValue, ConstantAggregateZero
+    auto &C = cast<Constant>(Val);
+    unsigned Idx = 0;
+    while (auto Elt = C.getAggregateElement(Idx++)) {
+      auto EltRegs = getOrCreateVRegs(*Elt);
+      std::copy(EltRegs.begin(), EltRegs.end(), std::back_inserter(*VRegs));
+    }
+  } else {
+    assert(SplitTys.size() == 1 && "unexpectedly split LLT");
+    VRegs->push_back(MRI->createGenericVirtualRegister(SplitTys[0]));
+    bool Success = translate(cast<Constant>(Val), VRegs->front());
     if (!Success) {
       OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                                  MF->getFunction().getSubprogram(),
                                  &MF->getFunction().getEntryBlock());
       R << "unable to translate constant: " << ore::NV("Type", Val.getType());
       reportTranslationError(*MF, *TPC, *ORE, R);
-      return VReg;
+      return *VRegs;
     }
   }
 
-  return VReg;
+  return *VRegs;
 }
 
 int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
@@ -243,7 +307,11 @@ bool IRTranslator::translateRet(const Us
   // The target may mess up with the insertion point, but
   // this is not important as a return is the last instruction
   // of the block anyway.
-  return CLI->lowerReturn(MIRBuilder, Ret, !Ret ? 0 : getOrCreateVReg(*Ret));
+
+  // FIXME: this interface should simplify when CallLowering gets adapted to
+  // multiple VRegs per Value.
+  unsigned VReg = Ret ? packRegs(*Ret, MIRBuilder) : 0;
+  return CLI->lowerReturn(MIRBuilder, Ret, VReg);
 }
 
 bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
@@ -342,15 +410,23 @@ bool IRTranslator::translateLoad(const U
   if (DL->getTypeStoreSize(LI.getType()) == 0)
     return true;
 
-  unsigned Res = getOrCreateVReg(LI);
-  unsigned Addr = getOrCreateVReg(*LI.getPointerOperand());
+  ArrayRef<unsigned> Regs = getOrCreateVRegs(LI);
+  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(LI);
+  unsigned Base = getOrCreateVReg(*LI.getPointerOperand());
+
+  for (unsigned i = 0; i < Regs.size(); ++i) {
+    unsigned Addr = 0;
+    MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8);
+
+    MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8);
+    unsigned BaseAlign = getMemOpAlignment(LI);
+    auto MMO = MF->getMachineMemOperand(
+        Ptr, Flags, (MRI->getType(Regs[i]).getSizeInBits() + 7) / 8,
+        MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr,
+        LI.getSyncScopeID(), LI.getOrdering());
+    MIRBuilder.buildLoad(Regs[i], Addr, *MMO);
+  }
 
-  MIRBuilder.buildLoad(
-      Res, Addr,
-      *MF->getMachineMemOperand(MachinePointerInfo(LI.getPointerOperand()),
-                                Flags, DL->getTypeStoreSize(LI.getType()),
-                                getMemOpAlignment(LI), AAMDNodes(), nullptr,
-                                LI.getSyncScopeID(), LI.getOrdering()));
   return true;
 }
 
@@ -363,50 +439,61 @@ bool IRTranslator::translateStore(const
   if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0)
     return true;
 
-  unsigned Val = getOrCreateVReg(*SI.getValueOperand());
-  unsigned Addr = getOrCreateVReg(*SI.getPointerOperand());
+  ArrayRef<unsigned> Vals = getOrCreateVRegs(*SI.getValueOperand());
+  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand());
+  unsigned Base = getOrCreateVReg(*SI.getPointerOperand());
 
-  MIRBuilder.buildStore(
-      Val, Addr,
-      *MF->getMachineMemOperand(
-          MachinePointerInfo(SI.getPointerOperand()), Flags,
-          DL->getTypeStoreSize(SI.getValueOperand()->getType()),
-          getMemOpAlignment(SI), AAMDNodes(), nullptr, SI.getSyncScopeID(),
-          SI.getOrdering()));
+  for (unsigned i = 0; i < Vals.size(); ++i) {
+    unsigned Addr = 0;
+    MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8);
+
+    MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8);
+    unsigned BaseAlign = getMemOpAlignment(SI);
+    auto MMO = MF->getMachineMemOperand(
+        Ptr, Flags, (MRI->getType(Vals[i]).getSizeInBits() + 7) / 8,
+        MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr,
+        SI.getSyncScopeID(), SI.getOrdering());
+    MIRBuilder.buildStore(Vals[i], Addr, *MMO);
+  }
   return true;
 }
 
-bool IRTranslator::translateExtractValue(const User &U,
-                                         MachineIRBuilder &MIRBuilder) {
+static uint64_t getOffsetFromIndices(const User &U, const DataLayout &DL) {
   const Value *Src = U.getOperand(0);
   Type *Int32Ty = Type::getInt32Ty(U.getContext());
-  SmallVector<Value *, 1> Indices;
-
-  // If Src is a single element ConstantStruct, translate extractvalue
-  // to that element to avoid inserting a cast instruction.
-  if (auto CS = dyn_cast<ConstantStruct>(Src))
-    if (CS->getNumOperands() == 1) {
-      unsigned Res = getOrCreateVReg(*CS->getOperand(0));
-      ValToVReg[&U] = Res;
-      return true;
-    }
 
   // getIndexedOffsetInType is designed for GEPs, so the first index is the
   // usual array element rather than looking into the actual aggregate.
+  SmallVector<Value *, 1> Indices;
   Indices.push_back(ConstantInt::get(Int32Ty, 0));
 
   if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&U)) {
     for (auto Idx : EVI->indices())
       Indices.push_back(ConstantInt::get(Int32Ty, Idx));
+  } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) {
+    for (auto Idx : IVI->indices())
+      Indices.push_back(ConstantInt::get(Int32Ty, Idx));
   } else {
     for (unsigned i = 1; i < U.getNumOperands(); ++i)
       Indices.push_back(U.getOperand(i));
   }
 
-  uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices);
+  return 8 * static_cast<uint64_t>(
+                 DL.getIndexedOffsetInType(Src->getType(), Indices));
+}
 
-  unsigned Res = getOrCreateVReg(U);
-  MIRBuilder.buildExtract(Res, getOrCreateVReg(*Src), Offset);
+bool IRTranslator::translateExtractValue(const User &U,
+                                         MachineIRBuilder &MIRBuilder) {
+  const Value *Src = U.getOperand(0);
+  uint64_t Offset = getOffsetFromIndices(U, *DL);
+  ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src);
+  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*Src);
+  unsigned Idx = std::lower_bound(Offsets.begin(), Offsets.end(), Offset) -
+                 Offsets.begin();
+  auto &DstRegs = allocateVRegs(U);
+
+  for (unsigned i = 0; i < DstRegs.size(); ++i)
+    DstRegs[i] = SrcRegs[Idx++];
 
   return true;
 }
@@ -414,37 +501,33 @@ bool IRTranslator::translateExtractValue
 bool IRTranslator::translateInsertValue(const User &U,
                                         MachineIRBuilder &MIRBuilder) {
   const Value *Src = U.getOperand(0);
-  Type *Int32Ty = Type::getInt32Ty(U.getContext());
-  SmallVector<Value *, 1> Indices;
-
-  // getIndexedOffsetInType is designed for GEPs, so the first index is the
-  // usual array element rather than looking into the actual aggregate.
-  Indices.push_back(ConstantInt::get(Int32Ty, 0));
-
-  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) {
-    for (auto Idx : IVI->indices())
-      Indices.push_back(ConstantInt::get(Int32Ty, Idx));
-  } else {
-    for (unsigned i = 2; i < U.getNumOperands(); ++i)
-      Indices.push_back(U.getOperand(i));
+  uint64_t Offset = getOffsetFromIndices(U, *DL);
+  auto &DstRegs = allocateVRegs(U);
+  ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(U);
+  ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src);
+  ArrayRef<unsigned> InsertedRegs = getOrCreateVRegs(*U.getOperand(1));
+  auto InsertedIt = InsertedRegs.begin();
+
+  for (unsigned i = 0; i < DstRegs.size(); ++i) {
+    if (DstOffsets[i] >= Offset && InsertedIt != InsertedRegs.end())
+      DstRegs[i] = *InsertedIt++;
+    else
+      DstRegs[i] = SrcRegs[i];
   }
 
-  uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices);
-
-  unsigned Res = getOrCreateVReg(U);
-  unsigned Inserted = getOrCreateVReg(*U.getOperand(1));
-  MIRBuilder.buildInsert(Res, getOrCreateVReg(*Src), Inserted, Offset);
-
   return true;
 }
 
 bool IRTranslator::translateSelect(const User &U,
                                    MachineIRBuilder &MIRBuilder) {
-  unsigned Res = getOrCreateVReg(U);
   unsigned Tst = getOrCreateVReg(*U.getOperand(0));
-  unsigned Op0 = getOrCreateVReg(*U.getOperand(1));
-  unsigned Op1 = getOrCreateVReg(*U.getOperand(2));
-  MIRBuilder.buildSelect(Res, Tst, Op0, Op1);
+  ArrayRef<unsigned> ResRegs = getOrCreateVRegs(U);
+  ArrayRef<unsigned> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
+  ArrayRef<unsigned> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
+
+  for (unsigned i = 0; i < ResRegs.size(); ++i)
+    MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i]);
+
   return true;
 }
 
@@ -453,15 +536,16 @@ bool IRTranslator::translateBitCast(cons
   // If we're bitcasting to the source type, we can reuse the source vreg.
   if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
       getLLTForType(*U.getType(), *DL)) {
-    // Get the source vreg now, to avoid invalidating ValToVReg.
     unsigned SrcReg = getOrCreateVReg(*U.getOperand(0));
-    unsigned &Reg = ValToVReg[&U];
+    auto &Regs = *VMap.getVRegs(U);
     // If we already assigned a vreg for this bitcast, we can't change that.
     // Emit a copy to satisfy the users we already emitted.
-    if (Reg)
-      MIRBuilder.buildCopy(Reg, SrcReg);
-    else
-      Reg = SrcReg;
+    if (!Regs.empty())
+      MIRBuilder.buildCopy(Regs[0], SrcReg);
+    else {
+      Regs.push_back(SrcReg);
+      VMap.getOffsets(U)->push_back(0);
+    }
     return true;
   }
   return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
@@ -612,14 +696,10 @@ void IRTranslator::getStackGuard(unsigne
 
 bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
                                               MachineIRBuilder &MIRBuilder) {
-  LLT Ty = getLLTForType(*CI.getOperand(0)->getType(), *DL);
-  LLT s1 = LLT::scalar(1);
-  unsigned Width = Ty.getSizeInBits();
-  unsigned Res = MRI->createGenericVirtualRegister(Ty);
-  unsigned Overflow = MRI->createGenericVirtualRegister(s1);
+  ArrayRef<unsigned> ResRegs = getOrCreateVRegs(CI);
   auto MIB = MIRBuilder.buildInstr(Op)
-                 .addDef(Res)
-                 .addDef(Overflow)
+                 .addDef(ResRegs[0])
+                 .addDef(ResRegs[1])
                  .addUse(getOrCreateVReg(*CI.getOperand(0)))
                  .addUse(getOrCreateVReg(*CI.getOperand(1)));
 
@@ -629,7 +709,6 @@ bool IRTranslator::translateOverflowIntr
     MIB.addUse(Zero);
   }
 
-  MIRBuilder.buildSequence(getOrCreateVReg(CI), {Res, Overflow}, {0, Width});
   return true;
 }
 
@@ -836,6 +915,34 @@ bool IRTranslator::translateInlineAsm(co
   return true;
 }
 
+unsigned IRTranslator::packRegs(const Value &V,
+                                  MachineIRBuilder &MIRBuilder) {
+  ArrayRef<unsigned> Regs = getOrCreateVRegs(V);
+  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V);
+  LLT BigTy = getLLTForType(*V.getType(), *DL);
+
+  if (Regs.size() == 1)
+    return Regs[0];
+
+  unsigned Dst = MRI->createGenericVirtualRegister(BigTy);
+  MIRBuilder.buildUndef(Dst);
+  for (unsigned i = 0; i < Regs.size(); ++i) {
+    unsigned NewDst = MRI->createGenericVirtualRegister(BigTy);
+    MIRBuilder.buildInsert(NewDst, Dst, Regs[i], Offsets[i]);
+    Dst = NewDst;
+  }
+  return Dst;
+}
+
+void IRTranslator::unpackRegs(const Value &V, unsigned Src,
+                                MachineIRBuilder &MIRBuilder) {
+  ArrayRef<unsigned> Regs = getOrCreateVRegs(V);
+  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V);
+
+  for (unsigned i = 0; i < Regs.size(); ++i)
+    MIRBuilder.buildExtract(Regs[i], Src, Offsets[i]);
+}
+
 bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
   const CallInst &CI = cast<CallInst>(U);
   auto TII = MF->getTarget().getIntrinsicInfo();
@@ -855,16 +962,24 @@ bool IRTranslator::translateCall(const U
       ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
   }
 
+  bool IsSplitType = valueIsSplit(CI);
   if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) {
-    unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI);
+    unsigned Res = IsSplitType ? MRI->createGenericVirtualRegister(
+                                     getLLTForType(*CI.getType(), *DL))
+                               : getOrCreateVReg(CI);
+
     SmallVector<unsigned, 8> Args;
     for (auto &Arg: CI.arg_operands())
-      Args.push_back(getOrCreateVReg(*Arg));
+      Args.push_back(packRegs(*Arg, MIRBuilder));
 
     MF->getFrameInfo().setHasCalls(true);
-    return CLI->lowerCall(MIRBuilder, &CI, Res, Args, [&]() {
+    bool Success = CLI->lowerCall(MIRBuilder, &CI, Res, Args, [&]() {
       return getOrCreateVReg(*CI.getCalledValue());
     });
+
+    if (IsSplitType)
+      unpackRegs(CI, Res, MIRBuilder);
+    return Success;
   }
 
   assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
@@ -872,7 +987,14 @@ bool IRTranslator::translateCall(const U
   if (translateKnownIntrinsic(CI, ID, MIRBuilder))
     return true;
 
-  unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI);
+  unsigned Res = 0;
+  if (!CI.getType()->isVoidTy()) {
+    if (IsSplitType)
+      Res =
+          MRI->createGenericVirtualRegister(getLLTForType(*CI.getType(), *DL));
+    else
+      Res = getOrCreateVReg(CI);
+  }
   MachineInstrBuilder MIB =
       MIRBuilder.buildIntrinsic(ID, Res, !CI.doesNotAccessMemory());
 
@@ -880,9 +1002,12 @@ bool IRTranslator::translateCall(const U
     // Some intrinsics take metadata parameters. Reject them.
     if (isa<MetadataAsValue>(Arg))
       return false;
-    MIB.addUse(getOrCreateVReg(*Arg));
+    MIB.addUse(packRegs(*Arg, MIRBuilder));
   }
 
+  if (IsSplitType)
+    unpackRegs(CI, Res, MIRBuilder);
+
   // Add a MachineMemOperand if it is a target mem intrinsic.
   const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
   TargetLowering::IntrinsicInfo Info;
@@ -926,15 +1051,18 @@ bool IRTranslator::translateInvoke(const
   MCSymbol *BeginSymbol = Context.createTempSymbol();
   MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
 
-  unsigned Res = I.getType()->isVoidTy() ? 0 : getOrCreateVReg(I);
+  unsigned Res =
+        MRI->createGenericVirtualRegister(getLLTForType(*I.getType(), *DL));
   SmallVector<unsigned, 8> Args;
   for (auto &Arg: I.arg_operands())
-    Args.push_back(getOrCreateVReg(*Arg));
+    Args.push_back(packRegs(*Arg, MIRBuilder));
 
   if (!CLI->lowerCall(MIRBuilder, &I, Res, Args,
                       [&]() { return getOrCreateVReg(*I.getCalledValue()); }))
     return false;
 
+  unpackRegs(I, Res, MIRBuilder);
+
   MCSymbol *EndSymbol = Context.createTempSymbol();
   MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
 
@@ -993,27 +1121,18 @@ bool IRTranslator::translateLandingPad(c
     return false;
 
   MBB.addLiveIn(ExceptionReg);
-  unsigned VReg = MRI->createGenericVirtualRegister(Tys[0]),
-           Tmp = MRI->createGenericVirtualRegister(Ty);
-  MIRBuilder.buildCopy(VReg, ExceptionReg);
-  MIRBuilder.buildInsert(Tmp, Undef, VReg, 0);
+  ArrayRef<unsigned> ResRegs = getOrCreateVRegs(LP);
+  MIRBuilder.buildCopy(ResRegs[0], ExceptionReg);
 
   unsigned SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);
   if (!SelectorReg)
     return false;
 
   MBB.addLiveIn(SelectorReg);
-
-  // N.b. the exception selector register always has pointer type and may not
-  // match the actual IR-level type in the landingpad so an extra cast is
-  // needed.
   unsigned PtrVReg = MRI->createGenericVirtualRegister(Tys[0]);
   MIRBuilder.buildCopy(PtrVReg, SelectorReg);
+  MIRBuilder.buildCast(ResRegs[1], PtrVReg);
 
-  VReg = MRI->createGenericVirtualRegister(Tys[1]);
-  MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT).addDef(VReg).addUse(PtrVReg);
-  MIRBuilder.buildInsert(getOrCreateVReg(LP), Tmp, VReg,
-                         Tys[0].getSizeInBits());
   return true;
 }
 
@@ -1103,9 +1222,16 @@ bool IRTranslator::translateInsertElemen
   // not a legal vector type in LLT.
   if (U.getType()->getVectorNumElements() == 1) {
     unsigned Elt = getOrCreateVReg(*U.getOperand(1));
-    ValToVReg[&U] = Elt;
+    auto &Regs = *VMap.getVRegs(U);
+    if (Regs.empty()) {
+      Regs.push_back(Elt);
+      VMap.getOffsets(U)->push_back(0);
+    } else {
+      MIRBuilder.buildCopy(Regs[0], Elt);
+    }
     return true;
   }
+
   unsigned Res = getOrCreateVReg(U);
   unsigned Val = getOrCreateVReg(*U.getOperand(0));
   unsigned Elt = getOrCreateVReg(*U.getOperand(1));
@@ -1120,7 +1246,13 @@ bool IRTranslator::translateExtractEleme
   // not a legal vector type in LLT.
   if (U.getOperand(0)->getType()->getVectorNumElements() == 1) {
     unsigned Elt = getOrCreateVReg(*U.getOperand(0));
-    ValToVReg[&U] = Elt;
+    auto &Regs = *VMap.getVRegs(U);
+    if (Regs.empty()) {
+      Regs.push_back(Elt);
+      VMap.getOffsets(U)->push_back(0);
+    } else {
+      MIRBuilder.buildCopy(Regs[0], Elt);
+    }
     return true;
   }
   unsigned Res = getOrCreateVReg(U);
@@ -1142,17 +1274,21 @@ bool IRTranslator::translateShuffleVecto
 
 bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) {
   const PHINode &PI = cast<PHINode>(U);
-  auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
-  MIB.addDef(getOrCreateVReg(PI));
 
-  PendingPHIs.emplace_back(&PI, MIB.getInstr());
+  SmallVector<MachineInstr *, 4> Insts;
+  for (auto Reg : getOrCreateVRegs(PI)) {
+    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, Reg);
+    Insts.push_back(MIB.getInstr());
+  }
+
+  PendingPHIs.emplace_back(&PI, std::move(Insts));
   return true;
 }
 
 void IRTranslator::finishPendingPhis() {
-  for (std::pair<const PHINode *, MachineInstr *> &Phi : PendingPHIs) {
+  for (auto &Phi : PendingPHIs) {
     const PHINode *PI = Phi.first;
-    MachineInstrBuilder MIB(*MF, Phi.second);
+    ArrayRef<MachineInstr *> ComponentPHIs = Phi.second;
 
     // All MachineBasicBlocks exist, add them to the PHI. We assume IRTranslator
     // won't create extra control flow here, otherwise we need to find the
@@ -1166,17 +1302,27 @@ void IRTranslator::finishPendingPhis() {
         continue;
 
       HandledPreds.insert(IRPred);
-      unsigned ValReg = getOrCreateVReg(*PI->getIncomingValue(i));
+      ArrayRef<unsigned> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i));
       for (auto Pred : getMachinePredBBs({IRPred, PI->getParent()})) {
-        assert(Pred->isSuccessor(MIB->getParent()) &&
+        assert(Pred->isSuccessor(ComponentPHIs[0]->getParent()) &&
                "incorrect CFG at MachineBasicBlock level");
-        MIB.addUse(ValReg);
-        MIB.addMBB(Pred);
+        for (unsigned j = 0; j < ValRegs.size(); ++j) {
+          MachineInstrBuilder MIB(*MF, ComponentPHIs[j]);
+          MIB.addUse(ValRegs[j]);
+          MIB.addMBB(Pred);
+        }
       }
     }
   }
 }
 
+bool IRTranslator::valueIsSplit(const Value &V,
+                                SmallVectorImpl<uint64_t> *Offsets) {
+  SmallVector<LLT, 4> SplitTys;
+  computeValueLLTs(*DL, *V.getType(), SplitTys, Offsets);
+  return SplitTys.size() > 1;
+}
+
 bool IRTranslator::translate(const Instruction &Inst) {
   CurBuilder.setDebugLoc(Inst.getDebugLoc());
   switch(Inst.getOpcode()) {
@@ -1235,23 +1381,6 @@ bool IRTranslator::translate(const Const
     default:
       return false;
     }
-  } else if (auto CS = dyn_cast<ConstantStruct>(&C)) {
-    // Return the element if it is a single element ConstantStruct.
-    if (CS->getNumOperands() == 1) {
-      unsigned EltReg = getOrCreateVReg(*CS->getOperand(0));
-      EntryBuilder.buildCast(Reg, EltReg);
-      return true;
-    }
-    SmallVector<unsigned, 4> Ops;
-    SmallVector<uint64_t, 4> Indices;
-    uint64_t Offset = 0;
-    for (unsigned i = 0; i < CS->getNumOperands(); ++i) {
-      unsigned OpReg = getOrCreateVReg(*CS->getOperand(i));
-      Ops.push_back(OpReg);
-      Indices.push_back(Offset);
-      Offset += MRI->getType(OpReg).getSizeInBits();
-    }
-    EntryBuilder.buildSequence(Reg, Ops, Indices);
   } else if (auto CV = dyn_cast<ConstantVector>(&C)) {
     if (CV->getNumOperands() == 1)
       return translate(*CV->getOperand(0), Reg);
@@ -1270,7 +1399,7 @@ void IRTranslator::finalizeFunction() {
   // Release the memory used by the different maps we
   // needed during the translation.
   PendingPHIs.clear();
-  ValToVReg.clear();
+  VMap.reset();
   FrameIndices.clear();
   MachinePreds.clear();
   // MachineIRBuilder::DebugLoc can outlive the DILocation it holds. Clear it
@@ -1330,8 +1459,10 @@ bool IRTranslator::runOnMachineFunction(
   for (const Argument &Arg: F.args()) {
     if (DL->getTypeStoreSize(Arg.getType()) == 0)
       continue; // Don't handle zero sized types.
-    VRegArgs.push_back(getOrCreateVReg(Arg));
+    VRegArgs.push_back(
+        MRI->createGenericVirtualRegister(getLLTForType(*Arg.getType(), *DL)));
   }
+
   if (!CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs)) {
     OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                                F.getSubprogram(), &F.getEntryBlock());
@@ -1340,14 +1471,28 @@ bool IRTranslator::runOnMachineFunction(
     return false;
   }
 
+  auto ArgIt = F.arg_begin();
+  for (auto &VArg : VRegArgs) {
+    // If the argument is an unsplit scalar then don't use unpackRegs to avoid
+    // creating redundant copies.
+    if (!valueIsSplit(*ArgIt, VMap.getOffsets(*ArgIt))) {
+      auto &VRegs = *VMap.getVRegs(cast<Value>(*ArgIt));
+      assert(VRegs.empty() && "VRegs already populated?");
+      VRegs.push_back(VArg);
+    } else {
+      unpackRegs(*ArgIt, VArg, EntryBuilder);
+    }
+    ArgIt++;
+  }
+
   // And translate the function!
-  for (const BasicBlock &BB: F) {
+  for (const BasicBlock &BB : F) {
     MachineBasicBlock &MBB = getMBB(BB);
     // Set the insertion point of all the following translations to
     // the end of this basic block.
     CurBuilder.setMBB(MBB);
 
-    for (const Instruction &Inst: BB) {
+    for (const Instruction &Inst : BB) {
       if (translate(Inst))
         continue;
 

Modified: llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp?rev=332449&r1=332448&r2=332449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp Wed May 16 03:32:02 2018
@@ -187,6 +187,9 @@ void AArch64CallLowering::splitToValueTy
   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
   LLVMContext &Ctx = OrigArg.Ty->getContext();
 
+  if (OrigArg.Ty->isVoidTy())
+    return;
+
   SmallVector<EVT, 4> SplitVTs;
   SmallVector<uint64_t, 4> Offsets;
   ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);

Modified: llvm/trunk/lib/Target/ARM/ARMCallLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMCallLowering.cpp?rev=332449&r1=332448&r2=332449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMCallLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMCallLowering.cpp Wed May 16 03:32:02 2018
@@ -469,7 +469,12 @@ bool ARMCallLowering::lowerFormalArgumen
   if (!MBB.empty())
     MIRBuilder.setInstr(*MBB.begin());
 
-  return handleAssignments(MIRBuilder, ArgInfos, ArgHandler);
+  if (!handleAssignments(MIRBuilder, ArgInfos, ArgHandler))
+    return false;
+
+  // Move back to the end of the basic block.
+  MIRBuilder.setMBB(MBB);
+  return true;
 }
 
 namespace {

Modified: llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll?rev=332449&r1=332448&r2=332449&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll Wed May 16 03:32:02 2018
@@ -32,17 +32,6 @@ define i128 @ABIi128(i128 %arg1) {
   ret i128 %res
 }
 
-; It happens that we don't handle ConstantArray instances yet during
-; translation. Any other constant would be fine too.
-
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to translate constant: [1 x double] (in function: constant)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for constant
-; FALLBACK-WITH-REPORT-OUT-LABEL: constant:
-; FALLBACK-WITH-REPORT-OUT: fmov d0, #1.0
-define [1 x double] @constant() {
-  ret [1 x double] [double 1.0]
-}
-
   ; The key problem here is that we may fail to create an MBB referenced by a
   ; PHI. If so, we cannot complete the G_PHI and mustn't try or bad things
   ; happen.
@@ -185,16 +174,6 @@ end:
   br label %block
 }
 
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %1:_(s96), %3:_(p0) :: (store 12 into `%struct96* undef`, align 4) (in function: nonpow2_insertvalue_narrowing)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_insertvalue_narrowing
-; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_insertvalue_narrowing:
-%struct96 = type { float, float, float }
-define void @nonpow2_insertvalue_narrowing(float %a) {
-  %dummy = insertvalue %struct96 undef, float %a, 0
-  store %struct96 %dummy, %struct96* undef
-  ret void
-}
-
 ; FALLBACK-WITH-REPORT-ERR remark: <unknown>:0:0: unable to legalize instruction: G_STORE %3, %4 :: (store 12 into `i96* undef`, align 16) (in function: nonpow2_add_narrowing)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_add_narrowing
 ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_add_narrowing:

Modified: llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll?rev=332449&r1=332448&r2=332449&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll Wed May 16 03:32:02 2018
@@ -709,8 +709,14 @@ define i8* @test_constant_null() {
 
 ; CHECK-LABEL: name: test_struct_memops
 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK: [[VAL:%[0-9]+]]:_(s64) = G_LOAD [[ADDR]](p0) :: (load 8 from  %ir.addr, align 4)
-; CHECK: G_STORE [[VAL]](s64), [[ADDR]](p0) :: (store 8 into  %ir.addr, align 4)
+; CHECK: [[VAL1:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr, align 4)
+; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST1]](s64)
+; CHECK: [[VAL2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 4 from %ir.addr + 4)
+; CHECK: G_STORE [[VAL1]](s8), [[ADDR]](p0) :: (store 1 into %ir.addr, align 4)
+; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST2]](s64)
+; CHECK: G_STORE [[VAL2]](s32), [[GEP2]](p0) :: (store 4 into %ir.addr + 4)
 define void @test_struct_memops({ i8, i32 }* %addr) {
   %val = load { i8, i32 }, { i8, i32 }* %addr
   store { i8, i32 } %val, { i8, i32 }* %addr
@@ -811,10 +817,10 @@ define float @test_frem(float %arg1, flo
 ; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $w1
 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2
 ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_SADDO [[LHS]], [[RHS]]
-; CHECK: [[TMP:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-; CHECK: [[TMP1:%[0-9]+]]:_(s64) = G_INSERT [[TMP]], [[VAL]](s32), 0
-; CHECK: [[RES:%[0-9]+]]:_(s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32
-; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0)
+; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store 4 into %ir.addr)
+; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST]](s64)
+; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store 1 into %ir.addr + 4, align 4)
 declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
 define void @test_sadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) {
   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %lhs, i32 %rhs)
@@ -828,10 +834,10 @@ define void @test_sadd_overflow(i32 %lhs
 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2
 ; CHECK: [[ZERO:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
 ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_UADDE [[LHS]], [[RHS]], [[ZERO]]
-; CHECK: [[TMP:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-; CHECK: [[TMP1:%[0-9]+]]:_(s64) = G_INSERT [[TMP]], [[VAL]](s32), 0
-; CHECK: [[RES:%[0-9]+]]:_(s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32
-; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0)
+; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store 4 into %ir.addr)
+; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST]](s64)
+; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store 1 into %ir.addr + 4, align 4)
 declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
 define void @test_uadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) {
   %res = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %lhs, i32 %rhs)
@@ -844,10 +850,10 @@ define void @test_uadd_overflow(i32 %lhs
 ; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $w1
 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2
 ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_SSUBO [[LHS]], [[RHS]]
-; CHECK: [[TMP:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-; CHECK: [[TMP1:%[0-9]+]]:_(s64) = G_INSERT [[TMP]], [[VAL]](s32), 0
-; CHECK: [[RES:%[0-9]+]]:_(s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32
-; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0)
+; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store 4 into %ir.subr)
+; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST]](s64)
+; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store 1 into %ir.subr + 4, align 4)
 declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32)
 define void @test_ssub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) {
   %res = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %lhs, i32 %rhs)
@@ -861,10 +867,10 @@ define void @test_ssub_overflow(i32 %lhs
 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2
 ; CHECK: [[ZERO:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
 ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_USUBE [[LHS]], [[RHS]], [[ZERO]]
-; CHECK: [[TMP:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-; CHECK: [[TMP1:%[0-9]+]]:_(s64) = G_INSERT [[TMP]], [[VAL]](s32), 0
-; CHECK: [[RES:%[0-9]+]]:_(s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32
-; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0)
+; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store 4 into %ir.subr)
+; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST]](s64)
+; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store 1 into %ir.subr + 4, align 4)
 declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32)
 define void @test_usub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) {
   %res = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %lhs, i32 %rhs)
@@ -877,10 +883,10 @@ define void @test_usub_overflow(i32 %lhs
 ; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $w1
 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2
 ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_SMULO [[LHS]], [[RHS]]
-; CHECK: [[TMP:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-; CHECK: [[TMP1:%[0-9]+]]:_(s64) = G_INSERT [[TMP]], [[VAL]](s32), 0
-; CHECK: [[RES:%[0-9]+]]:_(s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32
-; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0)
+; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store 4 into %ir.addr)
+; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST]](s64)
+; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store 1 into %ir.addr + 4, align 4)
 declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32)
 define void @test_smul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) {
   %res = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %lhs, i32 %rhs)
@@ -893,10 +899,10 @@ define void @test_smul_overflow(i32 %lhs
 ; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $w1
 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2
 ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_UMULO [[LHS]], [[RHS]]
-; CHECK: [[TMP:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-; CHECK: [[TMP1:%[0-9]+]]:_(s64) = G_INSERT [[TMP]], [[VAL]](s32), 0
-; CHECK: [[RES:%[0-9]+]]:_(s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32
-; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0)
+; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store 4 into %ir.addr)
+; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST]](s64)
+; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store 1 into %ir.addr + 4, align 4)
 declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32)
 define void @test_umul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) {
   %res = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %lhs, i32 %rhs)
@@ -905,9 +911,18 @@ define void @test_umul_overflow(i32 %lhs
 }
 
 ; CHECK-LABEL: name: test_extractvalue
-; CHECK: [[STRUCT:%[0-9]+]]:_(s128) = G_LOAD
-; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT [[STRUCT]](s128), 64
-; CHECK: $w0 = COPY [[RES]]
+; CHECK: %0:_(p0) = COPY $x0
+; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr, align 4)
+; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP %0, [[CST1]](s64)
+; CHECK: [[LD2:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p0) :: (load 1 from %ir.addr + 4, align 4)
+; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP %0, [[CST2]](s64)
+; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.addr + 8)
+; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP %0, [[CST3]](s64)
+; CHECK: [[LD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 4 from %ir.addr + 12)
+; CHECK: $w0 = COPY [[LD3]](s32)
 %struct.nested = type {i8, { i8, i32 }, i32}
 define i32 @test_extractvalue(%struct.nested* %addr) {
   %struct = load %struct.nested, %struct.nested* %addr
@@ -916,9 +931,22 @@ define i32 @test_extractvalue(%struct.ne
 }
 
 ; CHECK-LABEL: name: test_extractvalue_agg
-; CHECK: [[STRUCT:%[0-9]+]]:_(s128) = G_LOAD
-; CHECK: [[RES:%[0-9]+]]:_(s64) = G_EXTRACT [[STRUCT]](s128), 32
-; CHECK: G_STORE [[RES]]
+; CHECK: %0:_(p0) = COPY $x0
+; CHECK: %1:_(p0) = COPY $x1
+; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr, align 4)
+; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP %0, [[CST1]](s64)
+; CHECK: [[LD2:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p0) :: (load 1 from %ir.addr + 4, align 4)
+; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP %0, [[CST2]](s64)
+; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.addr + 8)
+; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP %0, [[CST3]](s64)
+; CHECK: [[LD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 4 from %ir.addr + 12)
+; CHECK: G_STORE [[LD2]](s8), %1(p0) :: (store 1 into %ir.addr2, align 4)
+; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_GEP %1, [[CST4]](s64)
+; CHECK: G_STORE [[LD3]](s32), [[GEP4]](p0) :: (store 4 into %ir.addr2 + 4)
 define void @test_extractvalue_agg(%struct.nested* %addr, {i8, i32}* %addr2) {
   %struct = load %struct.nested, %struct.nested* %addr
   %res = extractvalue %struct.nested %struct, 1
@@ -927,10 +955,28 @@ define void @test_extractvalue_agg(%stru
 }
 
 ; CHECK-LABEL: name: test_insertvalue
-; CHECK: [[VAL:%[0-9]+]]:_(s32) = COPY $w1
-; CHECK: [[STRUCT:%[0-9]+]]:_(s128) = G_LOAD
-; CHECK: [[NEWSTRUCT:%[0-9]+]]:_(s128) = G_INSERT [[STRUCT]], [[VAL]](s32), 64
-; CHECK: G_STORE [[NEWSTRUCT]](s128),
+; CHECK: %0:_(p0) = COPY $x0
+; CHECK: %1:_(s32) = COPY $w1
+; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr, align 4)
+; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP %0, [[CST1]](s64)
+; CHECK: [[LD2:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p0) :: (load 1 from %ir.addr + 4, align 4)
+; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP %0, [[CST2]](s64)
+; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.addr + 8)
+; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP %0, [[CST3]](s64)
+; CHECK: [[LD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 4 from %ir.addr + 12)
+; CHECK: G_STORE [[LD1]](s8), %0(p0) :: (store 1 into %ir.addr, align 4)
+; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_GEP %0, [[CST4]](s64)
+; CHECK: G_STORE [[LD2]](s8), [[GEP4]](p0) :: (store 1 into %ir.addr + 4, align 4)
+; CHECK: [[CST5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_GEP %0, [[CST5]](s64)
+; CHECK: G_STORE %1(s32), [[GEP5]](p0) :: (store 4 into %ir.addr + 8)
+; CHECK: [[CST6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_GEP %0, [[CST6]](s64)
+; CHECK: G_STORE [[LD4]](s32), [[GEP6]](p0) :: (store 4 into %ir.addr + 12)
 define void @test_insertvalue(%struct.nested* %addr, i32 %val) {
   %struct = load %struct.nested, %struct.nested* %addr
   %newstruct = insertvalue %struct.nested %struct, i32 %val, 1, 1
@@ -942,8 +988,7 @@ define [1 x i64] @test_trivial_insert([1
 ; CHECK-LABEL: name: test_trivial_insert
 ; CHECK: [[STRUCT:%[0-9]+]]:_(s64) = COPY $x0
 ; CHECK: [[VAL:%[0-9]+]]:_(s64) = COPY $x1
-; CHECK: [[RES:%[0-9]+]]:_(s64) = COPY [[VAL]](s64)
-; CHECK: $x0 = COPY [[RES]]
+; CHECK: $x0 = COPY [[VAL]]
   %res = insertvalue [1 x i64] %s, i64 %val, 0
   ret [1 x i64] %res
 }
@@ -952,17 +997,38 @@ define [1 x i8*] @test_trivial_insert_pt
 ; CHECK-LABEL: name: test_trivial_insert_ptr
 ; CHECK: [[STRUCT:%[0-9]+]]:_(s64) = COPY $x0
 ; CHECK: [[VAL:%[0-9]+]]:_(p0) = COPY $x1
-; CHECK: [[RES:%[0-9]+]]:_(s64) = G_PTRTOINT [[VAL]](p0)
-; CHECK: $x0 = COPY [[RES]]
+; CHECK: $x0 = COPY [[VAL]]
   %res = insertvalue [1 x i8*] %s, i8* %val, 0
   ret [1 x i8*] %res
 }
 
 ; CHECK-LABEL: name: test_insertvalue_agg
-; CHECK: [[SMALLSTRUCT:%[0-9]+]]:_(s64) = G_LOAD
-; CHECK: [[STRUCT:%[0-9]+]]:_(s128) = G_LOAD
-; CHECK: [[RES:%[0-9]+]]:_(s128) = G_INSERT [[STRUCT]], [[SMALLSTRUCT]](s64), 32
-; CHECK: G_STORE [[RES]](s128)
+; CHECK: %0:_(p0) = COPY $x0
+; CHECK: %1:_(p0) = COPY $x1
+; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD %1(p0) :: (load 1 from %ir.addr2, align 4)
+; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP %1, [[CST1]](s64)
+; CHECK: [[LD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 4 from %ir.addr2 + 4)
+; CHECK: [[LD3:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr, align 4)
+; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP %0, [[CST2]](s64)
+; CHECK: [[LD4:%[0-9]+]]:_(s8) = G_LOAD [[GEP2]](p0) :: (load 1 from %ir.addr + 4, align 4)
+; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP %0, [[CST3]](s64)
+; CHECK: [[LD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 4 from %ir.addr + 8)
+; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_GEP %0, [[CST4]](s64)
+; CHECK: [[LD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load 4 from %ir.addr + 12)
+; CHECK: G_STORE [[LD3]](s8), %0(p0) :: (store 1 into %ir.addr, align 4)
+; CHECK: [[CST5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_GEP %0, [[CST5]](s64)
+; CHECK: G_STORE [[LD1]](s8), [[GEP5]](p0) :: (store 1 into %ir.addr + 4, align 4)
+; CHECK: [[CST6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_GEP %0, [[CST6]](s64)
+; CHECK: G_STORE [[LD2]](s32), [[GEP6]](p0) :: (store 4 into %ir.addr + 8)
+; CHECK: [[CST7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_GEP %0, [[CST7]](s64)
+; CHECK: G_STORE [[LD6]](s32), [[GEP7]](p0) :: (store 4 into %ir.addr + 12)
 define void @test_insertvalue_agg(%struct.nested* %addr, {i8, i32}* %addr2) {
   %smallstruct = load {i8, i32}, {i8, i32}* %addr2
   %struct = load %struct.nested, %struct.nested* %addr
@@ -1661,3 +1727,135 @@ entry:
   %val = load %zerosize_type, %zerosize_type* %ptr, align 4
   ret %zerosize_type %in
 }
+
+
+define i64 @test_phi_loop(i32 %n) {
+; CHECK-LABEL: name: test_phi_loop
+; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY $w0
+; CHECK: [[CST1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+; CHECK: [[CST2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+
+; CHECK: [[PN1:%[0-9]+]]:_(s32) = G_PHI [[ARG1]](s32), %bb.1, [[SUB:%[0-9]+]](s32), %bb.2
+; CHECK: [[PN2:%[0-9]+]]:_(s64) = G_PHI [[CST3]](s64), %bb.1, [[PN3:%[0-9]+]](s64), %bb.2
+; CHECK: [[PN3]]:_(s64) = G_PHI [[CST4]](s64), %bb.1, [[ADD:%[0-9]+]](s64), %bb.2
+; CHECK: [[ADD]]:_(s64) = G_ADD [[PN2]], [[PN3]]
+; CHECK: [[SUB]]:_(s32) = G_SUB [[PN1]], [[CST1]]
+; CHECK: [[CMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PN1]](s32), [[CST2]]
+; CHECK: G_BRCOND [[CMP]](s1), %bb.3
+; CHECK: G_BR %bb.2
+
+; CHECK: $x0 = COPY [[PN2]](s64)
+; CHECK: RET_ReallyLR implicit $x0
+entry:
+  br label %loop
+
+loop:
+  %counter = phi i32 [ %n, %entry ], [ %counter.dec, %loop ]
+  %elem = phi { i64, i64 } [ { i64 0, i64 1 }, %entry ], [ %updated, %loop ]
+  %prev = extractvalue { i64, i64 } %elem, 0
+  %curr = extractvalue { i64, i64 } %elem, 1
+  %next = add i64 %prev, %curr
+  %shifted = insertvalue { i64, i64 } %elem, i64 %curr, 0
+  %updated = insertvalue { i64, i64 } %shifted, i64 %next, 1
+  %counter.dec = sub i32 %counter, 1
+  %cond = icmp sle i32 %counter, 0
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  %res = extractvalue { i64, i64 } %elem, 0
+  ret i64 %res
+}
+
+define void @test_phi_diamond({ i8, i16, i32 }* %a.ptr, { i8, i16, i32 }* %b.ptr, i1 %selector, { i8, i16, i32 }* %dst) {
+; CHECK-LABEL: name: test_phi_diamond
+; CHECK: [[ARG1:%[0-9]+]]:_(p0) = COPY $x0
+; CHECK: [[ARG2:%[0-9]+]]:_(p0) = COPY $x1
+; CHECK: [[ARG3:%[0-9]+]]:_(s32) = COPY $w2
+; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ARG3]](s32)
+; CHECK: [[ARG4:%[0-9]+]]:_(p0) = COPY $x3
+; CHECK: G_BRCOND [[TRUNC]](s1), %bb.2
+; CHECK: G_BR %bb.3
+
+; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD [[ARG1]](p0) :: (load 1 from %ir.a.ptr, align 4)
+; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[ARG1]], [[CST1]](s64)
+; CHECK: [[LD2:%[0-9]+]]:_(s16) = G_LOAD [[GEP1]](p0) :: (load 2 from %ir.a.ptr + 2)
+; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[ARG1]], [[CST2]](s64)
+; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.a.ptr + 4)
+; CHECK: G_BR %bb.4
+
+; CHECK: [[LD4:%[0-9]+]]:_(s8) = G_LOAD [[ARG2]](p0) :: (load 1 from %ir.b.ptr, align 4)
+; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[ARG2]], [[CST3]](s64)
+; CHECK: [[LD5:%[0-9]+]]:_(s16) = G_LOAD [[GEP3]](p0) :: (load 2 from %ir.b.ptr + 2)
+; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_GEP [[ARG2]], [[CST4]](s64)
+; CHECK: [[LD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load 4 from %ir.b.ptr + 4)
+
+; CHECK: [[PN1:%[0-9]+]]:_(s8) = G_PHI [[LD1]](s8), %bb.2, [[LD4]](s8), %bb.3
+; CHECK: [[PN2:%[0-9]+]]:_(s16) = G_PHI [[LD2]](s16), %bb.2, [[LD5]](s16), %bb.3
+; CHECK: [[PN3:%[0-9]+]]:_(s32) = G_PHI [[LD3]](s32), %bb.2, [[LD6]](s32), %bb.3
+; CHECK: G_STORE [[PN1]](s8), [[ARG4]](p0) :: (store 1 into %ir.dst, align 4)
+; CHECK: [[CST5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_GEP [[ARG4]], [[CST5]](s64)
+; CHECK: G_STORE [[PN2]](s16), [[GEP5]](p0) :: (store 2 into %ir.dst + 2)
+; CHECK: [[CST6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_GEP [[ARG4]], [[CST6]](s64)
+; CHECK: G_STORE [[PN3]](s32), [[GEP6]](p0) :: (store 4 into %ir.dst + 4)
+; CHECK: RET_ReallyLR
+
+entry:
+  br i1 %selector, label %store.a, label %store.b
+
+store.a:
+  %a = load { i8, i16, i32 }, { i8, i16, i32 }* %a.ptr
+  br label %join
+
+store.b:
+  %b = load { i8, i16, i32 }, { i8, i16, i32 }* %b.ptr
+  br label %join
+
+join:
+  %v = phi { i8, i16, i32 } [ %a, %store.a ], [ %b, %store.b ]
+  store { i8, i16, i32 } %v, { i8, i16, i32 }* %dst
+  ret void
+}
+
+%agg.inner.inner = type {i64, i64}
+%agg.inner = type {i16, i8, %agg.inner.inner }
+%agg.nested = type {i32, i32, %agg.inner, i32}
+
+define void @test_nested_aggregate_const(%agg.nested *%ptr) {
+; CHECK-LABEL: name: test_nested_aggregate_const
+; CHECK: [[BASE:%[0-9]+]]:_(p0) = COPY $x0
+; CHECK: [[CST1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+; CHECK: [[CST2:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
+; CHECK: [[CST3:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
+; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+; CHECK: [[CST5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+; CHECK: [[CST6:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+; CHECK: G_STORE [[CST1]](s32), [[BASE]](p0) :: (store 4 into %ir.ptr, align 8)
+; CHECK: [[CST7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[CST7]](s64)
+; CHECK: G_STORE [[CST1]](s32), [[GEP1]](p0) :: (store 4 into %ir.ptr + 4)
+; CHECK: [[CST8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[CST8]](s64)
+; CHECK: G_STORE [[CST2]](s16), [[GEP2]](p0) :: (store 2 into %ir.ptr + 8, align 8)
+; CHECK: [[CST9:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[CST9]](s64)
+; CHECK: G_STORE [[CST3]](s8), [[GEP3]](p0) :: (store 1 into %ir.ptr + 10, align 2)
+; CHECK: [[CST10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[CST10]](s64)
+; CHECK: G_STORE [[CST4]](s64), [[GEP4]](p0) :: (store 8 into %ir.ptr + 16)
+; CHECK: [[CST11:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[CST11]](s64)
+; CHECK: G_STORE [[CST5]](s64), [[GEP5]](p0) :: (store 8 into %ir.ptr + 24)
+; CHECK: [[CST12:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[CST12]](s64)
+; CHECK: G_STORE [[CST6]](s32), [[GEP6]](p0) :: (store 4 into %ir.ptr + 32, align 8)
+  store %agg.nested { i32 1, i32 1, %agg.inner { i16 2, i8 3, %agg.inner.inner {i64 5, i64 8} }, i32 13}, %agg.nested *%ptr
+  ret void
+}

Modified: llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll?rev=332449&r1=332448&r2=332449&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll Wed May 16 03:32:02 2018
@@ -55,19 +55,25 @@ define void @take_128bit_struct([2 x i64
 }
 
 ; CHECK-LABEL: name: test_split_struct
-; CHECK: [[STRUCT:%[0-9]+]]:_(s128) = G_LOAD {{.*}}(p0)
-; CHECK: [[LO:%[0-9]+]]:_(s64) = G_EXTRACT [[STRUCT]](s128), 0
-; CHECK: [[HI:%[0-9]+]]:_(s64) = G_EXTRACT [[STRUCT]](s128), 64
+; CHECK: [[LD1:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.ptr)
+; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP %0, [[CST]](s64)
+; CHECK: [[LD2:%[0-9]+]]:_(s64) = G_LOAD %3(p0) :: (load 8 from %ir.ptr + 8)
+; CHECK: [[IMPDEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF
+; CHECK: [[INS1:%[0-9]+]]:_(s128) = G_INSERT [[IMPDEF]], [[LD1]](s64), 0
+; CHECK: [[INS2:%[0-9]+]]:_(s128) = G_INSERT [[INS1]], [[LD2]](s64), 64
+; CHECK:  [[EXT1:%[0-9]+]]:_(s64) = G_EXTRACT [[INS2]](s128), 0
+; CHECK: [[EXT2:%[0-9]+]]:_(s64) = G_EXTRACT [[INS2]](s128), 64
 
 ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp
 ; CHECK: [[OFF:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFF]]
-; CHECK: G_STORE [[LO]](s64), [[ADDR]](p0) :: (store 8 into stack, align 0)
+; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFF]](s64)
+; CHECK: G_STORE [[EXT1]](s64), [[ADDR]](p0) :: (store 8 into stack, align 0)
 
 ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp
 ; CHECK: [[OFF:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFF]]
-; CHECK: G_STORE [[HI]](s64), [[ADDR]](p0) :: (store 8 into stack + 8, align 0)
+; CHECK: G_STORE [[EXT2]](s64), [[ADDR]](p0) :: (store 8 into stack + 8, align 0)
 define void @test_split_struct([2 x i64]* %ptr) {
   %struct = load [2 x i64], [2 x i64]* %ptr
   call void @take_split_struct([2 x i64]* null, i64 1, i64 2, i64 3,

Modified: llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator.ll?rev=332449&r1=332448&r2=332449&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator.ll Wed May 16 03:32:02 2018
@@ -70,8 +70,16 @@ define void @test_multiple_args(i64 %in)
 ; CHECK: [[ARG1:%[0-9]+]]:_(s192) = G_INSERT [[ARG0]], [[I64]](s64), 64
 ; CHECK: [[ARG2:%[0-9]+]]:_(s192) = G_INSERT [[ARG1]], [[I8]](s8), 128
 ; CHECK: [[ARG:%[0-9]+]]:_(s192) = COPY [[ARG2]]
-
-; CHECK: G_STORE [[ARG]](s192), [[ADDR]](p0)
+; CHECK: [[EXTA0:%[0-9]+]]:_(s64) = G_EXTRACT [[ARG]](s192), 0
+; CHECK: [[EXTA1:%[0-9]+]]:_(s64) = G_EXTRACT [[ARG]](s192), 64
+; CHECK: [[EXTA2:%[0-9]+]]:_(s8) = G_EXTRACT [[ARG]](s192), 128
+; CHECK: G_STORE [[EXTA0]](s64), [[ADDR]](p0) :: (store 8 into %ir.addr)
+; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST1]](s64)
+; CHECK: G_STORE [[EXTA1]](s64), [[GEP1]](p0) :: (store 8 into %ir.addr + 8)
+; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST2]](s64)
+; CHECK: G_STORE [[EXTA2]](s8), [[GEP2]](p0) :: (store 1 into %ir.addr + 16, align 8)
 ; CHECK: RET_ReallyLR
 define void @test_struct_formal({double, i64, i8} %in, {double, i64, i8}* %addr) {
   store {double, i64, i8} %in, {double, i64, i8}* %addr
@@ -81,7 +89,18 @@ define void @test_struct_formal({double,
 
 ; CHECK-LABEL: name: test_struct_return
 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK: [[VAL:%[0-9]+]]:_(s192) = G_LOAD [[ADDR]](p0)
+
+; CHECK: [[LD1:%[0-9]+]]:_(s64) = G_LOAD [[ADDR]](p0) :: (load 8 from %ir.addr)
+; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST1]](s64)
+; CHECK: [[LD2:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p0) :: (load 8 from %ir.addr + 8)
+; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST2]](s64)
+; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.addr + 16, align 8)
+; CHECK: [[IMPDEF:%[0-9]+]]:_(s192) = G_IMPLICIT_DEF
+; CHECK: [[INS1:%[0-9]+]]:_(s192) = G_INSERT [[IMPDEF]], [[LD1]](s64), 0
+; CHECK: [[INS2:%[0-9]+]]:_(s192) = G_INSERT [[INS1]], [[LD2]](s64), 64
+; CHECK: [[VAL:%[0-9]+]]:_(s192) = G_INSERT [[INS2]], [[LD3]](s32), 128
 
 ; CHECK: [[DBL:%[0-9]+]]:_(s64) = G_EXTRACT [[VAL]](s192), 0
 ; CHECK: [[I64:%[0-9]+]]:_(s64) = G_EXTRACT [[VAL]](s192), 64
@@ -98,8 +117,22 @@ define {double, i64, i32} @test_struct_r
 
 ; CHECK-LABEL: name: test_arr_call
 ; CHECK: hasCalls: true
-; CHECK: [[ARG:%[0-9]+]]:_(s256) = G_LOAD
-
+; CHECK: %0:_(p0) = COPY $x0
+; CHECK: [[LD1:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.addr)
+; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP %0, [[CST1]](s64)
+; CHECK: [[LD2:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p0) :: (load 8 from %ir.addr + 8)
+; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP %0, [[CST2]](s64)
+; CHECK: [[LD3:%[0-9]+]]:_(s64) = G_LOAD [[GEP2]](p0) :: (load 8 from %ir.addr + 16)
+; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP %0, [[CST3]](s64)
+; CHECK: [[LD4:%[0-9]+]]:_(s64) = G_LOAD [[GEP3]](p0) :: (load 8 from %ir.addr + 24)
+; CHECK: [[IMPDEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+; CHECK: [[INS1:%[0-9]+]]:_(s256) = G_INSERT [[IMPDEF]], [[LD1]](s64), 0
+; CHECK: [[INS2:%[0-9]+]]:_(s256) = G_INSERT [[INS1]], [[LD2]](s64), 64
+; CHECK: [[INS3:%[0-9]+]]:_(s256) = G_INSERT [[INS2]], [[LD3]](s64), 128
+; CHECK: [[ARG:%[0-9]+]]:_(s256) = G_INSERT [[INS3]], [[LD4]](s64), 192
 ; CHECK: [[E0:%[0-9]+]]:_(s64) = G_EXTRACT [[ARG]](s256), 0
 ; CHECK: [[E1:%[0-9]+]]:_(s64) = G_EXTRACT [[ARG]](s256), 64
 ; CHECK: [[E2:%[0-9]+]]:_(s64) = G_EXTRACT [[ARG]](s256), 128
@@ -240,19 +273,26 @@ define void @take_128bit_struct([2 x i64
 }
 
 ; CHECK-LABEL: name: test_split_struct
-; CHECK: [[STRUCT:%[0-9]+]]:_(s128) = G_LOAD {{.*}}(p0)
-; CHECK: [[LO:%[0-9]+]]:_(s64) = G_EXTRACT [[STRUCT]](s128), 0
-; CHECK: [[HI:%[0-9]+]]:_(s64) = G_EXTRACT [[STRUCT]](s128), 64
+; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0
+; CHECK: [[LO:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.ptr)
+; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST]](s64)
+; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 8 from %ir.ptr + 8)
+
+; CHECK: [[IMPDEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF
+; CHECK: [[INS1:%[0-9]+]]:_(s128) = G_INSERT [[IMPDEF]], [[LO]](s64), 0
+; CHECK: [[INS2:%[0-9]+]]:_(s128) = G_INSERT [[INS1]], [[HI]](s64), 64
+; CHECK: [[EXTLO:%[0-9]+]]:_(s64) = G_EXTRACT [[INS2]](s128), 0
+; CHECK: [[EXTHI:%[0-9]+]]:_(s64) = G_EXTRACT [[INS2]](s128), 64
 
 ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp
-; CHECK: [[OFF:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFF]]
-; CHECK: G_STORE [[LO]](s64), [[ADDR]](p0) :: (store 8 into stack, align 0)
-
+; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[CST2]](s64)
+; CHECK: G_STORE [[EXTLO]](s64), [[GEP2]](p0) :: (store 8 into stack, align 0)
 ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp
-; CHECK: [[OFF:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFF]]
-; CHECK: G_STORE [[HI]](s64), [[ADDR]](p0) :: (store 8 into stack + 8, align 0)
+; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[CST3]](s64)
+; CHECK: G_STORE [[EXTHI]](s64), [[GEP3]](p0) :: (store 8 into stack + 8, align 0)
 define void @test_split_struct([2 x i64]* %ptr) {
   %struct = load [2 x i64], [2 x i64]* %ptr
   call void @take_split_struct([2 x i64]* null, i64 1, i64 2, i64 3,

Modified: llvm/trunk/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll?rev=332449&r1=332448&r2=332449&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll Wed May 16 03:32:02 2018
@@ -19,11 +19,11 @@ declare i32 @llvm.eh.typeid.for(i8*)
 
 ; CHECK:   [[BAD]].{{[a-z]+}} (landing-pad):
 ; CHECK:     EH_LABEL
-; CHECK:     [[UNDEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF
 ; CHECK:     [[PTR:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK:     [[VAL_WITH_PTR:%[0-9]+]]:_(s128) = G_INSERT [[UNDEF]], [[PTR]](p0), 0
 ; CHECK:     [[SEL_PTR:%[0-9]+]]:_(p0) = COPY $x1
 ; CHECK:     [[SEL:%[0-9]+]]:_(s32) = G_PTRTOINT [[SEL_PTR]]
+; CHECK:     [[UNDEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF
+; CHECK:     [[VAL_WITH_PTR:%[0-9]+]]:_(s128) = G_INSERT [[UNDEF]], [[PTR]](p0), 0
 ; CHECK:     [[PTR_SEL:%[0-9]+]]:_(s128) = G_INSERT [[VAL_WITH_PTR]], [[SEL]](s32), 64
 ; CHECK:     [[PTR_RET:%[0-9]+]]:_(s64) = G_EXTRACT [[PTR_SEL]](s128), 0
 ; CHECK:     [[SEL_RET:%[0-9]+]]:_(s32) = G_EXTRACT [[PTR_SEL]](s128), 64

Modified: llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll?rev=332449&r1=332448&r2=332449&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll Wed May 16 03:32:02 2018
@@ -16,18 +16,10 @@ declare void @_Unwind_Resume(i8*)
 ; CHECK:     EH_LABEL
 
 ; CHECK:     [[PTR:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK:     [[STRUCT_PTR:%[0-9]+]]:_(s64) = G_PTRTOINT [[PTR]](p0)
-
 ; CHECK:     [[SEL_PTR:%[0-9]+]]:_(p0) = COPY $x1
-; CHECK:     [[SEL:%[0-9]+]]:_(s32) = G_PTRTOINT [[SEL_PTR]]
-; CHECK:     [[STRUCT_SEL:%[0-9]+]]:_(s64) = G_INSERT {{%[0-9]+}}, [[SEL]](s32), 0
-
-; CHECK:     [[PTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[STRUCT_PTR]](s64)
-; CHECK:     G_STORE [[PTR]](p0), {{%[0-9]+}}(p0)
-
-; CHECK:     [[SEL_TMP:%[0-9]+]]:_(s32) = G_EXTRACT [[STRUCT_SEL]](s64), 0
-; CHECK:     [[SEL:%[0-9]+]]:_(s32) = COPY [[SEL_TMP]]
-; CHECK:     G_STORE [[SEL]](s32), {{%[0-9]+}}(p0)
+; CHECK:     [[SEL_PTR_INT:%[0-9]+]]:_(s32) = G_PTRTOINT [[SEL_PTR]](p0)
+; CHECK:     G_STORE [[PTR]](p0), %0(p0) :: (store 8 into %ir.exn.slot)
+; CHECK:     G_STORE [[SEL_PTR_INT]](s32), %1(p0) :: (store 4 into %ir.ehselector.slot)
 
 define void @bar() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
   %exn.slot = alloca i8*

Added: llvm/trunk/test/CodeGen/AArch64/GlobalISel/translate-constant-dag.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/translate-constant-dag.ll?rev=332449&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/translate-constant-dag.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/translate-constant-dag.ll Wed May 16 03:32:02 2018
@@ -0,0 +1,97 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -mtriple aarch64 -O0 -stop-after=instruction-select -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
+
+%dag = type { { { i8, { i8 } }, { { i8, { i8 } }, { i8 } } }, { { i8, { i8 } }, { i8 } } }
+
+define void @test_const(%dag* %dst) {
+  ; CHECK-LABEL: name: test_const
+  ; CHECK: bb.1.entry:
+  ; CHECK:   liveins: $x0
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+  ; CHECK:   [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 10
+  ; CHECK:   [[MOVi32imm1:%[0-9]+]]:gpr32 = MOVi32imm 20
+  ; CHECK:   [[MOVi32imm2:%[0-9]+]]:gpr32 = MOVi32imm 50
+  ; CHECK:   STRBBui [[MOVi32imm]], [[COPY]], 0 :: (store 1 into %ir.dst)
+  ; CHECK:   STRBBui [[MOVi32imm1]], [[COPY]], 1 :: (store 1 into %ir.dst + 1)
+  ; CHECK:   STRBBui [[MOVi32imm]], [[COPY]], 2 :: (store 1 into %ir.dst + 2)
+  ; CHECK:   STRBBui [[MOVi32imm1]], [[COPY]], 3 :: (store 1 into %ir.dst + 3)
+  ; CHECK:   STRBBui [[MOVi32imm2]], [[COPY]], 4 :: (store 1 into %ir.dst + 4)
+  ; CHECK:   STRBBui [[MOVi32imm]], [[COPY]], 5 :: (store 1 into %ir.dst + 5)
+  ; CHECK:   STRBBui [[MOVi32imm1]], [[COPY]], 6 :: (store 1 into %ir.dst + 6)
+  ; CHECK:   STRBBui [[MOVi32imm1]], [[COPY]], 7 :: (store 1 into %ir.dst + 7)
+  ; CHECK:   STRBBui [[MOVi32imm]], [[COPY]], 0 :: (store 1 into %ir.dst)
+  ; CHECK:   STRBBui [[MOVi32imm1]], [[COPY]], 1 :: (store 1 into %ir.dst + 1)
+  ; CHECK:   STRBBui [[MOVi32imm]], [[COPY]], 2 :: (store 1 into %ir.dst + 2)
+  ; CHECK:   STRBBui [[MOVi32imm1]], [[COPY]], 3 :: (store 1 into %ir.dst + 3)
+  ; CHECK:   STRBBui [[MOVi32imm1]], [[COPY]], 4 :: (store 1 into %ir.dst + 4)
+  ; CHECK:   STRBBui [[MOVi32imm]], [[COPY]], 5 :: (store 1 into %ir.dst + 5)
+  ; CHECK:   STRBBui [[MOVi32imm1]], [[COPY]], 6 :: (store 1 into %ir.dst + 6)
+  ; CHECK:   STRBBui [[MOVi32imm1]], [[COPY]], 7 :: (store 1 into %ir.dst + 7)
+  ; CHECK:   RET_ReallyLR
+entry:
+ %updated = insertvalue
+   ; Check that we're visiting constants with shared parts
+   ; (deduplicated via LLVMContext, forming a proper DAG) correctly:
+   %dag {
+     { { i8, { i8 } }, { { i8, { i8 } }, { i8 } } } {
+       { i8, { i8 } } {
+         i8 10,
+         { i8 } { i8 20 }
+       },
+       { { i8, { i8 } }, { i8 } } {
+         { i8, { i8 } } {
+           i8 10,
+           { i8 } { i8 20 }
+         },
+         { i8 } { i8 20 }
+       }
+     },
+     { { i8, { i8 } }, { i8 } } {
+       { i8, { i8 } } {
+         i8 10,
+         { i8 } { i8 20 }
+       },
+       { i8 } { i8 20 }
+     }
+   },
+   { { i8, { i8 } }, { i8 } } {
+     { i8, { i8 } } {
+       i8 10,
+       { i8 } { i8 20 }
+     },
+     { i8 } { i8 50 }
+   },
+   0,
+   1
+ store %dag %updated, %dag* %dst
+ ; 10, 20, 10, 20, 50, 10, 20, 20 sequence is expected
+
+ store
+   ; Check that we didn't overwrite a previously seen constant
+   ; while processing an insertvalue into it:
+   %dag {
+     { { i8, { i8 } }, { { i8, { i8 } }, { i8 } } } {
+       { i8, { i8 } } {
+         i8 10,
+         { i8 } { i8 20 }
+       },
+       { { i8, { i8 } }, { i8 } } {
+         { i8, { i8 } } {
+           i8 10,
+           { i8 } { i8 20 }
+         },
+         { i8 } { i8 20 }
+       }
+     },
+     { { i8, { i8 } }, { i8 } } {
+       { i8, { i8 } } {
+         i8 10,
+         { i8 } { i8 20 }
+       },
+       { i8 } { i8 20 }
+     }
+   },
+   %dag* %dst
+ ; 10, 20, 10, 20, 20, 10, 20, 20 sequence is expected
+ ret void
+}

Modified: llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll?rev=332449&r1=332448&r2=332449&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll Wed May 16 03:32:02 2018
@@ -522,7 +522,8 @@ define i32 @test_constantstruct_v2s32()
 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
 ; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C2]](s32)
-; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>)
+; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[C3]]
   %vec = extractvalue %struct.v2s32 {<2 x i32><i32 1, i32 2>}, 0
   %elt = extractelement <2 x i32> %vec, i32 0
   ret i32 %elt
@@ -537,12 +538,8 @@ define i32 @test_constantstruct_v2s32_s3
 ; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C2]](s32)
 ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
 ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-; CHECK: [[C5:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF
-; CHECK: [[C6:%[0-9]+]]:_(s128) = G_INSERT [[C5]], [[VEC]](<2 x s32>), 0
-; CHECK: [[C7:%[0-9]+]]:_(s128) = G_INSERT [[C6]], [[C3]](s32), 64
-; CHECK: [[C8:%[0-9]+]]:_(s128) = G_INSERT [[C7]], [[C4]](s32), 96
-; CHECK: [[EXT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[C8]](s128), 0
-; CHECK: G_EXTRACT_VECTOR_ELT [[EXT]](<2 x s32>)
+; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[C5]](s32)
   %vec = extractvalue %struct.v2s32.s32.s32 {<2 x i32><i32 1, i32 2>, i32 3, i32 4}, 0
   %elt = extractelement <2 x i32> %vec, i32 0
   ret i32 %elt

Modified: llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll?rev=332449&r1=332448&r2=332449&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll Wed May 16 03:32:02 2018
@@ -30,7 +30,7 @@ define arm_aapcscc i32* @test_call_simpl
 ; CHECK: ADJCALLSTACKDOWN 8, 0, 14, $noreg, implicit-def $sp, implicit $sp
 ; CHECK-DAG: $r0 = COPY [[BVREG]]
 ; CHECK-DAG: $r1 = COPY [[AVREG]]
-; CHECK-DAG: $r2 = COPY [[BVREG]]
+; CHECK-DxAG: $r2 = COPY [[BVREG]]
 ; CHECK-DAG: $r3 = COPY [[AVREG]]
 ; CHECK: [[SP1:%[0-9]+]]:_(p0) = COPY $sp
 ; CHECK: [[OFF1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
@@ -191,8 +191,13 @@ define arm_aapcscc [3 x i32] @test_tiny_
 ; CHECK: [[R0:%[0-9]+]]:_(s32) = COPY $r0
 ; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY $r1
 ; CHECK: [[ARG_ARR:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32)
+; CHECK: [[EXT1:%[0-9]+]]:_(s32) = G_EXTRACT [[ARG_ARR]](s64), 0
+; CHECK: [[EXT2:%[0-9]+]]:_(s32) = G_EXTRACT [[ARG_ARR]](s64), 32
+; CHECK: [[IMPDEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+; CHECK: [[INS1:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF]], [[EXT1]](s32), 0
+; CHECK: [[INS2:%[0-9]+]]:_(s64) = G_INSERT [[INS1]], [[EXT2]](s32), 32
 ; CHECK: ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def $sp, implicit $sp
-; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARG_ARR]](s64)
+; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS2]](s64)
 ; CHECK: $r0 = COPY [[R0]]
 ; CHECK: $r1 = COPY [[R1]]
 ; CHECK: BL @tiny_int_arrays_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit-def $r0, implicit-def $r1
@@ -201,7 +206,14 @@ define arm_aapcscc [3 x i32] @test_tiny_
 ; CHECK: [[R2:%[0-9]+]]:_(s32) = COPY $r2
 ; CHECK: [[RES_ARR:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32)
 ; CHECK: ADJCALLSTACKUP 0, 0, 14, $noreg, implicit-def $sp, implicit $sp
-; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[RES_ARR]](s96)
+; CHECK: [[EXT3:%[0-9]+]]:_(s32) = G_EXTRACT [[RES_ARR]](s96), 0
+; CHECK: [[EXT4:%[0-9]+]]:_(s32) = G_EXTRACT [[RES_ARR]](s96), 32
+; CHECK: [[EXT5:%[0-9]+]]:_(s32) = G_EXTRACT [[RES_ARR]](s96), 64
+; CHECK: [[IMPDEF2:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
+; CHECK: [[INS3:%[0-9]+]]:_(s96) = G_INSERT [[IMPDEF2]], [[EXT3]](s32), 0
+; CHECK: [[INS4:%[0-9]+]]:_(s96) = G_INSERT [[INS3]], [[EXT4]](s32), 32
+; CHECK: [[INS5:%[0-9]+]]:_(s96) = G_INSERT [[INS4]], [[EXT5]](s32), 64
+; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS5]](s96)
 ; FIXME: This doesn't seem correct with regard to the AAPCS docs (which say
 ; that composite types larger than 4 bytes should be passed through memory),
 ; but it's what DAGISel does. We should fix it in the common code for both.
@@ -225,9 +237,19 @@ define arm_aapcscc void @test_multiple_i
 ; CHECK: [[R3:%[0-9]+]]:_(s32) = COPY $r3
 ; CHECK: [[ARG_ARR0:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32)
 ; CHECK: [[ARG_ARR1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R2]](s32), [[R3]](s32)
+; CHECK: [[EXT1:%[0-9]+]]:_(s32) = G_EXTRACT [[ARG_ARR0]](s64), 0
+; CHECK: [[EXT2:%[0-9]+]]:_(s32) = G_EXTRACT [[ARG_ARR0]](s64), 32
+; CHECK: [[EXT3:%[0-9]+]]:_(s32) = G_EXTRACT [[ARG_ARR1]](s64), 0
+; CHECK: [[EXT4:%[0-9]+]]:_(s32) = G_EXTRACT [[ARG_ARR1]](s64), 32
+; CHECK: [[IMPDEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+; CHECK: [[INS1:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF]], [[EXT1]](s32), 0
+; CHECK: [[INS2:%[0-9]+]]:_(s64) = G_INSERT [[INS1]], [[EXT2]](s32), 32
+; CHECK: [[IMPDEF2:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+; CHECK: [[INS3:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF2]], [[EXT3]](s32), 0
+; CHECK: [[INS4:%[0-9]+]]:_(s64) = G_INSERT [[INS3]], [[EXT4]](s32), 32
 ; CHECK: ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def $sp, implicit $sp
-; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARG_ARR0]](s64)
-; CHECK: [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARG_ARR1]](s64)
+; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS2]](s64)
+; CHECK: [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS4]](s64)
 ; CHECK: $r0 = COPY [[R0]]
 ; CHECK: $r1 = COPY [[R1]]
 ; CHECK: $r2 = COPY [[R2]]
@@ -259,8 +281,9 @@ define arm_aapcscc void @test_large_int_
 ; CHECK: [[LAST_STACK_ELEMENT_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[LAST_STACK_ID]]
 ; CHECK: [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[LAST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[LAST_STACK_ID]]
 ; CHECK: [[ARG_ARR:%[0-9]+]]:_(s640) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32), [[R3]](s32), [[FIRST_STACK_ELEMENT]](s32), {{.*}}, [[LAST_STACK_ELEMENT]](s32)
+; CHECK: [[INS:%[0-9]+]]:_(s640) = G_INSERT {{.*}}, {{.*}}(s32), 608
 ; CHECK: ADJCALLSTACKDOWN 64, 0, 14, $noreg, implicit-def $sp, implicit $sp
-; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32), [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32), {{.*}}, [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARG_ARR]](s640)
+; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32), [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32), {{.*}}, [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS]](s640)
 ; CHECK: $r0 = COPY [[R0]]
 ; CHECK: $r1 = COPY [[R1]]
 ; CHECK: $r2 = COPY [[R2]]
@@ -301,8 +324,15 @@ define arm_aapcscc [2 x float] @test_fp_
 ; CHECK: [[ARR2_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[ARR2_ID]]
 ; CHECK: [[ARR2:%[0-9]+]]:_(s64) = G_LOAD [[ARR2_FI]]{{.*}}load 8 from %fixed-stack.[[ARR2_ID]]
 ; CHECK: [[ARR_MERGED:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[ARR0]](s64), [[ARR1]](s64), [[ARR2]](s64)
+; CHECK: [[EXT1:%[0-9]+]]:_(s64) = G_EXTRACT [[ARR_MERGED]](s192), 0
+; CHECK: [[EXT2:%[0-9]+]]:_(s64) = G_EXTRACT [[ARR_MERGED]](s192), 64
+; CHECK: [[EXT3:%[0-9]+]]:_(s64) = G_EXTRACT [[ARR_MERGED]](s192), 128
+; CHECK: [[IMPDEF:%[0-9]+]]:_(s192) = G_IMPLICIT_DEF
+; CHECK: [[INS1:%[0-9]+]]:_(s192) = G_INSERT [[IMPDEF]], [[EXT1]](s64), 0
+; CHECK: [[INS2:%[0-9]+]]:_(s192) = G_INSERT [[INS1]], [[EXT2]](s64), 64
+; CHECK: [[INS3:%[0-9]+]]:_(s192) = G_INSERT [[INS2]], [[EXT3]](s64), 128
 ; CHECK: ADJCALLSTACKDOWN 8, 0, 14, $noreg, implicit-def $sp, implicit $sp
-; CHECK: [[ARR0:%[0-9]+]]:_(s64), [[ARR1:%[0-9]+]]:_(s64), [[ARR2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[ARR_MERGED]](s192)
+; CHECK: [[ARR0:%[0-9]+]]:_(s64), [[ARR1:%[0-9]+]]:_(s64), [[ARR2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[INS3]](s192)
 ; CHECK: [[ARR0_0:%[0-9]+]]:_(s32), [[ARR0_1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARR0]](s64)
 ; LITTLE: $r0 = COPY [[ARR0_0]](s32)
 ; LITTLE: $r1 = COPY [[ARR0_1]](s32)
@@ -322,7 +352,12 @@ define arm_aapcscc [2 x float] @test_fp_
 ; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY $r1
 ; CHECK: [[R_MERGED:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32)
 ; CHECK: ADJCALLSTACKUP 8, 0, 14, $noreg, implicit-def $sp, implicit $sp
-; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[R_MERGED]](s64)
+; CHECK: [[EXT4:%[0-9]+]]:_(s32) = G_EXTRACT [[R_MERGED]](s64), 0
+; CHECK: [[EXT5:%[0-9]+]]:_(s32) = G_EXTRACT [[R_MERGED]](s64), 32
+; CHECK: [[IMPDEF2:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+; CHECK: [[INS4:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF2]], [[EXT4]](s32), 0
+; CHECK: [[INS5:%[0-9]+]]:_(s64) = G_INSERT [[INS4]], [[EXT5]](s32), 32
+; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS5]](s64)
 ; CHECK: $r0 = COPY [[R0]]
 ; CHECK: $r1 = COPY [[R1]]
 ; CHECK: BX_RET 14, $noreg, implicit $r0, implicit $r1
@@ -358,10 +393,33 @@ define arm_aapcs_vfpcc [4 x float] @test
 ; CHECK: [[X_ARR:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[X0]](s64), [[X1]](s64), [[X2]](s64)
 ; CHECK: [[Y_ARR:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32), [[Y2]](s32)
 ; CHECK: [[Z_ARR:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[Z0]](s64), [[Z1]](s64), [[Z2]](s64), [[Z3]](s64)
+; CHECK: [[EXT1:%[0-9]+]]:_(s64) = G_EXTRACT [[X_ARR]](s192), 0
+; CHECK: [[EXT2:%[0-9]+]]:_(s64) = G_EXTRACT [[X_ARR]](s192), 64
+; CHECK: [[EXT3:%[0-9]+]]:_(s64) = G_EXTRACT [[X_ARR]](s192), 128
+; CHECK: [[EXT4:%[0-9]+]]:_(s32) = G_EXTRACT [[Y_ARR]](s96), 0
+; CHECK: [[EXT5:%[0-9]+]]:_(s32) = G_EXTRACT [[Y_ARR]](s96), 32
+; CHECK: [[EXT6:%[0-9]+]]:_(s32) = G_EXTRACT [[Y_ARR]](s96), 64
+; CHECK: [[EXT7:%[0-9]+]]:_(s64) = G_EXTRACT [[Z_ARR]](s256), 0
+; CHECK: [[EXT8:%[0-9]+]]:_(s64) = G_EXTRACT [[Z_ARR]](s256), 64
+; CHECK: [[EXT9:%[0-9]+]]:_(s64) = G_EXTRACT [[Z_ARR]](s256), 128
+; CHECK: [[EXT10:%[0-9]+]]:_(s64) = G_EXTRACT [[Z_ARR]](s256), 192
+; CHECK: [[IMPDEF:%[0-9]+]]:_(s192) = G_IMPLICIT_DEF
+; CHECK: [[INS1:%[0-9]+]]:_(s192) = G_INSERT [[IMPDEF]], [[EXT1]](s64), 0
+; CHECK: [[INS2:%[0-9]+]]:_(s192) = G_INSERT [[INS1]], [[EXT2]](s64), 64
+; CHECK: [[INS3:%[0-9]+]]:_(s192) = G_INSERT [[INS2]], [[EXT3]](s64), 128
+; CHECK: [[IMPDEF2:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
+; CHECK: [[INS4:%[0-9]+]]:_(s96) = G_INSERT [[IMPDEF2]], [[EXT4]](s32), 0
+; CHECK: [[INS5:%[0-9]+]]:_(s96) = G_INSERT [[INS4]], [[EXT5]](s32), 32
+; CHECK: [[INS6:%[0-9]+]]:_(s96) = G_INSERT [[INS5]], [[EXT6]](s32), 64
+; CHECK: [[IMPDEF3:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+; CHECK: [[INS7:%[0-9]+]]:_(s256) = G_INSERT [[IMPDEF3]], [[EXT7]](s64), 0
+; CHECK: [[INS8:%[0-9]+]]:_(s256) = G_INSERT [[INS7]], [[EXT8]](s64), 64
+; CHECK: [[INS9:%[0-9]+]]:_(s256) = G_INSERT [[INS8]], [[EXT9]](s64), 128
+; CHECK: [[INS10:%[0-9]+]]:_(s256) = G_INSERT [[INS9]], [[EXT10]](s64), 192
 ; CHECK: ADJCALLSTACKDOWN 32, 0, 14, $noreg, implicit-def $sp, implicit $sp
-; CHECK: [[X0:%[0-9]+]]:_(s64), [[X1:%[0-9]+]]:_(s64), [[X2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[X_ARR]](s192)
-; CHECK: [[Y0:%[0-9]+]]:_(s32), [[Y1:%[0-9]+]]:_(s32), [[Y2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[Y_ARR]](s96)
-; CHECK: [[Z0:%[0-9]+]]:_(s64), [[Z1:%[0-9]+]]:_(s64), [[Z2:%[0-9]+]]:_(s64), [[Z3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[Z_ARR]](s256)
+; CHECK: [[X0:%[0-9]+]]:_(s64), [[X1:%[0-9]+]]:_(s64), [[X2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[INS3]](s192)
+; CHECK: [[Y0:%[0-9]+]]:_(s32), [[Y1:%[0-9]+]]:_(s32), [[Y2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS6]](s96)
+; CHECK: [[Z0:%[0-9]+]]:_(s64), [[Z1:%[0-9]+]]:_(s64), [[Z2:%[0-9]+]]:_(s64), [[Z3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[INS10]](s256)
 ; CHECK: $d0 = COPY [[X0]](s64)
 ; CHECK: $d1 = COPY [[X1]](s64)
 ; CHECK: $d2 = COPY [[X2]](s64)
@@ -391,7 +449,16 @@ define arm_aapcs_vfpcc [4 x float] @test
 ; CHECK: [[R3:%[0-9]+]]:_(s32) = COPY $s3
 ; CHECK: [[R_MERGED:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32), [[R3]](s32)
 ; CHECK: ADJCALLSTACKUP 32, 0, 14, $noreg, implicit-def $sp, implicit $sp
-; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[R_MERGED]](s128)
+; CHECK: [[EXT11:%[0-9]+]]:_(s32) = G_EXTRACT [[R_MERGED]](s128), 0
+; CHECK: [[EXT12:%[0-9]+]]:_(s32) = G_EXTRACT [[R_MERGED]](s128), 32
+; CHECK: [[EXT13:%[0-9]+]]:_(s32) = G_EXTRACT [[R_MERGED]](s128), 64
+; CHECK: [[EXT14:%[0-9]+]]:_(s32) = G_EXTRACT [[R_MERGED]](s128), 96
+; CHECK: [[IMPDEF4:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF
+; CHECK: [[INS11:%[0-9]+]]:_(s128) = G_INSERT [[IMPDEF4]], [[EXT11]](s32), 0
+; CHECK: [[INS12:%[0-9]+]]:_(s128) = G_INSERT [[INS11]], [[EXT12]](s32), 32
+; CHECK: [[INS13:%[0-9]+]]:_(s128) = G_INSERT [[INS12]], [[EXT13]](s32), 64
+; CHECK: [[INS14:%[0-9]+]]:_(s128) = G_INSERT [[INS13]], [[EXT14]](s32), 96
+; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS14]](s128)
 ; CHECK: $s0 = COPY [[R0]]
 ; CHECK: $s1 = COPY [[R1]]
 ; CHECK: $s2 = COPY [[R2]]
@@ -421,8 +488,9 @@ define arm_aapcscc [2 x i32*] @test_toug
 ; CHECK: [[LAST_STACK_ELEMENT_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[LAST_STACK_ID]]
 ; CHECK: [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[LAST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[LAST_STACK_ID]]
 ; CHECK: [[ARG_ARR:%[0-9]+]]:_(s768) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32), [[R3]](s32), [[FIRST_STACK_ELEMENT]](s32), {{.*}}, [[LAST_STACK_ELEMENT]](s32)
+; CHECK: [[INS:%[0-9]+]]:_(s768) = G_INSERT {{.*}}, {{.*}}(s32), 736
 ; CHECK: ADJCALLSTACKDOWN 80, 0, 14, $noreg, implicit-def $sp, implicit $sp
-; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32), [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32), {{.*}}, [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARG_ARR]](s768)
+; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32), [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32), {{.*}}, [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS]](s768)
 ; CHECK: $r0 = COPY [[R0]]
 ; CHECK: $r1 = COPY [[R1]]
 ; CHECK: $r2 = COPY [[R2]]
@@ -442,7 +510,12 @@ define arm_aapcscc [2 x i32*] @test_toug
 ; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY $r1
 ; CHECK: [[RES_ARR:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32)
 ; CHECK: ADJCALLSTACKUP 80, 0, 14, $noreg, implicit-def $sp, implicit $sp
-; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[RES_ARR]](s64)
+; CHECK: [[EXT1:%[0-9]+]]:_(p0) = G_EXTRACT [[RES_ARR]](s64), 0
+; CHECK: [[EXT2:%[0-9]+]]:_(p0) = G_EXTRACT [[RES_ARR]](s64), 32
+; CHECK: [[IMPDEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+; CHECK: [[INS2:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF]], [[EXT1]](p0), 0
+; CHECK: [[INS3:%[0-9]+]]:_(s64) = G_INSERT [[INS2]], [[EXT2]](p0), 32
+; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS3]](s64)
 ; CHECK: $r0 = COPY [[R0]]
 ; CHECK: $r1 = COPY [[R1]]
 ; CHECK: BX_RET 14, $noreg, implicit $r0, implicit $r1
@@ -459,8 +532,13 @@ define arm_aapcscc {i32, i32} @test_stru
 ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY $r0
 ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY $r1
 ; CHECK: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32)
+; CHECK: [[EXT1:%[0-9]+]]:_(s32) = G_EXTRACT [[X]](s64), 0
+; CHECK: [[EXT2:%[0-9]+]]:_(s32) = G_EXTRACT [[X]](s64), 32
+; CHECK: [[IMPDEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+; CHECK: [[INS1:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF]], [[EXT1]](s32), 0
+; CHECK: [[INS2:%[0-9]+]]:_(s64) = G_INSERT [[INS1]], [[EXT2]](s32), 32
 ; CHECK: ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def $sp, implicit $sp
-; CHECK: [[X0:%[0-9]+]]:_(s32), [[X1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[X]](s64)
+; CHECK: [[X0:%[0-9]+]]:_(s32), [[X1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS2]](s64)
 ; CHECK-DAG: $r0 = COPY [[X0]](s32)
 ; CHECK-DAG: $r1 = COPY [[X1]](s32)
 ; CHECK: BL @structs_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit-def $r0, implicit-def $r1
@@ -468,7 +546,12 @@ define arm_aapcscc {i32, i32} @test_stru
 ; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY $r1
 ; CHECK: [[R:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32)
 ; CHECK: ADJCALLSTACKUP 0, 0, 14, $noreg, implicit-def $sp, implicit $sp
-; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[R]](s64)
+; CHECK: [[EXT3:%[0-9]+]]:_(s32) = G_EXTRACT [[R]](s64), 0
+; CHECK: [[EXT4:%[0-9]+]]:_(s32) = G_EXTRACT [[R]](s64), 32
+; CHECK: [[IMPDEF2:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+; CHECK: [[INS3:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF2]], [[EXT3]](s32), 0
+; CHECK: [[INS4:%[0-9]+]]:_(s64) = G_INSERT [[INS3]], [[EXT4]](s32), 32
+; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS4]](s64)
 ; CHECK: $r0 = COPY [[R0]](s32)
 ; CHECK: $r1 = COPY [[R1]](s32)
 ; CHECK: BX_RET 14, $noreg, implicit $r0, implicit $r1