[llvm] r269012 - [WebAssembly] Move register stackification and coloring to a late phase.

Dan Gohman via llvm-commits llvm-commits at lists.llvm.org
Mon May 9 21:24:02 PDT 2016


Author: djg
Date: Mon May  9 23:24:02 2016
New Revision: 269012

URL: http://llvm.org/viewvc/llvm-project?rev=269012&view=rev
Log:
[WebAssembly] Move register stackification and coloring to a late phase.

Move the register stackification and coloring passes to run very late, after
PEI, tail duplication, and most other passes. This means that all code emitted
and expanded by those passes is now exposed to these passes. This also
eliminates the need for prologue/epilogue code to be manually stackified,
which significantly simplifies the code.

This does require running LiveIntervals a second time. It's useful to think
of these late passes not as late optimization passes, but as a domain-specific
compression algorithm based on knowledge of liveness information. It's used to
compress the code after all conventional optimizations are complete, which is
why it uses LiveIntervals at a phase when actual optimization passes don't
typically need it.

Differential Revision: http://reviews.llvm.org/D20075

Added:
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
Modified:
    llvm/trunk/lib/CodeGen/BranchFolding.cpp
    llvm/trunk/lib/Target/WebAssembly/CMakeLists.txt
    llvm/trunk/lib/Target/WebAssembly/WebAssembly.h
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyPEI.cpp
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
    llvm/trunk/test/CodeGen/WebAssembly/byval.ll
    llvm/trunk/test/CodeGen/WebAssembly/cfg-stackify.ll
    llvm/trunk/test/CodeGen/WebAssembly/mem-intrinsics.ll
    llvm/trunk/test/CodeGen/WebAssembly/offset.ll
    llvm/trunk/test/CodeGen/WebAssembly/reg-stackify.ll
    llvm/trunk/test/CodeGen/WebAssembly/store-results.ll
    llvm/trunk/test/CodeGen/WebAssembly/userstack.ll

Modified: llvm/trunk/lib/CodeGen/BranchFolding.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/BranchFolding.cpp?rev=269012&r1=269011&r2=269012&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/BranchFolding.cpp (original)
+++ llvm/trunk/lib/CodeGen/BranchFolding.cpp Mon May  9 23:24:02 2016
@@ -1845,7 +1845,7 @@ bool BranchFolder::HoistCommonCodeInSucc
       if (!MO.isReg() || !MO.isDef() || MO.isDead())
         continue;
       unsigned Reg = MO.getReg();
-      if (!Reg)
+      if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg))
         continue;
       LocalDefs.push_back(Reg);
       addRegAndItsAliases(Reg, TRI, LocalDefsSet);

Modified: llvm/trunk/lib/Target/WebAssembly/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/CMakeLists.txt?rev=269012&r1=269011&r2=269012&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/CMakeLists.txt (original)
+++ llvm/trunk/lib/Target/WebAssembly/CMakeLists.txt Mon May  9 23:24:02 2016
@@ -22,13 +22,16 @@ add_llvm_target(WebAssemblyCodeGen
   WebAssemblyLowerBrUnless.cpp
   WebAssemblyMachineFunctionInfo.cpp
   WebAssemblyMCInstLower.cpp
+  WebAssemblyOptimizeLiveIntervals.cpp
   WebAssemblyOptimizeReturned.cpp
   WebAssemblyPeephole.cpp
   WebAssemblyPEI.cpp
+  WebAssemblyPrepareForLiveIntervals.cpp
   WebAssemblyRegisterInfo.cpp
   WebAssemblyRegColoring.cpp
   WebAssemblyRegNumbering.cpp
   WebAssemblyRegStackify.cpp
+  WebAssemblyReplacePhysRegs.cpp
   WebAssemblySelectionDAGInfo.cpp
   WebAssemblySetP2AlignOperands.cpp
   WebAssemblyStoreResults.cpp

Modified: llvm/trunk/lib/Target/WebAssembly/WebAssembly.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssembly.h?rev=269012&r1=269011&r2=269012&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssembly.h (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssembly.h Mon May  9 23:24:02 2016
@@ -23,17 +23,25 @@ namespace llvm {
 class WebAssemblyTargetMachine;
 class FunctionPass;
 
+// LLVM IR passes.
 FunctionPass *createWebAssemblyOptimizeReturned();
 
+// ISel and immediate followup passes.
 FunctionPass *createWebAssemblyISelDag(WebAssemblyTargetMachine &TM,
                                        CodeGenOpt::Level OptLevel);
 FunctionPass *createWebAssemblyArgumentMove();
 FunctionPass *createWebAssemblySetP2AlignOperands();
 
+// Regalloc-time passes.
+FunctionPass *createWebAssemblyPEI();
+
+// Late passes.
+FunctionPass *createWebAssemblyReplacePhysRegs();
+FunctionPass *createWebAssemblyPrepareForLiveIntervals();
+FunctionPass *createWebAssemblyOptimizeLiveIntervals();
 FunctionPass *createWebAssemblyStoreResults();
 FunctionPass *createWebAssemblyRegStackify();
 FunctionPass *createWebAssemblyRegColoring();
-FunctionPass *createWebAssemblyPEI();
 FunctionPass *createWebAssemblyFixIrreducibleControlFlow();
 FunctionPass *createWebAssemblyCFGStackify();
 FunctionPass *createWebAssemblyLowerBrUnless();

Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp?rev=269012&r1=269011&r2=269012&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp Mon May  9 23:24:02 2016
@@ -93,10 +93,7 @@ private:
 //===----------------------------------------------------------------------===//
 
 MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const {
-  const TargetRegisterClass *TRC =
-      TargetRegisterInfo::isVirtualRegister(RegNo)
-          ? MRI->getRegClass(RegNo)
-          : MRI->getTargetRegisterInfo()->getMinimalPhysRegClass(RegNo);
+  const TargetRegisterClass *TRC = MRI->getRegClass(RegNo);
   for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
     if (TRC->hasType(T))
       return T;
@@ -183,13 +180,6 @@ void WebAssemblyAsmPrinter::EmitFunction
     LocalTypes.push_back(getRegType(VReg));
     AnyWARegs = true;
   }
-  auto &PhysRegs = MFI->getPhysRegs();
-  for (unsigned PReg = 0; PReg < PhysRegs.size(); ++PReg) {
-    if (PhysRegs[PReg] == -1U)
-      continue;
-    LocalTypes.push_back(getRegType(PReg));
-    AnyWARegs = true;
-  }
   if (AnyWARegs)
     getTargetStreamer()->emitLocal(LocalTypes);
 

Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp?rev=269012&r1=269011&r2=269012&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp Mon May  9 23:24:02 2016
@@ -83,8 +83,11 @@ static void writeSPToMemory(unsigned Src
                             MachineBasicBlock::iterator &InsertStore,
                             DebugLoc DL) {
   auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer");
-  unsigned SPAddr =
-      MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const TargetRegisterClass *PtrRC =
+      MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
+  unsigned SPAddr = MRI.createVirtualRegister(PtrRC);
+  unsigned Discard = MRI.createVirtualRegister(PtrRC);
   const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
 
   BuildMI(MBB, InsertAddr, DL, TII->get(WebAssembly::CONST_I32), SPAddr)
@@ -92,13 +95,12 @@ static void writeSPToMemory(unsigned Src
   auto *MMO = new MachineMemOperand(MachinePointerInfo(),
                                     MachineMemOperand::MOStore, 4, 4);
   BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::STORE_I32),
-          SrcReg)
+          Discard)
       .addImm(0)
       .addReg(SPAddr)
       .addImm(2)  // p2align
       .addReg(SrcReg)
       .addMemOperand(MMO);
-  MF.getInfo<WebAssemblyFunctionInfo>()->stackifyVReg(SPAddr);
 }
 
 MachineBasicBlock::iterator
@@ -122,7 +124,6 @@ void WebAssemblyFrameLowering::emitProlo
   auto *MFI = MF.getFrameInfo();
   assert(MFI->getCalleeSavedInfo().empty() &&
          "WebAssembly should not have callee-saved registers");
-  auto *WFI = MF.getInfo<WebAssemblyFunctionInfo>();
 
   if (!needsSP(MF, *MFI)) return;
   uint64_t StackSize = MFI->getStackSize();
@@ -133,8 +134,10 @@ void WebAssemblyFrameLowering::emitProlo
   auto InsertPt = MBB.begin();
   DebugLoc DL;
 
-  unsigned SPAddr = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
-  unsigned SPReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+  const TargetRegisterClass *PtrRC =
+      MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
+  unsigned SPAddr = MRI.createVirtualRegister(PtrRC);
+  unsigned SPReg = MRI.createVirtualRegister(PtrRC);
   auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer");
   BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), SPAddr)
       .addExternalSymbol(SPSymbol);
@@ -151,25 +154,22 @@ void WebAssemblyFrameLowering::emitProlo
       .addReg(SPAddr)  // addr
       .addImm(2)       // p2align
       .addMemOperand(LoadMMO);
-  WFI->stackifyVReg(SPAddr);
 
   if (StackSize) {
     // Subtract the frame size
-    unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+    unsigned OffsetReg = MRI.createVirtualRegister(PtrRC);
     BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
         .addImm(StackSize);
     BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::SUB_I32),
             WebAssembly::SP32)
         .addReg(SPReg)
         .addReg(OffsetReg);
-    WFI->stackifyVReg(OffsetReg);
-    WFI->stackifyVReg(SPReg);
   }
   if (hasFP(MF)) {
     // Unlike most conventional targets (where FP points to the saved FP),
     // FP points to the bottom of the fixed-size locals, so we can use positive
     // offsets in load/store instructions.
-    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY_LOCAL_I32),
+    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY),
             WebAssembly::FP32)
         .addReg(WebAssembly::SP32);
   }
@@ -183,40 +183,31 @@ void WebAssemblyFrameLowering::emitEpilo
   auto *MFI = MF.getFrameInfo();
   uint64_t StackSize = MFI->getStackSize();
   if (!needsSP(MF, *MFI) || !needsSPWriteback(MF, *MFI)) return;
-  auto *WFI = MF.getInfo<WebAssemblyFunctionInfo>();
   const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
   auto &MRI = MF.getRegInfo();
   auto InsertPt = MBB.getFirstTerminator();
   DebugLoc DL;
 
-  if (InsertPt != MBB.end()) {
+  if (InsertPt != MBB.end())
     DL = InsertPt->getDebugLoc();
 
-    // If code has been stackified with the return, disconnect it so that we
-    // don't break the tree when we insert code just before the return.
-    if (InsertPt->isReturn() && InsertPt->getNumExplicitOperands() != 0) {
-      WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
-      MFI.unstackifyVReg(InsertPt->getOperand(0).getReg());
-    }
-  }
-
   // Restore the stack pointer. If we had fixed-size locals, add the offset
   // subtracted in the prolog.
   unsigned SPReg = 0;
   MachineBasicBlock::iterator InsertAddr = InsertPt;
   if (StackSize) {
-    unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+    const TargetRegisterClass *PtrRC =
+        MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
+    unsigned OffsetReg = MRI.createVirtualRegister(PtrRC);
     InsertAddr =
         BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
             .addImm(StackSize);
     // In the epilog we don't need to write the result back to the SP32 physreg
     // because it won't be used again. We can use a stackified register instead.
-    SPReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+    SPReg = MRI.createVirtualRegister(PtrRC);
     BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), SPReg)
         .addReg(hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32)
         .addReg(OffsetReg);
-    WFI->stackifyVReg(OffsetReg);
-    WFI->stackifyVReg(SPReg);
   } else {
     SPReg = hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32;
   }

Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h?rev=269012&r1=269011&r2=269012&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h Mon May  9 23:24:02 2016
@@ -39,18 +39,13 @@ class WebAssemblyFunctionInfo final : pu
   ///   - defined and used in LIFO order with other stack registers
   BitVector VRegStackified;
 
-  // One entry for each possible target reg. we expect it to be small.
-  std::vector<unsigned> PhysRegs;
-
   // A virtual register holding the pointer to the vararg buffer for vararg
   // functions. It is created and set in TLI::LowerFormalArguments and read by
   // TLI::LowerVASTART
   unsigned VarargVreg = -1U;
 
  public:
-  explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {
-    PhysRegs.resize(WebAssembly::NUM_TARGET_REGS, -1U);
-  }
+  explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {}
   ~WebAssemblyFunctionInfo() override;
 
   void addParam(MVT VT) { Params.push_back(VT); }
@@ -69,11 +64,6 @@ class WebAssemblyFunctionInfo final : pu
       VRegStackified.resize(TargetRegisterInfo::virtReg2Index(VReg) + 1);
     VRegStackified.set(TargetRegisterInfo::virtReg2Index(VReg));
   }
-  void unstackifyVReg(unsigned VReg) {
-    if (TargetRegisterInfo::virtReg2Index(VReg) >= VRegStackified.size())
-      return;
-    VRegStackified.reset(TargetRegisterInfo::virtReg2Index(VReg));
-  }
   bool isVRegStackified(unsigned VReg) const {
     if (TargetRegisterInfo::virtReg2Index(VReg) >= VRegStackified.size())
       return false;
@@ -87,11 +77,8 @@ class WebAssemblyFunctionInfo final : pu
     WARegs[TargetRegisterInfo::virtReg2Index(VReg)] = WAReg;
   }
   unsigned getWAReg(unsigned Reg) const {
-    if (TargetRegisterInfo::isVirtualRegister(Reg)) {
-      assert(TargetRegisterInfo::virtReg2Index(Reg) < WARegs.size());
-      return WARegs[TargetRegisterInfo::virtReg2Index(Reg)];
-    }
-    return PhysRegs[Reg];
+    assert(TargetRegisterInfo::virtReg2Index(Reg) < WARegs.size());
+    return WARegs[TargetRegisterInfo::virtReg2Index(Reg)];
   }
   // If new virtual registers are created after initWARegs has been called,
   // this function can be used to add WebAssembly register mappings for them.
@@ -99,13 +86,6 @@ class WebAssemblyFunctionInfo final : pu
     assert(VReg = WARegs.size());
     WARegs.push_back(WAReg);
   }
-
-  void addPReg(unsigned PReg, unsigned WAReg) {
-    assert(PReg < WebAssembly::NUM_TARGET_REGS);
-    assert(WAReg < -1U);
-    PhysRegs[PReg] = WAReg;
-  }
-  const std::vector<unsigned> &getPhysRegs() const { return PhysRegs; }
 };
 
 } // end namespace llvm

Added: llvm/trunk/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp?rev=269012&view=auto
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp (added)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp Mon May  9 23:24:02 2016
@@ -0,0 +1,105 @@
+//===--- WebAssemblyOptimizeLiveIntervals.cpp - LiveInterval processing ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Optimize LiveIntervals for use in a post-RA context.
+//
+/// LiveIntervals normally runs before register allocation when the code is
+/// only recently lowered out of SSA form, so it's uncommon for registers to
+/// have multiple defs, and then they do, the defs are usually closely related.
+/// Later, after coalescing, tail duplication, and other optimizations, it's
+/// more common to see registers with multiple unrelated defs. This pass
+/// updates LiveIntervalAnalysis to distribute the value numbers across separate
+/// LiveIntervals.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-optimize-live-intervals"
+
+namespace {
+class WebAssemblyOptimizeLiveIntervals final : public MachineFunctionPass {
+  const char *getPassName() const override {
+    return "WebAssembly Optimize Live Intervals";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<LiveIntervals>();
+    AU.addPreserved<MachineBlockFrequencyInfo>();
+    AU.addPreserved<SlotIndexes>();
+    AU.addPreserved<LiveIntervals>();
+    AU.addPreservedID(LiveVariablesID);
+    AU.addPreservedID(MachineDominatorsID);
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  WebAssemblyOptimizeLiveIntervals() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyOptimizeLiveIntervals::ID = 0;
+FunctionPass *llvm::createWebAssemblyOptimizeLiveIntervals() {
+  return new WebAssemblyOptimizeLiveIntervals();
+}
+
+bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(dbgs() << "********** Optimize LiveIntervals **********\n"
+                  "********** Function: "
+               << MF.getName() << '\n');
+
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+
+  // We don't preserve SSA form.
+  MRI.leaveSSA();
+
+  assert(MRI.tracksLiveness() &&
+         "OptimizeLiveIntervals expects liveness");
+
+  // Split multiple-VN LiveIntervals into multiple LiveIntervals.
+  SmallVector<LiveInterval*, 4> SplitLIs;
+  for (unsigned i = 0, e = MRI.getNumVirtRegs(); i < e; ++i) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    if (MRI.reg_nodbg_empty(Reg))
+      continue;
+
+    LIS.splitSeparateComponents(LIS.getInterval(Reg), SplitLIs);
+    SplitLIs.clear();
+  }
+
+  // In PrepareForLiveIntervals, we conservatively inserted IMPLICIT_DEF
+  // instructions to satisfy LiveIntervals' requirement that all uses be
+  // dominated by defs. Now that LiveIntervals has computed which of these
+  // defs are actually needed and which are dead, remove the dead ones.
+  for (auto MII = MF.begin()->begin(), MIE = MF.begin()->end(); MII != MIE; ) {
+    MachineInstr *MI = &*MII++;
+    if (MI->isImplicitDef() && MI->getOperand(0).isDead()) {
+      LiveInterval &LI = LIS.getInterval(MI->getOperand(0).getReg());
+      LIS.removeVRegDefAt(LI, LIS.getInstructionIndex(*MI).getRegSlot());
+      LIS.RemoveMachineInstrFromMaps(*MI);
+      MI->eraseFromParent();
+    }
+  }
+
+  return false;
+}

Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyPEI.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyPEI.cpp?rev=269012&r1=269011&r2=269012&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyPEI.cpp (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyPEI.cpp Mon May  9 23:24:02 2016
@@ -36,6 +36,7 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/WinEHFuncInfo.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/InlineAsm.h"

Added: llvm/trunk/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp?rev=269012&view=auto
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp (added)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp Mon May  9 23:24:02 2016
@@ -0,0 +1,136 @@
+//===- WebAssemblyPrepareForLiveIntervals.cpp - Prepare for LiveIntervals -===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Fix up code to meet LiveInterval's requirements.
+///
+/// Some CodeGen passes don't preserve LiveInterval's requirements, because
+/// they run after register allocation and it isn't important. However,
+/// WebAssembly runs LiveIntervals in a late pass. This pass transforms code
+/// to meet LiveIntervals' requirements; primarily, it ensures that all
+/// virtual register uses have definitions (IMPLICIT_DEF definitions if
+/// nothing else).
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-prepare-for-live-intervals"
+
+namespace {
+class WebAssemblyPrepareForLiveIntervals final : public MachineFunctionPass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  WebAssemblyPrepareForLiveIntervals() : MachineFunctionPass(ID) {}
+
+private:
+  const char *getPassName() const override {
+    return "WebAssembly Prepare For LiveIntervals";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // end anonymous namespace
+
+char WebAssemblyPrepareForLiveIntervals::ID = 0;
+FunctionPass *llvm::createWebAssemblyPrepareForLiveIntervals() {
+  return new WebAssemblyPrepareForLiveIntervals();
+}
+
+/// Test whether the given instruction is an ARGUMENT.
+static bool IsArgument(const MachineInstr *MI) {
+  switch (MI->getOpcode()) {
+  case WebAssembly::ARGUMENT_I32:
+  case WebAssembly::ARGUMENT_I64:
+  case WebAssembly::ARGUMENT_F32:
+  case WebAssembly::ARGUMENT_F64:
+    return true;
+  default:
+    return false;
+  }
+}
+
+// Test whether the given register has an ARGUMENT def.
+static bool HasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) {
+  for (auto &Def : MRI.def_instructions(Reg))
+    if (IsArgument(&Def))
+      return true;
+  return false;
+}
+
+bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG({
+    dbgs() << "********** Prepare For LiveIntervals **********\n"
+           << "********** Function: " << MF.getName() << '\n';
+  });
+
+  bool Changed = false;
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+  MachineBasicBlock &Entry = *MF.begin();
+
+  assert(!mustPreserveAnalysisID(LiveIntervalsID) &&
+         "LiveIntervals shouldn't be active yet!");
+
+  // We don't preserve SSA form.
+  MRI.leaveSSA();
+
+  // BranchFolding and perhaps other passes don't preserve IMPLICIT_DEF
+  // instructions. LiveIntervals requires that all paths to virtual register
+  // uses provide a definition. Insert IMPLICIT_DEFs in the entry block to
+  // conservatively satisfy this.
+  //
+  // TODO: This is fairly heavy-handed; find a better approach.
+  //
+  for (unsigned i = 0, e = MRI.getNumVirtRegs(); i < e; ++i) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+
+    // Skip unused registers.
+    if (MRI.use_nodbg_empty(Reg))
+      continue;
+
+    // Skip registers that have an ARGUMENT definition.
+    if (HasArgumentDef(Reg, MRI))
+      continue;
+
+    BuildMI(Entry, Entry.begin(), DebugLoc(),
+            TII.get(WebAssembly::IMPLICIT_DEF), Reg);
+    Changed = true;
+  }
+
+  // Move ARGUMENT_* instructions to the top of the entry block, so that their
+  // liveness reflects the fact that these really are live-in values.
+  for (auto MII = Entry.begin(), MIE = Entry.end(); MII != MIE; ) {
+    MachineInstr *MI = &*MII++;
+    if (IsArgument(MI)) {
+      MI->removeFromParent();
+      Entry.insert(Entry.begin(), MI);
+    }
+  }
+
+  // Ok, we're now ready to run LiveIntervalAnalysis again.
+  MF.getProperties().set(MachineFunctionProperties::Property::TracksLiveness);
+
+  return Changed;
+}

Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp?rev=269012&r1=269011&r2=269012&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp Mon May  9 23:24:02 2016
@@ -61,7 +61,6 @@ bool WebAssemblyRegNumbering::runOnMachi
 
   WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  const MachineFrameInfo &FrameInfo = *MF.getFrameInfo();
 
   MFI.initWARegs();
 
@@ -73,11 +72,13 @@ bool WebAssemblyRegNumbering::runOnMachi
     case WebAssembly::ARGUMENT_I32:
     case WebAssembly::ARGUMENT_I64:
     case WebAssembly::ARGUMENT_F32:
-    case WebAssembly::ARGUMENT_F64:
+    case WebAssembly::ARGUMENT_F64: {
+      int64_t Imm = MI.getOperand(1).getImm();
       DEBUG(dbgs() << "Arg VReg " << MI.getOperand(0).getReg() << " -> WAReg "
-                   << MI.getOperand(1).getImm() << "\n");
-      MFI.setWAReg(MI.getOperand(0).getReg(), MI.getOperand(1).getImm());
+                   << Imm << "\n");
+      MFI.setWAReg(MI.getOperand(0).getReg(), Imm);
       break;
+    }
     default:
       break;
     }
@@ -107,17 +108,6 @@ bool WebAssemblyRegNumbering::runOnMachi
       MFI.setWAReg(VReg, CurReg++);
     }
   }
-  // Allocate locals for used physical registers
-  bool HasFP =
-      MF.getSubtarget<WebAssemblySubtarget>().getFrameLowering()->hasFP(MF);
-  if (FrameInfo.getStackSize() > 0 || FrameInfo.adjustsStack() || HasFP) {
-    DEBUG(dbgs() << "PReg SP " << CurReg << "\n");
-    MFI.addPReg(WebAssembly::SP32, CurReg++);
-  }
-  if (HasFP) {
-    DEBUG(dbgs() << "PReg FP " << CurReg << "\n");
-    MFI.addPReg(WebAssembly::FP32, CurReg++);
-  }
 
   return true;
 }

Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp?rev=269012&r1=269011&r2=269012&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp Mon May  9 23:24:02 2016
@@ -110,6 +110,10 @@ static bool IsSafeToMove(const MachineIn
       continue;
 
     if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+      // Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions
+      // from moving down, and we've already checked for that.
+      if (Reg == WebAssembly::ARGUMENTS)
+        continue;
       // If the physical register is never modified, ignore it.
       if (!MRI.isPhysRegModified(Reg))
         continue;
@@ -118,7 +122,7 @@ static bool IsSafeToMove(const MachineIn
     }
 
     // Ask LiveIntervals whether moving this virtual register use or def to
-    // Insert will change value numbers are seen.
+    // Insert will change which value numbers are seen.
     const LiveInterval &LI = LIS.getInterval(Reg);
     VNInfo *DefVNI =
         MO.isDef() ? LI.getVNInfoAt(LIS.getInstructionIndex(*Def).getRegSlot())
@@ -141,11 +145,23 @@ static bool IsSafeToMove(const MachineIn
 static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
                                      const MachineBasicBlock &MBB,
                                      const MachineRegisterInfo &MRI,
-                                     const MachineDominatorTree &MDT) {
+                                     const MachineDominatorTree &MDT,
+                                     LiveIntervals &LIS) {
+  const LiveInterval &LI = LIS.getInterval(Reg);
+
+  const MachineInstr *OneUseInst = OneUse.getParent();
+  VNInfo *OneUseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*OneUseInst));
+
   for (const MachineOperand &Use : MRI.use_operands(Reg)) {
     if (&Use == &OneUse)
       continue;
+
     const MachineInstr *UseInst = Use.getParent();
+    VNInfo *UseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*UseInst));
+
+    if (UseVNI != OneUseVNI)
+      continue;
+
     const MachineInstr *OneUseInst = OneUse.getParent();
     if (UseInst->getOpcode() == TargetOpcode::PHI) {
       // Test that the PHI use, which happens on the CFG edge rather than
@@ -183,13 +199,33 @@ static unsigned GetTeeLocalOpcode(const
 
 /// A single-use def in the same block with no intervening memory or register
 /// dependencies; move the def down and nest it with the current instruction.
-static MachineInstr *MoveForSingleUse(unsigned Reg, MachineInstr *Def,
+static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand& Op,
+                                      MachineInstr *Def,
                                       MachineBasicBlock &MBB,
                                       MachineInstr *Insert, LiveIntervals &LIS,
-                                      WebAssemblyFunctionInfo &MFI) {
+                                      WebAssemblyFunctionInfo &MFI,
+                                      MachineRegisterInfo &MRI) {
   MBB.splice(Insert, &MBB, Def);
   LIS.handleMove(*Def);
-  MFI.stackifyVReg(Reg);
+
+  if (MRI.hasOneDef(Reg)) {
+    MFI.stackifyVReg(Reg);
+  } else {
+    unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
+    Def->getOperand(0).setReg(NewReg);
+    Op.setReg(NewReg);
+
+    // Tell LiveIntervals about the new register.
+    LIS.createAndComputeVirtRegInterval(NewReg);
+
+    // Tell LiveIntervals about the changes to the old register.
+    LiveInterval &LI = LIS.getInterval(Reg);
+    LIS.removeVRegDefAt(LI, LIS.getInstructionIndex(*Def).getRegSlot());
+    LIS.shrinkToUses(&LI);
+
+    MFI.stackifyVReg(NewReg);
+  }
+
   ImposeStackOrdering(Def);
   return Def;
 }
@@ -211,18 +247,23 @@ RematerializeCheapDef(unsigned Reg, Mach
   MFI.stackifyVReg(NewReg);
   ImposeStackOrdering(Clone);
 
+  // Shrink the interval.
+  bool IsDead = MRI.use_empty(Reg);
+  if (!IsDead) {
+    LiveInterval &LI = LIS.getInterval(Reg);
+    LIS.shrinkToUses(&LI);
+    IsDead = !LI.liveAt(LIS.getInstructionIndex(*Def).getDeadSlot());
+  }
+
   // If that was the last use of the original, delete the original.
-  // Otherwise shrink the LiveInterval.
-  if (MRI.use_empty(Reg)) {
+  if (IsDead) {
     SlotIndex Idx = LIS.getInstructionIndex(*Def).getRegSlot();
     LIS.removePhysRegDefAt(WebAssembly::ARGUMENTS, Idx);
-    LIS.removeVRegDefAt(LIS.getInterval(Reg), Idx);
     LIS.removeInterval(Reg);
     LIS.RemoveMachineInstrFromMaps(*Def);
     Def->eraseFromParent();
-  } else {
-    LIS.shrinkToUses(&LIS.getInterval(Reg));
   }
+
   return Clone;
 }
 
@@ -488,13 +529,13 @@ bool WebAssemblyRegStackify::runOnMachin
         bool CanMove = SameBlock && IsSafeToMove(Def, Insert, AA, LIS, MRI) &&
                        !TreeWalker.IsOnStack(Reg);
         if (CanMove && MRI.hasOneUse(Reg)) {
-          Insert = MoveForSingleUse(Reg, Def, MBB, Insert, LIS, MFI);
+          Insert = MoveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI);
         } else if (Def->isAsCheapAsAMove() &&
                    TII->isTriviallyReMaterializable(Def, &AA)) {
           Insert = RematerializeCheapDef(Reg, Op, Def, MBB, Insert, LIS, MFI,
                                          MRI, TII, TRI);
         } else if (CanMove &&
-                   OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT)) {
+                   OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS)) {
           Insert = MoveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI,
                                          MRI, TII);
         } else {
@@ -536,15 +577,13 @@ bool WebAssemblyRegStackify::runOnMachin
   SmallVector<unsigned, 0> Stack;
   for (MachineBasicBlock &MBB : MF) {
     for (MachineInstr &MI : MBB) {
+      if (MI.isDebugValue())
+        continue;
       for (MachineOperand &MO : reverse(MI.explicit_operands())) {
         if (!MO.isReg())
           continue;
         unsigned Reg = MO.getReg();
 
-        // Don't stackify physregs like SP or FP.
-        if (!TargetRegisterInfo::isVirtualRegister(Reg))
-          continue;
-
         if (MFI.isVRegStackified(Reg)) {
           if (MO.isDef())
             Stack.push_back(Reg);

Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp?rev=269012&r1=269011&r2=269012&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp Mon May  9 23:24:02 2016
@@ -51,51 +51,6 @@ WebAssemblyRegisterInfo::getReservedRegs
   return Reserved;
 }
 
-static bool isStackifiedVReg(const WebAssemblyFunctionInfo *WFI,
-                             const MachineOperand& Op) {
-  if (Op.isReg()) {
-    unsigned Reg = Op.getReg();
-    return TargetRegisterInfo::isVirtualRegister(Reg) &&
-        WFI->isVRegStackified(Reg);
-  }
-  return false;
-}
-
-static bool canStackifyOperand(const MachineInstr& Inst) {
-  unsigned Op = Inst.getOpcode();
-  return Op != TargetOpcode::PHI &&
-      Op != TargetOpcode::INLINEASM &&
-      Op != TargetOpcode::DBG_VALUE;
-}
-
-// Determine if the FI sequence can be stackified, and if so, where the code can
-// be inserted. If stackification is possible, returns true and ajusts II to
-// point to the insertion point.
-bool findInsertPt(const WebAssemblyFunctionInfo *WFI, MachineBasicBlock &MBB,
-                  unsigned OperandNum, MachineBasicBlock::iterator &II) {
-  if (!canStackifyOperand(*II)) return false;
-
-  MachineBasicBlock::iterator InsertPt(II);
-  int StackCount = 0;
-  // Operands are popped in reverse order, so any operands after FIOperand
-  // impose a constraint
-  for (unsigned i = OperandNum; i < II->getNumOperands(); i++) {
-    if (isStackifiedVReg(WFI, II->getOperand(i))) ++StackCount;
-  }
-  // Walk backwards, tracking stack depth. When it reaches 0 we have reached the
-  // top of the subtree.
-  while (StackCount) {
-    if (InsertPt == MBB.begin()) return false;
-    --InsertPt;
-    for (const auto &def : InsertPt->defs())
-      if (isStackifiedVReg(WFI, def)) --StackCount;
-    for (const auto &use : InsertPt->explicit_uses())
-      if (isStackifiedVReg(WFI, use)) ++StackCount;
-  }
-  II = InsertPt;
-  return true;
-}
-
 void WebAssemblyRegisterInfo::eliminateFrameIndex(
     MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum,
     RegScavenger * /*RS*/) const {
@@ -104,56 +59,67 @@ void WebAssemblyRegisterInfo::eliminateF
 
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
   int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
   const MachineFrameInfo &MFI = *MF.getFrameInfo();
   int64_t FrameOffset = MFI.getStackSize() + MFI.getObjectOffset(FrameIndex);
 
+  // If this is the address operand of a load or store, make it relative to SP
+  // and fold the frame offset directly in.
   if (MI.mayLoadOrStore() && FIOperandNum == WebAssembly::MemOpAddressOperandNo) {
-    // If this is the address operand of a load or store, make it relative to SP
-    // and fold the frame offset directly in.
     assert(FrameOffset >= 0 && MI.getOperand(1).getImm() >= 0);
     int64_t Offset = MI.getOperand(1).getImm() + FrameOffset;
 
-    if (static_cast<uint64_t>(Offset) > std::numeric_limits<uint32_t>::max()) {
-      // If this happens the program is invalid, but better to error here than
-      // generate broken code.
-      report_fatal_error("Memory offset field overflow");
+    if (static_cast<uint64_t>(Offset) <= std::numeric_limits<uint32_t>::max()) {
+      MI.getOperand(FIOperandNum - 1).setImm(Offset);
+      MI.getOperand(FIOperandNum)
+          .ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false);
+      return;
     }
-    MI.getOperand(FIOperandNum - 1).setImm(Offset);
-    MI.getOperand(FIOperandNum)
-        .ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false);
-  } else {
-    // Otherwise calculate the address
-    auto &MRI = MF.getRegInfo();
-    const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
-
-    unsigned FIRegOperand = WebAssembly::SP32;
-    if (FrameOffset) {
-      // Create i32.add SP, offset and make it the operand. We want to stackify
-      // this sequence, but we need to preserve the LIFO expr stack ordering
-      // (i.e. we can't insert our code in between MI and any operands it
-      // pops before FIOperand).
-      auto *WFI = MF.getInfo<WebAssemblyFunctionInfo>();
-      bool CanStackifyFI = findInsertPt(WFI, MBB, FIOperandNum, II);
-
-      unsigned OffsetOp = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
-      BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::CONST_I32),
-              OffsetOp)
-          .addImm(FrameOffset);
-      if (CanStackifyFI) {
-        WFI->stackifyVReg(OffsetOp);
-        FIRegOperand = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
-        WFI->stackifyVReg(FIRegOperand);
-      } else {
-        FIRegOperand = OffsetOp;
+  }
+
+  // If this is an address being added to a constant, fold the frame offset
+  // into the constant.
+  if (MI.getOpcode() == WebAssembly::ADD_I32) {
+    MachineOperand &OtherMO = MI.getOperand(3 - FIOperandNum);
+    if (OtherMO.isReg()) {
+      unsigned OtherMOReg = OtherMO.getReg();
+      if (TargetRegisterInfo::isVirtualRegister(OtherMOReg)) {
+        MachineInstr *Def = MF.getRegInfo().getUniqueVRegDef(OtherMOReg);
+        // TODO: For now we just opportunistically do this in the case where
+        // the CONST_I32 happens to have exactly one def and one use. We
+        // should generalize this to optimize in more cases.
+        if (Def && Def->getOpcode() == WebAssembly::CONST_I32 &&
+            MRI.hasOneNonDBGUse(Def->getOperand(0).getReg())) {
+          MachineOperand &ImmMO = Def->getOperand(1);
+          ImmMO.setImm(ImmMO.getImm() + uint32_t(FrameOffset));
+          MI.getOperand(FIOperandNum)
+              .ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false);
+          return;
+        }
       }
-      BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::ADD_I32),
-              FIRegOperand)
-          .addReg(WebAssembly::SP32)
-          .addReg(OffsetOp);
     }
-    MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*IsDef=*/false);
   }
+
+  // Otherwise create an i32.add SP, offset and make it the operand.
+  const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+
+  unsigned FIRegOperand = WebAssembly::SP32;
+  if (FrameOffset) {
+    // Create i32.add SP, offset and make it the operand.
+    const TargetRegisterClass *PtrRC =
+        MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
+    unsigned OffsetOp = MRI.createVirtualRegister(PtrRC);
+    BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::CONST_I32),
+            OffsetOp)
+        .addImm(FrameOffset);
+    FIRegOperand = MRI.createVirtualRegister(PtrRC);
+    BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::ADD_I32),
+            FIRegOperand)
+        .addReg(WebAssembly::SP32)
+        .addReg(OffsetOp);
+  }
+  MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*IsDef=*/false);
 }
 
 unsigned

Added: llvm/trunk/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp?rev=269012&view=auto
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp (added)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp Mon May  9 23:24:02 2016
@@ -0,0 +1,97 @@
+//===-- WebAssemblyReplacePhysRegs.cpp - Replace phys regs with virt regs -===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a pass that replaces physical registers with
+/// virtual registers.
+///
+/// LLVM expects certain physical registers, such as a stack pointer. However,
+/// WebAssembly doesn't actually have such physical registers. This pass is run
+/// once LLVM no longer needs these registers, and replaces them with virtual
+/// registers, so they can participate in register stackifying and coloring in
+/// the normal way.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-replace-phys-regs"
+
+namespace {
+class WebAssemblyReplacePhysRegs final : public MachineFunctionPass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  WebAssemblyReplacePhysRegs() : MachineFunctionPass(ID) {}
+
+private:
+  const char *getPassName() const override {
+    return "WebAssembly Replace Physical Registers";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // end anonymous namespace
+
+char WebAssemblyReplacePhysRegs::ID = 0;
+FunctionPass *llvm::createWebAssemblyReplacePhysRegs() {
+  return new WebAssemblyReplacePhysRegs();
+}
+
+bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG({
+    dbgs() << "********** Replace Physical Registers **********\n"
+           << "********** Function: " << MF.getName() << '\n';
+  });
+
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const auto &TRI = *MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
+  bool Changed = false;
+
+  assert(!mustPreserveAnalysisID(LiveIntervalsID) &&
+         "LiveIntervals shouldn't be active yet!");
+  // We don't preserve SSA or liveness.
+  MRI.leaveSSA();
+  MRI.invalidateLiveness();
+
+  for (unsigned PReg = WebAssembly::NoRegister + 1;
+       PReg < WebAssembly::NUM_TARGET_REGS; ++PReg) {
+    // Skip fake registers that are never used explicitly.
+    if (PReg == WebAssembly::EXPR_STACK || PReg == WebAssembly::ARGUMENTS)
+      continue;
+
+    // Replace explicit uses of the physical register with a virtual register.
+    const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(PReg);
+    unsigned VReg = WebAssembly::NoRegister;
+    for (auto I = MRI.reg_begin(PReg), E = MRI.reg_end(); I != E; ) {
+      MachineOperand &MO = *I++;
+      if (!MO.isImplicit()) {
+        if (VReg == WebAssembly::NoRegister)
+          VReg = MRI.createVirtualRegister(RC);
+        MO.setReg(VReg);
+        Changed = true;
+      }
+    }
+  }
+
+  return Changed;
+}

Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp?rev=269012&r1=269011&r2=269012&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp Mon May  9 23:24:02 2016
@@ -29,6 +29,7 @@
 #include "WebAssemblyMachineFunctionInfo.h"
 #include "WebAssemblySubtarget.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -55,6 +56,9 @@ public:
     AU.addPreserved<MachineBlockFrequencyInfo>();
     AU.addRequired<MachineDominatorTree>();
     AU.addPreserved<MachineDominatorTree>();
+    AU.addRequired<LiveIntervals>();
+    AU.addPreserved<SlotIndexes>();
+    AU.addPreserved<LiveIntervals>();
     AU.addRequired<TargetLibraryInfoWrapperPass>();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
@@ -74,57 +78,78 @@ FunctionPass *llvm::createWebAssemblySto
 static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI,
                                  unsigned FromReg, unsigned ToReg,
                                  const MachineRegisterInfo &MRI,
-                                 MachineDominatorTree &MDT) {
+                                 MachineDominatorTree &MDT,
+                                 LiveIntervals &LIS) {
   bool Changed = false;
+
+  LiveInterval *FromLI = &LIS.getInterval(FromReg);
+  LiveInterval *ToLI = &LIS.getInterval(ToReg);
+
+  SlotIndex FromIdx = LIS.getInstructionIndex(MI).getRegSlot();
+  VNInfo *FromVNI = FromLI->getVNInfoAt(FromIdx);
+
+  SmallVector<SlotIndex, 4> Indices;
+
   for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) {
     MachineOperand &O = *I++;
     MachineInstr *Where = O.getParent();
-    if (Where->getOpcode() == TargetOpcode::PHI) {
-      // PHIs use their operands on their incoming CFG edges rather than
-      // in their parent blocks. Get the basic block paired with this use
-      // of FromReg and check that MI's block dominates it.
-      MachineBasicBlock *Pred =
-          Where->getOperand(&O - &Where->getOperand(0) + 1).getMBB();
-      if (!MDT.dominates(&MBB, Pred))
-        continue;
-    } else {
-      // For a non-PHI, check that MI dominates the instruction in the
-      // normal way.
-      if (&MI == Where || !MDT.dominates(&MI, Where))
-        continue;
-    }
+
+    // Check that MI dominates the instruction in the normal way.
+    if (&MI == Where || !MDT.dominates(&MI, Where))
+      continue;
+
+    // If this use gets a different value, skip it.
+    SlotIndex WhereIdx = LIS.getInstructionIndex(*Where);
+    VNInfo *WhereVNI = FromLI->getVNInfoAt(WhereIdx);
+    if (WhereVNI && WhereVNI != FromVNI)
+      continue;
+
+    // Make sure ToReg isn't clobbered before it gets there.
+    VNInfo *ToVNI = ToLI->getVNInfoAt(WhereIdx);
+    if (ToVNI && ToVNI != FromVNI)
+      continue;
+
     Changed = true;
     DEBUG(dbgs() << "Setting operand " << O << " in " << *Where << " from "
                  << MI << "\n");
     O.setReg(ToReg);
-    // If the store's def was previously dead, it is no longer. But the
-    // dead flag shouldn't be set yet.
-    assert(!MI.getOperand(0).isDead() && "Unexpected dead flag");
+
+    // If the store's def was previously dead, it is no longer.
+    MI.getOperand(0).setIsDead(false);
+
+    Indices.push_back(WhereIdx.getRegSlot());
+  }
+
+  if (Changed) {
+    // Extend ToReg's liveness.
+    LIS.extendToIndices(*ToLI, Indices);
+
+    // Shrink FromReg's liveness.
+    LIS.shrinkToUses(FromLI);
+
+    // If we replaced all dominated uses, FromReg is now killed at MI.
+    if (!FromLI->liveAt(FromIdx.getDeadSlot()))
+      MI.addRegisterKilled(FromReg,
+                           MBB.getParent()->getSubtarget<WebAssemblySubtarget>()
+                                 .getRegisterInfo());
   }
+
   return Changed;
 }
 
 static bool optimizeStore(MachineBasicBlock &MBB, MachineInstr &MI,
                           const MachineRegisterInfo &MRI,
-                          MachineDominatorTree &MDT) {
-  const auto &Stored = MI.getOperand(WebAssembly::StoreValueOperandNo);
-  switch (Stored.getType()) {
-  case MachineOperand::MO_Register: {
-    unsigned ToReg = MI.getOperand(0).getReg();
-    unsigned FromReg = Stored.getReg();
-    return ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT);
-  }
-  case MachineOperand::MO_FrameIndex:
-    // TODO: optimize.
-    return false;
-  default:
-    report_fatal_error("Store results: store not consuming reg or frame index");
-  }
+                          MachineDominatorTree &MDT,
+                          LiveIntervals &LIS) {
+  unsigned ToReg = MI.getOperand(0).getReg();
+  unsigned FromReg = MI.getOperand(WebAssembly::StoreValueOperandNo).getReg();
+  return ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT, LIS);
 }
 
 static bool optimizeCall(MachineBasicBlock &MBB, MachineInstr &MI,
                          const MachineRegisterInfo &MRI,
                          MachineDominatorTree &MDT,
+                         LiveIntervals &LIS,
                          const WebAssemblyTargetLowering &TLI,
                          const TargetLibraryInfo &LibInfo) {
   MachineOperand &Op1 = MI.getOperand(1);
@@ -142,23 +167,12 @@ static bool optimizeCall(MachineBasicBlo
   if (!LibInfo.getLibFunc(Name, Func))
     return false;
 
-  const auto &Op2 = MI.getOperand(2);
-  switch (Op2.getType()) {
-  case MachineOperand::MO_Register: {
-    unsigned FromReg = Op2.getReg();
-    unsigned ToReg = MI.getOperand(0).getReg();
-    if (MRI.getRegClass(FromReg) != MRI.getRegClass(ToReg))
-      report_fatal_error("Store results: call to builtin function with wrong "
-                         "signature, from/to mismatch");
-    return ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT);
-  }
-  case MachineOperand::MO_FrameIndex:
-    // TODO: optimize.
-    return false;
-  default:
+  unsigned FromReg = MI.getOperand(2).getReg();
+  unsigned ToReg = MI.getOperand(0).getReg();
+  if (MRI.getRegClass(FromReg) != MRI.getRegClass(ToReg))
     report_fatal_error("Store results: call to builtin function with wrong "
-                       "signature, not consuming reg or frame index");
-  }
+                       "signature, from/to mismatch");
+  return ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT, LIS);
 }
 
 bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) {
@@ -167,14 +181,18 @@ bool WebAssemblyStoreResults::runOnMachi
            << "********** Function: " << MF.getName() << '\n';
   });
 
-  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
   MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
   const WebAssemblyTargetLowering &TLI =
       *MF.getSubtarget<WebAssemblySubtarget>().getTargetLowering();
   const auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+  LiveIntervals &LIS = getAnalysis<LiveIntervals>();
   bool Changed = false;
 
-  assert(MRI.isSSA() && "StoreResults depends on SSA form");
+  // We don't preserve SSA form.
+  MRI.leaveSSA();
+
+  assert(MRI.tracksLiveness() && "StoreResults expects liveness tracking");
 
   for (auto &MBB : MF) {
     DEBUG(dbgs() << "Basic Block: " << MBB.getName() << '\n');
@@ -191,11 +209,11 @@ bool WebAssemblyStoreResults::runOnMachi
       case WebAssembly::STORE_F64:
       case WebAssembly::STORE_I32:
       case WebAssembly::STORE_I64:
-        Changed |= optimizeStore(MBB, MI, MRI, MDT);
+        Changed |= optimizeStore(MBB, MI, MRI, MDT, LIS);
         break;
       case WebAssembly::CALL_I32:
       case WebAssembly::CALL_I64:
-        Changed |= optimizeCall(MBB, MI, MRI, MDT, TLI, LibInfo);
+        Changed |= optimizeCall(MBB, MI, MRI, MDT, LIS, TLI, LibInfo);
         break;
       }
   }

Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp?rev=269012&r1=269011&r2=269012&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp Mon May  9 23:24:02 2016
@@ -103,8 +103,6 @@ public:
 
   void addIRPasses() override;
   bool addInstSelector() override;
-  bool addILPOpts() override;
-  void addPreRegAlloc() override;
   void addPostRegAlloc() override;
   bool addGCPasses() override { return false; }
   void addPreEmitPass() override;
@@ -162,19 +160,6 @@ bool WebAssemblyPassConfig::addInstSelec
   return false;
 }
 
-bool WebAssemblyPassConfig::addILPOpts() {
-  (void)TargetPassConfig::addILPOpts();
-  return true;
-}
-
-void WebAssemblyPassConfig::addPreRegAlloc() {
-  TargetPassConfig::addPreRegAlloc();
-
-  // Prepare store instructions for register stackifying.
-  if (getOptLevel() != CodeGenOpt::None)
-    addPass(createWebAssemblyStoreResults());
-}
-
 void WebAssemblyPassConfig::addPostRegAlloc() {
   // TODO: The following CodeGen passes don't currently support code containing
   // virtual registers. Consider removing their restrictions and re-enabling
@@ -194,14 +179,6 @@ void WebAssemblyPassConfig::addPostRegAl
   disablePass(&LiveDebugValuesID);
   disablePass(&PatchableFunctionID);
 
-  if (getOptLevel() != CodeGenOpt::None) {
-    // Mark registers as representing wasm's expression stack.
-    addPass(createWebAssemblyRegStackify());
-
-    // Run the register coloring pass to reduce the total number of registers.
-    addPass(createWebAssemblyRegColoring());
-  }
-
   TargetPassConfig::addPostRegAlloc();
 
   // Run WebAssembly's version of the PrologEpilogInserter. Target-independent
@@ -213,6 +190,33 @@ void WebAssemblyPassConfig::addPostRegAl
 void WebAssemblyPassConfig::addPreEmitPass() {
   TargetPassConfig::addPreEmitPass();
 
+  // Now that we have a prologue and epilogue and all frame indices are
+  // rewritten, eliminate SP and FP. This allows them to be stackified,
+  // colored, and numbered with the rest of the registers.
+  addPass(createWebAssemblyReplacePhysRegs());
+
+  if (getOptLevel() != CodeGenOpt::None) {
+    // LiveIntervals isn't commonly run this late. Re-establish preconditions.
+    addPass(createWebAssemblyPrepareForLiveIntervals());
+
+    // Depend on LiveIntervals and perform some optimizations on it.
+    addPass(createWebAssemblyOptimizeLiveIntervals());
+
+    // Prepare store instructions for register stackifying.
+    addPass(createWebAssemblyStoreResults());
+
+    // Mark registers as representing wasm's expression stack. This is a key
+    // code-compression technique in WebAssembly. We run this pass (and
+    // StoreResults above) very late, so that it sees as much code as possible,
+    // including code emitted by PEI and expanded by late tail duplication.
+    addPass(createWebAssemblyRegStackify());
+
+    // Run the register coloring pass to reduce the total number of registers.
+    // This runs after stackification so that it doesn't consider registers
+    // that become stackified.
+    addPass(createWebAssemblyRegColoring());
+  }
+
   // Eliminate multiple-entry loops.
   addPass(createWebAssemblyFixIrreducibleControlFlow());
 

Modified: llvm/trunk/test/CodeGen/WebAssembly/byval.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/WebAssembly/byval.ll?rev=269012&r1=269011&r2=269012&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/WebAssembly/byval.ll (original)
+++ llvm/trunk/test/CodeGen/WebAssembly/byval.ll Mon May  9 23:24:02 2016
@@ -23,26 +23,27 @@ declare void @ext_byval_func_empty(%Empt
 ; CHECK-LABEL: byval_arg
 define void @byval_arg(%SmallStruct* %ptr) {
  ; CHECK: .param i32
+ ; CHECK: i32.const $push[[L4:.+]]=, __stack_pointer
  ; Subtract 16 from SP (SP is 16-byte aligned)
  ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
  ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]])
  ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 16
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]]
+ ; CHECK-NEXT: i32.sub $push[[L10:.+]]=, $pop[[L2]], $pop[[L3]]
  ; Ensure SP is stored back before the call
- ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L4]]), [[SP]]
+ ; CHECK-NEXT: i32.store $push[[L12:.+]]=, 0($pop[[L4]]), $pop[[L10]]{{$}}
+ ; CHECK-NEXT: tee_local $push[[L11:.+]]=, $[[SP:.+]]=, $pop[[L12]]{{$}}
  ; Copy the SmallStruct argument to the stack (SP+12, original SP-4)
- ; CHECK-NEXT: i32.load $push[[L4:.+]]=, 0($0)
- ; CHECK-NEXT: i32.store {{.*}}=, 12([[SP]]), $pop[[L4]]
+ ; CHECK-NEXT: i32.load $push[[L0:.+]]=, 0($0)
+ ; CHECK-NEXT: i32.store $discard=, 12($pop[[L11]]), $pop[[L0]]
  ; Pass a pointer to the stack slot to the function
- ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 12
- ; CHECK-NEXT: i32.add $push[[ARG:.+]]=, [[SP]], $pop[[L5]]
- ; CHECK-NEXT: call ext_byval_func at FUNCTION, $pop[[ARG]]
+ ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 12{{$}}
+ ; CHECK-NEXT: i32.add $push[[ARG:.+]]=, $[[SP]], $pop[[L5]]{{$}}
+ ; CHECK-NEXT: call ext_byval_func at FUNCTION, $pop[[ARG]]{{$}}
  call void @ext_byval_func(%SmallStruct* byval %ptr)
  ; Restore the stack
  ; CHECK-NEXT: i32.const $push[[L7:.+]]=, __stack_pointer
  ; CHECK-NEXT: i32.const $push[[L6:.+]]=, 16
- ; CHECK-NEXT: i32.add $push[[L8:.+]]=, [[SP]], $pop[[L6]]
+ ; CHECK-NEXT: i32.add $push[[L8:.+]]=, $[[SP]], $pop[[L6]]
  ; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L7]]), $pop[[L8]]
  ; CHECK-NEXT: return
  ret void
@@ -53,14 +54,16 @@ define void @byval_arg_align8(%SmallStru
  ; CHECK: .param i32
  ; Don't check the entire SP sequence, just enough to get the alignment.
  ; CHECK: i32.const $push[[L1:.+]]=, 16
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, $pop[[L1]]
+ ; CHECK-NEXT: i32.sub $push[[L10:.+]]=, {{.+}}, $pop[[L1]]
+ ; CHECK-NEXT: i32.store $push[[L12:.+]]=, 0($pop{{.+}}), $pop[[L10]]{{$}}
+ ; CHECK-NEXT: tee_local $push[[L11:.+]]=, $[[SP:.+]]=, $pop[[L12]]{{$}}
  ; Copy the SmallStruct argument to the stack (SP+8, original SP-8)
- ; CHECK: i32.load $push[[L4:.+]]=, 0($0){{$}}
- ; CHECK-NEXT: i32.store {{.*}}=, 8([[SP]]), $pop[[L4]]{{$}}
+ ; CHECK-NEXT: i32.load $push[[L0:.+]]=, 0($0){{$}}
+ ; CHECK-NEXT: i32.store $discard=, 8($pop[[L11]]), $pop[[L0]]{{$}}
  ; Pass a pointer to the stack slot to the function
- ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 8
- ; CHECK-NEXT: i32.add $push[[ARG:.+]]=, [[SP]], $pop[[L5]]
- ; CHECK-NEXT: call ext_byval_func_align8 at FUNCTION, $pop[[ARG]]
+ ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 8{{$}}
+ ; CHECK-NEXT: i32.add $push[[ARG:.+]]=, $[[SP]], $pop[[L5]]{{$}}
+ ; CHECK-NEXT: call ext_byval_func_align8 at FUNCTION, $pop[[ARG]]{{$}}
  call void @ext_byval_func_align8(%SmallStruct* byval align 8 %ptr)
  ret void
 }
@@ -70,13 +73,15 @@ define void @byval_arg_double(%AlignedSt
  ; CHECK: .param i32
  ; Subtract 16 from SP (SP is 16-byte aligned)
  ; CHECK: i32.const $push[[L1:.+]]=, 16
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, $pop[[L1]]
+ ; CHECK-NEXT: i32.sub $push[[L12:.+]]=, {{.+}}, $pop[[L1]]
+ ; CHECK-NEXT: i32.store $push[[L15:.+]]=, {{.+}}, $pop[[L12]]
+ ; CHECK-NEXT: tee_local $push[[L14:.+]]=, $[[SP:.+]]=, $pop[[L15]]
  ; Copy the AlignedStruct argument to the stack (SP+0, original SP-16)
  ; Just check the last load/store pair of the memcpy
  ; CHECK: i64.load $push[[L4:.+]]=, 0($0)
- ; CHECK-NEXT: i64.store {{.*}}=, 0([[SP]]), $pop[[L4]]
+ ; CHECK-NEXT: i64.store $discard=, 0($[[SP]]), $pop[[L4]]
  ; Pass a pointer to the stack slot to the function
- ; CHECK-NEXT: call ext_byval_func_alignedstruct at FUNCTION, [[SP]]
+ ; CHECK-NEXT: call ext_byval_func_alignedstruct at FUNCTION, $[[SP]]
  tail call void @ext_byval_func_alignedstruct(%AlignedStruct* byval %ptr)
  ret void
 }
@@ -108,11 +113,15 @@ define void @byval_empty_callee(%EmptySt
 
 ; Call memcpy for "big" byvals.
 ; CHECK-LABEL: big_byval:
+; CHECK: i32.const $push[[L4:.+]]=, __stack_pointer
 ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
 ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]])
 ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 131072
-; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]]
-; CHECK:      i32.call       ${{[^,]+}}=, memcpy at FUNCTION,
+; CHECK-NEXT: i32.sub $push[[L8:.+]]=, $pop[[L2]], $pop[[L3]]
+; CHECK-NEXT: i32.store $push[[L12:.+]]=, 0($pop[[L4]]), $pop[[L8]]{{$}}
+; CHECK-NEXT: i32.const $push[[L0:.+]]=, 131072
+; CHECK-NEXT: i32.call       $push[[L11:.+]]=, memcpy at FUNCTION, $pop{{.+}}, ${{.+}}, $pop{{.+}}
+; CHECK-NEXT: tee_local      $push[[L9:.+]]=, $[[SP:.+]]=, $pop[[L11]]{{$}}
 ; CHECK-NEXT: call           big_byval_callee at FUNCTION,
 %big = type [131072 x i8]
 declare void @big_byval_callee(%big* byval align 1)

Modified: llvm/trunk/test/CodeGen/WebAssembly/cfg-stackify.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/WebAssembly/cfg-stackify.ll?rev=269012&r1=269011&r2=269012&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/WebAssembly/cfg-stackify.ll (original)
+++ llvm/trunk/test/CodeGen/WebAssembly/cfg-stackify.ll Mon May  9 23:24:02 2016
@@ -104,17 +104,17 @@ back:
 ; CHECK-NOT: local
 ; CHECK: block{{$}}
 ; CHECK: br_if 0, {{[^,]+}}{{$}}
-; CHECK: .LBB2_1:
+; CHECK: .LBB2_{{[0-9]+}}:
 ; CHECK: br_if 0, ${{[0-9]+}}{{$}}
-; CHECK: .LBB2_2:
+; CHECK: .LBB2_{{[0-9]+}}:
 ; CHECK: return{{$}}
 ; OPT-LABEL: test2:
 ; OPT-NOT: local
 ; OPT: block{{$}}
 ; OPT: br_if 0, {{[^,]+}}{{$}}
-; OPT: .LBB2_1:
+; OPT: .LBB2_{{[0-9]+}}:
 ; OPT: br_if 0, ${{[0-9]+}}{{$}}
-; OPT: .LBB2_2:
+; OPT: .LBB2_{{[0-9]+}}:
 ; OPT: return{{$}}
 define void @test2(double* nocapture %p, i32 %n) {
 entry:
@@ -393,36 +393,32 @@ exit:
 ; CHECK: .LBB11_1:
 ; CHECK: loop{{$}}
 ; CHECK: block{{$}}
-; CHECK: block{{$}}
 ; CHECK: br_if           0, $0{{$}}
 ; CHECK: br              1{{$}}
 ; CHECK: .LBB11_3:
+; CHECK: end_block{{$}}
 ; CHECK: block{{$}}
 ; CHECK: br_if           0, $1{{$}}
 ; CHECK: br              1{{$}}
 ; CHECK: .LBB11_5:
-; CHECK: .LBB11_6:
 ; CHECK: br              0{{$}}
-; CHECK: .LBB11_7:
+; CHECK: .LBB11_6:
 ; CHECK-NEXT: end_loop{{$}}
 ; OPT-LABEL: doublediamond_in_a_loop:
 ; OPT:      .LBB11_1:
 ; OPT:      loop{{$}}
 ; OPT:      block{{$}}
-; OPT-NEXT: block{{$}}
-; OPT-NEXT: block{{$}}
 ; OPT:      br_if           0, {{[^,]+}}{{$}}
-; OPT:      br_if           1, {{[^,]+}}{{$}}
+; OPT:      block{{$}}
+; OPT:      br_if           0, {{[^,]+}}{{$}}
 ; OPT:      br              2{{$}}
 ; OPT-NEXT: .LBB11_4:
 ; OPT-NEXT: end_block{{$}}
 ; OPT:      br              1{{$}}
 ; OPT:      .LBB11_5:
 ; OPT-NEXT: end_block{{$}}
-; OPT:      .LBB11_6:
-; OPT-NEXT: end_block{{$}}
 ; OPT:      br              0{{$}}
-; OPT:      .LBB11_7:
+; OPT:      .LBB11_6:
 ; OPT-NEXT: end_loop{{$}}
 define i32 @doublediamond_in_a_loop(i32 %a, i32 %b, i32* %p) {
 entry:
@@ -756,33 +752,19 @@ u1:
 ; CHECK-LABEL: test8:
 ; CHECK:       .LBB17_1:
 ; CHECK-NEXT:  loop{{$}}
-; CHECK-NEXT:  block{{$}}
-; CHECK-NOT:   block
-; CHECK:       br_if    0, {{[^,]+}}{{$}}
-; CHECK-NOT:   block
-; CHECK:       br_if    1, {{[^,]+}}{{$}}
-; CHECK-NEXT:  .LBB17_3:
-; CHECK-NEXT:  end_block{{$}}
-; CHECK-NEXT:  loop{{$}}
 ; CHECK-NEXT:  i32.const $push{{[^,]+}}, 0{{$}}
 ; CHECK-NEXT:  br_if    0, {{[^,]+}}{{$}}
-; CHECK-NEXT:  br       2{{$}}
-; CHECK-NEXT:  .LBB17_4:
+; CHECK-NEXT:  br       0{{$}}
+; CHECK-NEXT:  .LBB17_2:
+; CHECK-NEXT:  end_loop{{$}}
 ; OPT-LABEL: test8:
 ; OPT:       .LBB17_1:
 ; OPT-NEXT:  loop{{$}}
-; OPT-NEXT:  block{{$}}
-; OPT-NOT:   block
-; OPT:       br_if    0, {{[^,]+}}{{$}}
-; OPT-NOT:   block
-; OPT:       br_if    1, {{[^,]+}}{{$}}
-; OPT-NEXT:  .LBB17_3:
-; OPT-NEXT:  end_block{{$}}
-; OPT-NEXT:  loop{{$}}
 ; OPT-NEXT:  i32.const $push{{[^,]+}}, 0{{$}}
 ; OPT-NEXT:  br_if    0, {{[^,]+}}{{$}}
-; OPT-NEXT:  br       2{{$}}
-; OPT-NEXT:  .LBB17_4:
+; OPT-NEXT:  br       0{{$}}
+; OPT-NEXT:  .LBB17_2:
+; OPT-NEXT:  end_loop{{$}}
 define i32 @test8() {
 bb:
   br label %bb1
@@ -1195,10 +1177,9 @@ bb5:
 ; CHECK-NEXT:     loop{{$}}
 ; CHECK-NEXT:     i32.const   $push0=, 0{{$}}
 ; CHECK-NEXT:     br_if       0, $pop0{{$}}
-; CHECK-NEXT: .LBB23_2:{{$}}
 ; CHECK-NEXT:     end_loop{{$}}
+; CHECK-NEXT: .LBB23_3:{{$}}
 ; CHECK-NEXT:     loop{{$}}
-; CHECK-NEXT:     i32.const   $discard=, 0{{$}}
 ; CHECK-NEXT:     i32.const   $push1=, 0{{$}}
 ; CHECK-NEXT:     br_if       0, $pop1{{$}}
 ; CHECK-NEXT:     end_loop{{$}}

Modified: llvm/trunk/test/CodeGen/WebAssembly/mem-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/WebAssembly/mem-intrinsics.ll?rev=269012&r1=269011&r2=269012&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/WebAssembly/mem-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/WebAssembly/mem-intrinsics.ll Mon May  9 23:24:02 2016
@@ -61,8 +61,8 @@ define void @set_no(i8* %dst, i8 %src, i
 
 
 ; CHECK-LABEL: frame_index:
-; CHECK: i32.call $discard=, memset at FUNCTION, $pop12, $pop1, $pop0{{$}}
-; CHECK: i32.call $discard=, memset at FUNCTION, $0, $pop3, $pop2{{$}}
+; CHECK: i32.call $discard=, memset at FUNCTION, $pop{{[0-9]+}}, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}}
+; CHECK: i32.call $push{{[0-9]+}}=, memset at FUNCTION, ${{[0-9]+}}, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}}
 ; CHECK: return{{$}}
 define void @frame_index() {
 entry:
@@ -76,15 +76,48 @@ entry:
 }
 
 ; If the result value of memset doesn't get stackified, it should be marked
-; $discard.
+; $discard. Note that we use a call to prevent tail dup so that we can test
+; this specific functionality.
 
 ; CHECK-LABEL: discard_result:
 ; CHECK: i32.call $discard=, memset at FUNCTION, $0, $1, $2
 declare i8* @def()
+declare void @block_tail_dup()
 define i8* @discard_result(i8* %arg, i8 %arg1, i32 %arg2, i32 %arg3, i32 %arg4) {
 bb:
   %tmp = icmp eq i32 %arg3, 0
   br i1 %tmp, label %bb5, label %bb9
+
+bb5:
+  %tmp6 = icmp eq i32 %arg4, 0
+  br i1 %tmp6, label %bb7, label %bb8
+
+bb7:
+  call void @llvm.memset.p0i8.i32(i8* %arg, i8 %arg1, i32 %arg2, i32 1, i1 false)
+  br label %bb11
+
+bb8:
+  br label %bb11
+
+bb9:
+  %tmp10 = call i8* @def()
+  br label %bb11
+
+bb11:
+  %tmp12 = phi i8* [ %arg, %bb7 ], [ %arg, %bb8 ], [ %tmp10, %bb9 ]
+  call void @block_tail_dup()
+  ret i8* %tmp12
+}
+
+; This is the same as discard_result, except we let tail dup happen, so the
+; result of the memset *is* stackified.
+
+; CHECK-LABEL: tail_dup_to_reuse_result:
+; CHECK: i32.call $push{{[0-9]+}}=, memset at FUNCTION, $0, $1, $2
+define i8* @tail_dup_to_reuse_result(i8* %arg, i8 %arg1, i32 %arg2, i32 %arg3, i32 %arg4) {
+bb:
+  %tmp = icmp eq i32 %arg3, 0
+  br i1 %tmp, label %bb5, label %bb9
 
 bb5:
   %tmp6 = icmp eq i32 %arg4, 0

Modified: llvm/trunk/test/CodeGen/WebAssembly/offset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/WebAssembly/offset.ll?rev=269012&r1=269011&r2=269012&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/WebAssembly/offset.ll (original)
+++ llvm/trunk/test/CodeGen/WebAssembly/offset.ll Mon May  9 23:24:02 2016
@@ -397,9 +397,9 @@ define void @aggregate_load_store({i32,i
 ; merged into i64 stores.
 
 ; CHECK-LABEL: aggregate_return:
-; CHECK: i64.const   $push0=, 0{{$}}
-; CHECK: i64.store   $push1=, 8($0):p2align=2, $pop0{{$}}
-; CHECK: i64.store   $discard=, 0($0):p2align=2, $pop1{{$}}
+; CHECK: i64.const   $push[[L0:[0-9]+]]=, 0{{$}}
+; CHECK: i64.store   $push[[L1:[0-9]+]]=, 8($0):p2align=2, $pop[[L0]]{{$}}
+; CHECK: i64.store   $discard=, 0($0):p2align=2, $pop[[L1]]{{$}}
 define {i32,i32,i32,i32} @aggregate_return() {
   ret {i32,i32,i32,i32} zeroinitializer
 }
@@ -408,12 +408,12 @@ define {i32,i32,i32,i32} @aggregate_retu
 ; merged.
 
 ; CHECK-LABEL: aggregate_return_without_merge:
-; CHECK: i32.const   $push0=, 0{{$}}
-; CHECK: i32.store8  $push1=, 14($0), $pop0{{$}}
-; CHECK: i32.store16 $push2=, 12($0), $pop1{{$}}
-; CHECK: i32.store   $discard=, 8($0), $pop2{{$}}
-; CHECK: i64.const   $push3=, 0{{$}}
-; CHECK: i64.store   $discard=, 0($0), $pop3{{$}}
+; CHECK: i32.const   $push[[L0:[0-9]+]]=, 0{{$}}
+; CHECK: i32.store8  $push[[L1:[0-9]+]]=, 14($0), $pop[[L0]]{{$}}
+; CHECK: i32.store16 $push[[L2:[0-9]+]]=, 12($0), $pop[[L1]]{{$}}
+; CHECK: i32.store   $discard=, 8($0), $pop[[L2]]{{$}}
+; CHECK: i64.const   $push[[L3:[0-9]+]]=, 0{{$}}
+; CHECK: i64.store   $discard=, 0($0), $pop[[L3]]{{$}}
 define {i64,i32,i16,i8} @aggregate_return_without_merge() {
   ret {i64,i32,i16,i8} zeroinitializer
 }

Modified: llvm/trunk/test/CodeGen/WebAssembly/reg-stackify.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/WebAssembly/reg-stackify.ll?rev=269012&r1=269011&r2=269012&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/WebAssembly/reg-stackify.ll (original)
+++ llvm/trunk/test/CodeGen/WebAssembly/reg-stackify.ll Mon May  9 23:24:02 2016
@@ -194,9 +194,9 @@ entry:
 ; CHECK-LABEL: simple_multiple_use:
 ; CHECK:  .param      i32, i32{{$}}
 ; CHECK-NEXT:  i32.mul     $push[[NUM0:[0-9]+]]=, $1, $0{{$}}
-; CHECK-NEXT:  tee_local   $push[[NUM1:[0-9]+]]=, $0=, $pop[[NUM0]]{{$}}
+; CHECK-NEXT:  tee_local   $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
 ; CHECK-NEXT:  call        use_a at FUNCTION, $pop[[NUM1]]{{$}}
-; CHECK-NEXT:  call        use_b at FUNCTION, $0{{$}}
+; CHECK-NEXT:  call        use_b at FUNCTION, $[[NUM2]]{{$}}
 ; CHECK-NEXT:  return{{$}}
 declare void @use_a(i32)
 declare void @use_b(i32)
@@ -212,8 +212,8 @@ define void @simple_multiple_use(i32 %x,
 ; CHECK-LABEL: multiple_uses_in_same_insn:
 ; CHECK:  .param      i32, i32{{$}}
 ; CHECK-NEXT:  i32.mul     $push[[NUM0:[0-9]+]]=, $1, $0{{$}}
-; CHECK-NEXT:  tee_local   $push[[NUM1:[0-9]+]]=, $0=, $pop[[NUM0]]{{$}}
-; CHECK-NEXT:  call        use_2 at FUNCTION, $pop[[NUM1]], $0{{$}}
+; CHECK-NEXT:  tee_local   $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
+; CHECK-NEXT:  call        use_2 at FUNCTION, $pop[[NUM1]], $[[NUM2]]{{$}}
 ; CHECK-NEXT:  return{{$}}
 declare void @use_2(i32, i32)
 define void @multiple_uses_in_same_insn(i32 %x, i32 %y) {
@@ -273,7 +273,6 @@ define i32 @no_stackify_past_use(i32 %ar
 ; CHECK-NEXT:   tee_local       $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
 ; CHECK-NEXT:   f64.select      $push{{[0-9]+}}=, $pop{{[0-9]+}}, $pop[[NUM1]], ${{[0-9]+}}{{$}}
 ; CHECK:        $[[NUM2]]=,
-; CHECK:        $[[NUM2]]=,
 define void @multiple_defs(i32 %arg, i32 %arg1, i1 %arg2, i1 %arg3, i1 %arg4) {
 bb:
   br label %bb5
@@ -325,9 +324,9 @@ define i32 @no_stackify_call_past_load()
 
 ; Don't move stores past loads if there may be aliasing
 ; CHECK-LABEL: no_stackify_store_past_load
-; CHECK: i32.store {{.*}}, 0($1), $0
+; CHECK: i32.store $[[L0:[0-9]+]]=, 0($1), $0
 ; CHECK: i32.load {{.*}}, 0($2)
-; CHECK: i32.call {{.*}}, callee at FUNCTION, $0
+; CHECK: i32.call {{.*}}, callee at FUNCTION, $[[L0]]{{$}}
 define i32 @no_stackify_store_past_load(i32 %a, i32* %p1, i32* %p2) {
   store i32 %a, i32* %p1
   %b = load i32, i32* %p2, align 4

Modified: llvm/trunk/test/CodeGen/WebAssembly/store-results.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/WebAssembly/store-results.ll?rev=269012&r1=269011&r2=269012&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/WebAssembly/store-results.ll (original)
+++ llvm/trunk/test/CodeGen/WebAssembly/store-results.ll Mon May  9 23:24:02 2016
@@ -61,7 +61,8 @@ for.cond.cleanup4.i:
 }
 
 ; CHECK-LABEL: fi_ret:
-; CHECK: i32.store $discard=,
+; CHECK: i32.store $push0=,
+; CHECK: return $pop0{{$}}
 define hidden i8* @fi_ret(i8** %addr) {
 entry:
   %buf = alloca [27 x i8], align 16

Modified: llvm/trunk/test/CodeGen/WebAssembly/userstack.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/WebAssembly/userstack.ll?rev=269012&r1=269011&r2=269012&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/WebAssembly/userstack.ll (original)
+++ llvm/trunk/test/CodeGen/WebAssembly/userstack.ll Mon May  9 23:24:02 2016
@@ -12,19 +12,20 @@ declare void @ext_func_i32(i32* %ptr)
 ; Check that there is an extra local for the stack pointer.
 ; CHECK: .local i32{{$}}
 define void @alloca32() noredzone {
+ ; CHECK: i32.const $push[[L4:.+]]=, __stack_pointer{{$}}
  ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer{{$}}
  ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]])
  ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 16
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]]
- ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer{{$}}
- ; CHECK-NEXT: i32.store $discard=, 0($pop[[L4]]), [[SP]]
+ ; CHECK-NEXT: i32.sub $push[[L8:.+]]=, $pop[[L2]], $pop[[L3]]
+ ; CHECK-NEXT: i32.store $push[[L10:.+]]=, 0($pop[[L4]]), $pop[[L8]]{{$}}
+ ; CHECK-NEXT: tee_local $push[[L9:.+]]=, $[[SP:.+]]=, $pop[[L10]]{{$}}
  %retval = alloca i32
  ; CHECK: i32.const $push[[L0:.+]]=, 0
- ; CHECK: i32.store {{.*}}=, 12([[SP]]), $pop[[L0]]
+ ; CHECK: i32.store {{.*}}=, 12($pop[[L9]]), $pop[[L0]]
  store i32 0, i32* %retval
  ; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer
  ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 16
- ; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[SP]], $pop[[L5]]
+ ; CHECK-NEXT: i32.add $push[[L7:.+]]=, $[[SP]], $pop[[L5]]
  ; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]]
  ret void
 }
@@ -32,17 +33,18 @@ define void @alloca32() noredzone {
 ; CHECK-LABEL: alloca3264:
 ; CHECK: .local i32{{$}}
 define void @alloca3264() {
- ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]])
- ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 16
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]]
+ ; CHECK: i32.const $push[[L2:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.load $push[[L3:.+]]=, 0($pop[[L2]])
+ ; CHECK-NEXT: i32.const $push[[L4:.+]]=, 16
+ ; CHECK-NEXT: i32.sub $push[[L6:.+]]=, $pop[[L3]], $pop[[L4]]
+ ; CHECK-NEXT: tee_local $push[[L5:.+]]=, $[[SP:.+]]=, $pop[[L6]]
  %r1 = alloca i32
  %r2 = alloca double
- ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 0
- ; CHECK-NEXT: i32.store {{.*}}=, 12([[SP]]), $pop[[L3]]
+ ; CHECK-NEXT: i32.const $push[[L0:.+]]=, 0
+ ; CHECK-NEXT: i32.store $discard=, 12($pop[[L5]]), $pop[[L0]]
  store i32 0, i32* %r1
- ; CHECK-NEXT: i64.const $push[[L0:.+]]=, 0
- ; CHECK-NEXT: i64.store {{.*}}=, 0([[SP]]), $pop[[L0]]
+ ; CHECK-NEXT: i64.const $push[[L1:.+]]=, 0
+ ; CHECK-NEXT: i64.store $discard=, 0($[[SP]]), $pop[[L1]]
  store double 0.0, double* %r2
  ; CHECK-NEXT: return
  ret void
@@ -51,52 +53,52 @@ define void @alloca3264() {
 ; CHECK-LABEL: allocarray:
 ; CHECK: .local i32{{$}}
 define void @allocarray() {
- ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]])
- ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 144{{$}}
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]]
- ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer{{$}}
- ; CHECK-NEXT: i32.store $discard=, 0($pop[[L4]]), [[SP]]
+ ; CHECK: i32.const $push[[L7:.+]]=, __stack_pointer
+ ; CHECK: i32.const $push[[L4:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.load $push[[L5:.+]]=, 0($pop[[L4]])
+ ; CHECK-NEXT: i32.const $push[[L6:.+]]=, 144{{$}}
+ ; CHECK-NEXT: i32.sub $push[[L11:.+]]=, $pop[[L5]], $pop[[L6]]
+ ; CHECK-NEXT: i32.store $[[SP:.+]]=, 0($pop[[L7]]), $pop[[L11]]
  %r = alloca [33 x i32]
 
- ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 12
- ; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[SP]], $pop[[L5]]
- ; CHECK-NEXT: i32.const $push[[L4:.+]]=, 12
- ; CHECK-NEXT: i32.add $push[[L6:.+]]=, $pop[[L7]], $pop[[L4]]
- ; CHECK-NEXT: i32.const $push[[L9:.+]]=, 1{{$}}
- ; CHECK-NEXT: i32.store $push[[L10:.+]]=, 12([[SP]]), $pop[[L9]]{{$}}
- ; CHECK-NEXT: i32.store $discard=, 0($pop3), $pop[[L10]]{{$}}
+ ; CHECK-NEXT: i32.const $push[[L2:.+]]=, 24
+ ; CHECK-NEXT: i32.add $push[[L3:.+]]=, $[[SP]], $pop[[L2]]
+ ; CHECK-NEXT: i32.const $push[[L1:.+]]=, 1{{$}}
+ ; CHECK-NEXT: i32.store $push[[L0:.+]]=, 0($pop[[L3]]), $pop[[L1]]{{$}}
+ ; CHECK-NEXT: i32.store $discard=, 12($[[SP]]), $pop[[L0]]{{$}}
  %p = getelementptr [33 x i32], [33 x i32]* %r, i32 0, i32 0
  store i32 1, i32* %p
  %p2 = getelementptr [33 x i32], [33 x i32]* %r, i32 0, i32 3
  store i32 1, i32* %p2
 
- ; CHECK: i32.const $push[[L12:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.const $push[[L11:.+]]=, 144
- ; CHECK-NEXT: i32.add $push[[L13:.+]]=, [[SP]], $pop[[L11]]
- ; CHECK-NEXT: i32.store $discard=, 0($pop[[L12]]), $pop[[L13]]
+ ; CHECK: i32.const $push[[L10:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.const $push[[L8:.+]]=, 144
+ ; CHECK-NEXT: i32.add $push[[L19:.+]]=, $[[SP]], $pop[[L8]]
+ ; CHECK-NEXT: i32.store $discard=, 0($pop[[L10]]), $pop[[L9]]
  ret void
 }
 
 ; CHECK-LABEL: non_mem_use
 define void @non_mem_use(i8** %addr) {
  ; CHECK: i32.const $push[[L1:.+]]=, 48
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, $pop[[L1]]
+ ; CHECK-NEXT: i32.sub $push[[L11:.+]]=, {{.+}}, $pop[[L1]]
+ ; CHECK-NEXT: i32.store $[[SP:.+]]=, {{.+}}, $pop[[L11]]
  %buf = alloca [27 x i8], align 16
  %r = alloca i64
  %r2 = alloca i64
  ; %r is at SP+8
+ ; CHECK: tee_local $push[[L12:.+]]=, $[[SP:.+]]=, $pop{{.+}}
  ; CHECK: i32.const $push[[OFF:.+]]=, 8
- ; CHECK-NEXT: i32.add $push[[ARG1:.+]]=, [[SP]], $pop[[OFF]]
+ ; CHECK-NEXT: i32.add $push[[ARG1:.+]]=, $pop[[L12]], $pop[[OFF]]
  ; CHECK-NEXT: call ext_func at FUNCTION, $pop[[ARG1]]
  call void @ext_func(i64* %r)
  ; %r2 is at SP+0, no add needed
- ; CHECK-NEXT: call ext_func at FUNCTION, [[SP]]
+ ; CHECK-NEXT: call ext_func at FUNCTION, $[[SP]]
  call void @ext_func(i64* %r2)
  ; Use as a value, but in a store
  ; %buf is at SP+16
  ; CHECK: i32.const $push[[OFF:.+]]=, 16
- ; CHECK-NEXT: i32.add $push[[VAL:.+]]=, [[SP]], $pop[[OFF]]
+ ; CHECK-NEXT: i32.add $push[[VAL:.+]]=, $[[SP]], $pop[[OFF]]
  ; CHECK-NEXT: i32.store {{.*}}=, 0($0), $pop[[VAL]]
  %gep = getelementptr inbounds [27 x i8], [27 x i8]* %buf, i32 0, i32 0
  store i8* %gep, i8** %addr
@@ -106,23 +108,25 @@ define void @non_mem_use(i8** %addr) {
 ; CHECK-LABEL: allocarray_inbounds:
 ; CHECK: .local i32{{$}}
 define void @allocarray_inbounds() {
- ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]])
- ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 32{{$}}
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]]
+ ; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer
+ ; CHECK: i32.const $push[[L3:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.load $push[[L4:.+]]=, 0($pop[[L3]])
+ ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 32{{$}}
+ ; CHECK-NEXT: i32.sub $push[[L10:.+]]=, $pop[[L4]], $pop[[L5]]
+ ; CHECK-NEXT: i32.store $[[SP:.+]]=, 0($pop[[L6]]), $pop[[L10]]{{$}}
  %r = alloca [5 x i32]
  ; CHECK: i32.const $push[[L3:.+]]=, 1
- ; CHECK: i32.store {{.*}}=, 12([[SP]]), $pop[[L3]]
+ ; CHECK: i32.store {{.*}}=, 12($[[SP]]), $pop[[L3]]
  %p = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 0
  store i32 1, i32* %p
  ; This store should have both the GEP and the FI folded into it.
- ; CHECK-NEXT: i32.store {{.*}}=, 24([[SP]]), $pop
+ ; CHECK-NEXT: i32.store {{.*}}=, 24($[[SP]]), $pop
  %p2 = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 3
  store i32 1, i32* %p2
  call void @ext_func(i64* null);
  ; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer
  ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 32
- ; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[SP]], $pop[[L5]]
+ ; CHECK-NEXT: i32.add $push[[L7:.+]]=, $[[SP]], $pop[[L5]]
  ; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]]
  ret void
 }
@@ -130,14 +134,13 @@ define void @allocarray_inbounds() {
 ; CHECK-LABEL: dynamic_alloca:
 define void @dynamic_alloca(i32 %alloc) {
  ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.load [[SP:.+]]=, 0($pop[[L1]])
- ; CHECK-NEXT: copy_local [[FP:.+]]=, [[SP]]
+ ; CHECK-NEXT: i32.load $push[[L13:.+]]=, 0($pop[[L1]])
+ ; CHECK-NEXT: tee_local $push[[L12:.+]]=, [[SP:.+]], $pop[[L13]]{{$}}
+ ; CHECK-NEXT: copy_local [[FP:.+]]=, $pop[[L12]]{{$}}
  ; Target independent codegen bumps the stack pointer.
  ; CHECK: i32.sub
- ; CHECK-NEXT: copy_local [[SP]]=,
  ; Check that SP is written back to memory after decrement
- ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer{{$}}
- ; CHECK-NEXT: i32.store $discard=, 0($pop[[L4]]), [[SP]]
+ ; CHECK: i32.store $discard=, 0($pop{{.+}}), 
  %r = alloca i32, i32 %alloc
  ; Target-independent codegen also calculates the store addr
  ; CHECK: call ext_func_i32 at FUNCTION
@@ -149,16 +152,17 @@ define void @dynamic_alloca(i32 %alloc)
 
 ; CHECK-LABEL: dynamic_alloca_redzone:
 define void @dynamic_alloca_redzone(i32 %alloc) {
- ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.load [[SP:.+]]=, 0($pop[[L1]])
- ; CHECK-NEXT: copy_local [[FP:.+]]=, [[SP]]
+ ; CHECK: i32.const $push[[L8:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.load $push[[L13:.+]]=, 0($pop[[L1]])
+ ; CHECK-NEXT: tee_local $push[[L12:.+]]=, [[SP:.+]], $pop[[L13]]{{$}}
+ ; CHECK-NEXT: copy_local [[FP:.+]]=, $pop[[L12]]{{$}}
  ; Target independent codegen bumps the stack pointer
- ; CHECK: i32.sub [[R:.+]]=,
- ; CHECK-NEXT: copy_local [[SP]]=, [[R]]
+ ; CHECK: i32.sub
  %r = alloca i32, i32 %alloc
- ; check-next here asserts that SP is not written back.
- ; CHECK-NEXT: i32.const $push[[ZERO:.+]]=, 0
- ; CHECK-NEXT: i32.store $discard=, 0([[R]]), $pop[[ZERO]]
+ ; CHECK-NEXT: tee_local       $push[[L8:.+]]=, $0=, $pop
+ ; CHECK-NEXT: copy_local      $discard=, $pop[[L8]]{{$}}
+ ; CHECK-NEXT: i32.const       $push[[L6:.+]]=, 0{{$}}
+ ; CHECK-NEXT: i32.store       $discard=, 0($0), $pop[[L6]]{{$}}
  store i32 0, i32* %r
  ; CHECK-NEXT: return
  ret void
@@ -167,26 +171,20 @@ define void @dynamic_alloca_redzone(i32
 ; CHECK-LABEL: dynamic_static_alloca:
 define void @dynamic_static_alloca(i32 %alloc) noredzone {
  ; Decrement SP in the prolog by the static amount and writeback to memory.
- ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]])
- ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 16
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]]
- ; CHECK-NEXT: copy_local [[FP:.+]]=, [[SP]]
- ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L4]]), [[SP]]
+ ; CHECK: i32.const $push[[L9:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.load $push[[L10:.+]]=, 0($pop[[L9]])
+ ; CHECK-NEXT: i32.const $push[[L11:.+]]=, 16
+ ; CHECK-NEXT: i32.sub $push[[L20:.+]]=, $pop[[L10]], $pop[[L11]]
+ ; CHECK-NEXT: tee_local $push[[L19:.+]]=, $[[FP:.+]]=, $pop[[L20]]
+ ; CHECK:      i32.store $push[[L0:.+]]=, 0($pop{{.+}}), $[[FP]]
  ; Decrement SP in the body by the dynamic amount.
  ; CHECK: i32.sub
- ; CHECK: copy_local [[SP]]=,
  ; Writeback to memory.
- ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L4]]), [[SP]]
+ ; CHECK: i32.store $discard=, 0($pop{{.+}}), $pop{{.+}}
  %r1 = alloca i32
  %r = alloca i32, i32 %alloc
  store i32 0, i32* %r
- ; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 16
- ; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[FP]], $pop[[L5]]
- ; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]]
+ ; CHEC: i32.store $discard=, 0($pop{{.+}}), $pop{{.+}}
  ret void
 }
 
@@ -196,10 +194,10 @@ define void @dynamic_static_alloca(i32 %
 define void @copytoreg_fi(i1 %cond, i32* %b) {
 entry:
  ; CHECK: i32.const $push[[L1:.+]]=, 16
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, $pop[[L1]]
+ ; CHECK-NEXT: i32.sub $push[[L3:.+]]=, {{.+}}, $pop[[L1]]
  %addr = alloca i32
  ; CHECK: i32.const $push[[OFF:.+]]=, 12
- ; CHECK-NEXT: i32.add $push[[ADDR:.+]]=, [[SP]], $pop[[OFF]]
+ ; CHECK-NEXT: i32.add $push[[ADDR:.+]]=, $pop[[L3]], $pop[[OFF]]
  ; CHECK-NEXT: copy_local [[COPY:.+]]=, $pop[[ADDR]]
  br label %body
 body:
@@ -216,12 +214,13 @@ declare i8* @llvm.frameaddress(i32)
 
 ; Test __builtin_frame_address(0).
 ; CHECK-LABEL: frameaddress_0:
-; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
-; CHECK-NEXT: i32.load [[SP:.+]]=, 0($pop[[L1]])
-; CHECK-NEXT: copy_local [[FP:.+]]=, [[SP]]
-; CHECK-NEXT: call use_i8_star at FUNCTION, [[FP]]
-; CHEC K-NEXT: i32.const $push[[L6:.+]]=, __stack_pointer
-; CHEC K-NEXT: i32.store [[SP]]=, 0($pop[[L6]]), [[FP]]
+; CHECK: i32.const $push[[L0:.+]]=, __stack_pointer
+; CHECK-NEXT: i32.load $push[[L3:.+]]=, 0($pop[[L0]])
+; CHECK-NEXT: copy_local $push[[L4:.+]]=, $pop[[L3]]{{$}}
+; CHECK-NEXT: tee_local $push[[L2:.+]]=, $[[FP:.+]]=, $pop[[L4]]{{$}}
+; CHECK-NEXT: call use_i8_star at FUNCTION, $pop[[L2]]
+; CHECK-NEXT: i32.const $push[[L1:.+]]=, __stack_pointer
+; CHECK-NEXT: i32.store $discard=, 0($pop[[L1]]), $[[FP]]
 define void @frameaddress_0() {
   %t = call i8* @llvm.frameaddress(i32 0)
   call void @use_i8_star(i8* %t)




More information about the llvm-commits mailing list