[llvm] r258779 - [WebAssembly] Implement unaligned loads and stores.

Dan Gohman via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 25 19:39:31 PST 2016


Author: djg
Date: Mon Jan 25 21:39:31 2016
New Revision: 258779

URL: http://llvm.org/viewvc/llvm-project?rev=258779&view=rev
Log:
[WebAssembly] Implement unaligned loads and stores.

Differential Revision: http://reviews.llvm.org/D16534
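
For readers who want the shape of the change without walking the whole
diff: load and store instructions gain an explicit p2align immediate
operand, the instruction printer appends a ":p2align=N" suffix only when
the value differs from the access's natural alignment, and the new
WebAssemblySetP2AlignOperands pass (its source is in the Added list
below) presumably fills the operand in from the alignment recorded on
each MachineMemOperand. The following is a minimal sketch of that idea,
not the committed pass; the function name, the default operand index,
and the clamping policy are assumptions made for illustration.

// Illustrative sketch only -- not the code added by this commit.
// Assumes the p2align immediate is operand 3, which matches the new
// operand layouts (def, off, addr, p2align[, val]) defined below.
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
using namespace llvm;

static void rewriteP2Align(MachineInstr &MI, unsigned OperandNo = 3) {
  if (MI.memoperands_empty())
    return; // no memory info; keep the conservative default of 0
  uint64_t Align = (*MI.memoperands_begin())->getAlignment();
  unsigned P2Align = Align ? Log2_64(Align) : 0;
  // Assumption: never claim more than the natural alignment that
  // GetDefaultP2Align (added in this patch) reports for the opcode.
  P2Align = std::min(P2Align, WebAssembly::GetDefaultP2Align(MI.getOpcode()));
  MI.getOperand(OperandNo).setImm(P2Align);
}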

Added:
    llvm/trunk/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
    llvm/trunk/test/CodeGen/WebAssembly/i32-load-store-alignment.ll
    llvm/trunk/test/CodeGen/WebAssembly/i64-load-store-alignment.ll
Modified:
    llvm/trunk/lib/Target/WebAssembly/CMakeLists.txt
    llvm/trunk/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
    llvm/trunk/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
    llvm/trunk/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
    llvm/trunk/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
    llvm/trunk/lib/Target/WebAssembly/WebAssembly.h
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.h
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
    llvm/trunk/test/CodeGen/WebAssembly/offset.ll
    llvm/trunk/test/CodeGen/WebAssembly/userstack.ll

Modified: llvm/trunk/lib/Target/WebAssembly/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/CMakeLists.txt?rev=258779&r1=258778&r2=258779&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/CMakeLists.txt (original)
+++ llvm/trunk/lib/Target/WebAssembly/CMakeLists.txt Mon Jan 25 21:39:31 2016
@@ -29,6 +29,7 @@ add_llvm_target(WebAssemblyCodeGen
   WebAssemblyRegNumbering.cpp
   WebAssemblyRegStackify.cpp
   WebAssemblySelectionDAGInfo.cpp
+  WebAssemblySetP2AlignOperands.cpp
   WebAssemblyStoreResults.cpp
   WebAssemblySubtarget.cpp
   WebAssemblyTargetMachine.cpp

Modified: llvm/trunk/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp?rev=258779&r1=258778&r2=258779&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp (original)
+++ llvm/trunk/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp Mon Jan 25 21:39:31 2016
@@ -93,6 +93,7 @@ MCDisassembler::DecodeStatus WebAssembly
     const MCOperandInfo &Info = Desc.OpInfo[i];
     switch (Info.OperandType) {
     case MCOI::OPERAND_IMMEDIATE:
+    case WebAssembly::OPERAND_P2ALIGN:
     case WebAssembly::OPERAND_BASIC_BLOCK: {
       if (Pos + sizeof(uint64_t) > Bytes.size())
         return MCDisassembler::Fail;

Modified: llvm/trunk/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp?rev=258779&r1=258778&r2=258779&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp (original)
+++ llvm/trunk/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp Mon Jan 25 21:39:31 2016
@@ -172,6 +172,16 @@ void WebAssemblyInstPrinter::printOperan
   }
 }
 
+void
+WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand(const MCInst *MI,
+                                                       unsigned OpNo,
+                                                       raw_ostream &O) {
+  int64_t Imm = MI->getOperand(OpNo).getImm();
+  if (Imm == WebAssembly::GetDefaultP2Align(MI->getOpcode()))
+    return;
+  O << ":p2align=" << Imm;
+}
+
 const char *llvm::WebAssembly::TypeToString(MVT Ty) {
   switch (Ty.SimpleTy) {
   case MVT::i32:

Modified: llvm/trunk/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h?rev=258779&r1=258778&r2=258779&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h (original)
+++ llvm/trunk/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h Mon Jan 25 21:39:31 2016
@@ -36,6 +36,8 @@ public:
 
   // Used by tblegen code.
   void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printWebAssemblyP2AlignOperand(const MCInst *MI, unsigned OpNo,
+                                      raw_ostream &O);
 
   // Autogenerated by tblgen.
   void printInstruction(const MCInst *MI, raw_ostream &O);

Modified: llvm/trunk/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h?rev=258779&r1=258778&r2=258779&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h (original)
+++ llvm/trunk/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h Mon Jan 25 21:39:31 2016
@@ -45,7 +45,9 @@ enum OperandType {
   /// Basic block label in a branch construct.
   OPERAND_BASIC_BLOCK = MCOI::OPERAND_FIRST_TARGET,
   /// Floating-point immediate.
-  OPERAND_FPIMM
+  OPERAND_FPIMM,
+  /// p2align immediate for load and store address alignment.
+  OPERAND_P2ALIGN
 };
 
 /// WebAssembly-specific directive identifiers.
@@ -86,4 +88,47 @@ enum {
 #define GET_SUBTARGETINFO_ENUM
 #include "WebAssemblyGenSubtargetInfo.inc"
 
+namespace llvm {
+namespace WebAssembly {
+
+/// Return the default p2align value for a load or store with the given opcode.
+inline unsigned GetDefaultP2Align(unsigned Opcode) {
+  switch (Opcode) {
+  case WebAssembly::LOAD8_S_I32:
+  case WebAssembly::LOAD8_U_I32:
+  case WebAssembly::LOAD8_S_I64:
+  case WebAssembly::LOAD8_U_I64:
+  case WebAssembly::STORE8_I32:
+  case WebAssembly::STORE8_I64:
+    return 0;
+  case WebAssembly::LOAD16_S_I32:
+  case WebAssembly::LOAD16_U_I32:
+  case WebAssembly::LOAD16_S_I64:
+  case WebAssembly::LOAD16_U_I64:
+  case WebAssembly::STORE16_I32:
+  case WebAssembly::STORE16_I64:
+    return 1;
+  case WebAssembly::LOAD_I32:
+  case WebAssembly::LOAD_F32:
+  case WebAssembly::STORE_I32:
+  case WebAssembly::STORE_F32:
+  case WebAssembly::LOAD32_S_I64:
+  case WebAssembly::LOAD32_U_I64:
+  case WebAssembly::STORE32_I64:
+    return 2;
+  case WebAssembly::LOAD_I64:
+  case WebAssembly::LOAD_F64:
+  case WebAssembly::STORE_I64:
+  case WebAssembly::STORE_F64:
+    return 3;
+  default: llvm_unreachable("Only loads and stores have p2align values");
+  }
+}
+
+/// The operand number of the stored value in a store instruction.
+static const unsigned StoreValueOperandNo = 4;
+
+} // end namespace WebAssembly
+} // end namespace llvm
+
 #endif
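
A note on how the two additions above are meant to be consumed, as a
hedged sketch (the helper functions below are hypothetical; only the
GetDefaultP2Align function and the StoreValueOperandNo constant come
from this patch): GetDefaultP2Align gives the natural alignment of each
memory opcode, which the printer uses to suppress the ":p2align="
suffix in the common case, and StoreValueOperandNo records where the
stored value now sits, since stores take (off, addr, p2align, val)
after their single def.

// Hypothetical usage helpers, not part of the patch.
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// True if a load/store carries less than its natural alignment and will
// therefore be printed with an explicit ":p2align=N" suffix.
static bool isMisaligned(const MachineInstr &MI, unsigned P2AlignOpNo = 3) {
  return MI.getOperand(P2AlignOpNo).getImm() <
         (int64_t)WebAssembly::GetDefaultP2Align(MI.getOpcode());
}

// The register holding the value stored by one of the STORE_* opcodes.
static unsigned getStoredValueReg(const MachineInstr &Store) {
  return Store.getOperand(WebAssembly::StoreValueOperandNo).getReg();
}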

Modified: llvm/trunk/lib/Target/WebAssembly/WebAssembly.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssembly.h?rev=258779&r1=258778&r2=258779&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssembly.h (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssembly.h Mon Jan 25 21:39:31 2016
@@ -28,6 +28,7 @@ FunctionPass *createWebAssemblyOptimizeR
 FunctionPass *createWebAssemblyISelDag(WebAssemblyTargetMachine &TM,
                                        CodeGenOpt::Level OptLevel);
 FunctionPass *createWebAssemblyArgumentMove();
+FunctionPass *createWebAssemblySetP2AlignOperands();
 
 FunctionPass *createWebAssemblyStoreResults();
 FunctionPass *createWebAssemblyRegStackify();

Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp?rev=258779&r1=258778&r2=258779&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp Mon Jan 25 21:39:31 2016
@@ -84,6 +84,7 @@ static void adjustStackPointer(unsigned
   BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::LOAD_I32), SPReg)
       .addImm(0)
       .addReg(SPReg)
+      .addImm(2) // p2align
       .addMemOperand(LoadMMO);
   // Add/Subtract the frame size
   unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
@@ -102,6 +103,7 @@ static void adjustStackPointer(unsigned
   BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32), WebAssembly::SP32)
       .addImm(0)
       .addReg(OffsetReg)
+      .addImm(2) // p2align
       .addReg(WebAssembly::SP32)
       .addMemOperand(MMO);
 }
@@ -169,6 +171,7 @@ void WebAssemblyFrameLowering::emitEpilo
   BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32), WebAssembly::SP32)
       .addImm(0)
       .addReg(OffsetReg)
+      .addImm(2) // p2align
       .addReg(WebAssembly::SP32)
       .addMemOperand(MMO);
 }

Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp?rev=258779&r1=258778&r2=258779&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp Mon Jan 25 21:39:31 2016
@@ -285,6 +285,20 @@ bool WebAssemblyTargetLowering::isLegalA
   return true;
 }
 
+bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
+    EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/,
+    bool *Fast) const {
+  // WebAssembly supports unaligned accesses, though it should be declared
+  // with the p2align attribute on loads and stores which do so, and there
+  // may be a performance impact. We tell LLVM they're "fast" because
+  // for the kinds of things that LLVM uses this for (merging adjacent stores
+  // of constants, etc.), WebAssembly implementations will either want the
+  // unaligned access or they'll split anyway.
+  if (Fast)
+    *Fast = true;
+  return true;
+}
+
 //===----------------------------------------------------------------------===//
 // WebAssembly Lowering private implementation.
 //===----------------------------------------------------------------------===//
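
For context on the hook just added: generic SelectionDAG code consults
allowsMisalignedMemoryAccesses before forming wider, possibly misaligned
accesses (for example when merging adjacent stores of constants), and
the Fast out-parameter gates those combines. A minimal caller-side
sketch, with a hypothetical helper name and parameters chosen purely
for illustration:

// Hypothetical caller-side check, not from the patch: ask the target
// whether a misaligned i32 access is both allowed and cheap.
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

static bool canFormMisalignedI32(const TargetLowering &TLI) {
  bool Fast = false;
  return TLI.allowsMisalignedMemoryAccesses(MVT::i32, /*AddrSpace=*/0,
                                            /*Align=*/1, &Fast) &&
         Fast;
}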

Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.h?rev=258779&r1=258778&r2=258779&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.h (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.h Mon Jan 25 21:39:31 2016
@@ -56,6 +56,8 @@ private:
   bool isCheapToSpeculateCtlz() const override;
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                              unsigned AS) const override;
+  bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace, unsigned Align,
+                                      bool *Fast) const override;
 
   SDValue LowerCall(CallLoweringInfo &CLI,
                     SmallVectorImpl<SDValue> &InVals) const override;

Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrInfo.td?rev=258779&r1=258778&r2=258779&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrInfo.td (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrInfo.td Mon Jan 25 21:39:31 2016
@@ -76,6 +76,12 @@ def f32imm_op : Operand<f32>;
 def f64imm_op : Operand<f64>;
 } // OperandType = "OPERAND_FPIMM"
 
+let OperandType = "OPERAND_P2ALIGN" in {
+def P2Align : Operand<i32> {
+  let PrintMethod = "printWebAssemblyP2AlignOperand";
+}
+} // OperandType = "OPERAND_P2ALIGN"
+
 } // OperandNamespace = "WebAssembly"
 
 //===----------------------------------------------------------------------===//

Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrMemory.td?rev=258779&r1=258778&r2=258779&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrMemory.td (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrMemory.td Mon Jan 25 21:39:31 2016
@@ -46,325 +46,354 @@ def regPlusGA : PatFrag<(ops node:$addr,
 let Defs = [ARGUMENTS] in {
 
 // Basic load.
-def LOAD_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [],
-                 "i32.load\t$dst, ${off}(${addr})">;
-def LOAD_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [],
-                 "i64.load\t$dst, ${off}(${addr})">;
-def LOAD_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr), [],
-                 "f32.load\t$dst, ${off}(${addr})">;
-def LOAD_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr), [],
-                 "f64.load\t$dst, ${off}(${addr})">;
+def LOAD_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr,
+                                   P2Align:$p2align), [],
+                 "i32.load\t$dst, ${off}(${addr})${p2align}">;
+def LOAD_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
+                                   P2Align:$p2align), [],
+                 "i64.load\t$dst, ${off}(${addr})${p2align}">;
+def LOAD_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr,
+                                   P2Align:$p2align), [],
+                 "f32.load\t$dst, ${off}(${addr})${p2align}">;
+def LOAD_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr,
+                                   P2Align:$p2align), [],
+                 "f64.load\t$dst, ${off}(${addr})${p2align}">;
 
 } // Defs = [ARGUMENTS]
 
 // Select loads with no constant offset.
-def : Pat<(i32 (load I32:$addr)), (LOAD_I32 0, $addr)>;
-def : Pat<(i64 (load I32:$addr)), (LOAD_I64 0, $addr)>;
-def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, $addr)>;
-def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, $addr)>;
+def : Pat<(i32 (load I32:$addr)), (LOAD_I32 0, $addr, 0)>;
+def : Pat<(i64 (load I32:$addr)), (LOAD_I64 0, $addr, 0)>;
+def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, $addr, 0)>;
+def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, $addr, 0)>;
 
 // Select loads with a constant offset.
 def : Pat<(i32 (load (regPlusImm I32:$addr, imm:$off))),
-          (LOAD_I32 imm:$off, $addr)>;
+          (LOAD_I32 imm:$off, $addr, 0)>;
 def : Pat<(i64 (load (regPlusImm I32:$addr, imm:$off))),
-          (LOAD_I64 imm:$off, $addr)>;
+          (LOAD_I64 imm:$off, $addr, 0)>;
 def : Pat<(f32 (load (regPlusImm I32:$addr, imm:$off))),
-          (LOAD_F32 imm:$off, $addr)>;
+          (LOAD_F32 imm:$off, $addr, 0)>;
 def : Pat<(f64 (load (regPlusImm I32:$addr, imm:$off))),
-          (LOAD_F64 imm:$off, $addr)>;
+          (LOAD_F64 imm:$off, $addr, 0)>;
 def : Pat<(i32 (load (regPlusGA I32:$addr,
                                 (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD_I32 tglobaladdr:$off, $addr)>;
+          (LOAD_I32 tglobaladdr:$off, $addr, 0)>;
 def : Pat<(i64 (load (regPlusGA I32:$addr,
                                 (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD_I64 tglobaladdr:$off, $addr)>;
+          (LOAD_I64 tglobaladdr:$off, $addr, 0)>;
 def : Pat<(f32 (load (regPlusGA I32:$addr,
                                 (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD_F32 tglobaladdr:$off, $addr)>;
+          (LOAD_F32 tglobaladdr:$off, $addr, 0)>;
 def : Pat<(f64 (load (regPlusGA I32:$addr,
                                 (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD_F64 tglobaladdr:$off, $addr)>;
+          (LOAD_F64 tglobaladdr:$off, $addr, 0)>;
 def : Pat<(i32 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD_I32 texternalsym:$off, $addr)>;
+          (LOAD_I32 texternalsym:$off, $addr, 0)>;
 def : Pat<(i64 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD_I64 texternalsym:$off, $addr)>;
+          (LOAD_I64 texternalsym:$off, $addr, 0)>;
 def : Pat<(f32 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD_F32 texternalsym:$off, $addr)>;
+          (LOAD_F32 texternalsym:$off, $addr, 0)>;
 def : Pat<(f64 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD_F64 texternalsym:$off, $addr)>;
+          (LOAD_F64 texternalsym:$off, $addr, 0)>;
 
 // Select loads with just a constant offset.
-def : Pat<(i32 (load imm:$off)), (LOAD_I32 imm:$off, (CONST_I32 0))>;
-def : Pat<(i64 (load imm:$off)), (LOAD_I64 imm:$off, (CONST_I32 0))>;
-def : Pat<(f32 (load imm:$off)), (LOAD_F32 imm:$off, (CONST_I32 0))>;
-def : Pat<(f64 (load imm:$off)), (LOAD_F64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i32 (load imm:$off)), (LOAD_I32 imm:$off, (CONST_I32 0), 0)>;
+def : Pat<(i64 (load imm:$off)), (LOAD_I64 imm:$off, (CONST_I32 0), 0)>;
+def : Pat<(f32 (load imm:$off)), (LOAD_F32 imm:$off, (CONST_I32 0), 0)>;
+def : Pat<(f64 (load imm:$off)), (LOAD_F64 imm:$off, (CONST_I32 0), 0)>;
 def : Pat<(i32 (load (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD_I32 tglobaladdr:$off, (CONST_I32 0))>;
+          (LOAD_I32 tglobaladdr:$off, (CONST_I32 0), 0)>;
 def : Pat<(i64 (load (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD_I64 tglobaladdr:$off, (CONST_I32 0))>;
+          (LOAD_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
 def : Pat<(f32 (load (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD_F32 tglobaladdr:$off, (CONST_I32 0))>;
+          (LOAD_F32 tglobaladdr:$off, (CONST_I32 0), 0)>;
 def : Pat<(f64 (load (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD_F64 tglobaladdr:$off, (CONST_I32 0))>;
+          (LOAD_F64 tglobaladdr:$off, (CONST_I32 0), 0)>;
 def : Pat<(i32 (load (WebAssemblywrapper texternalsym:$off))),
-          (LOAD_I32 texternalsym:$off, (CONST_I32 0))>;
+          (LOAD_I32 texternalsym:$off, (CONST_I32 0), 0)>;
 def : Pat<(i64 (load (WebAssemblywrapper texternalsym:$off))),
-          (LOAD_I64 texternalsym:$off, (CONST_I32 0))>;
+          (LOAD_I64 texternalsym:$off, (CONST_I32 0), 0)>;
 def : Pat<(f32 (load (WebAssemblywrapper texternalsym:$off))),
-          (LOAD_F32 texternalsym:$off, (CONST_I32 0))>;
+          (LOAD_F32 texternalsym:$off, (CONST_I32 0), 0)>;
 def : Pat<(f64 (load (WebAssemblywrapper texternalsym:$off))),
-          (LOAD_F64 texternalsym:$off, (CONST_I32 0))>;
+          (LOAD_F64 texternalsym:$off, (CONST_I32 0), 0)>;
 
 let Defs = [ARGUMENTS] in {
 
 // Extending load.
-def LOAD8_S_I32  : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [],
-                     "i32.load8_s\t$dst, ${off}(${addr})">;
-def LOAD8_U_I32  : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [],
-                     "i32.load8_u\t$dst, ${off}(${addr})">;
-def LOAD16_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [],
-                     "i32.load16_s\t$dst, ${off}(${addr})">;
-def LOAD16_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [],
-                     "i32.load16_u\t$dst, ${off}(${addr})">;
-def LOAD8_S_I64  : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [],
-                     "i64.load8_s\t$dst, ${off}(${addr})">;
-def LOAD8_U_I64  : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [],
-                     "i64.load8_u\t$dst, ${off}(${addr})">;
-def LOAD16_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [],
-                     "i64.load16_s\t$dst, ${off}(${addr})">;
-def LOAD16_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [],
-                     "i64.load16_u\t$dst, ${off}(${addr})">;
-def LOAD32_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [],
-                     "i64.load32_s\t$dst, ${off}(${addr})">;
-def LOAD32_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [],
-                     "i64.load32_u\t$dst, ${off}(${addr})">;
+def LOAD8_S_I32  : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr,
+                                       P2Align:$p2align), [],
+                     "i32.load8_s\t$dst, ${off}(${addr})${p2align}">;
+def LOAD8_U_I32  : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr,
+                                       P2Align:$p2align), [],
+                     "i32.load8_u\t$dst, ${off}(${addr})${p2align}">;
+def LOAD16_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr,
+                                       P2Align:$p2align), [],
+                     "i32.load16_s\t$dst, ${off}(${addr})${p2align}">;
+def LOAD16_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr,
+                                       P2Align:$p2align), [],
+                     "i32.load16_u\t$dst, ${off}(${addr})${p2align}">;
+def LOAD8_S_I64  : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
+                                       P2Align:$p2align), [],
+                     "i64.load8_s\t$dst, ${off}(${addr})${p2align}">;
+def LOAD8_U_I64  : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
+                                       P2Align:$p2align), [],
+                     "i64.load8_u\t$dst, ${off}(${addr})${p2align}">;
+def LOAD16_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
+                                       P2Align:$p2align), [],
+                     "i64.load16_s\t$dst, ${off}(${addr})${p2align}">;
+def LOAD16_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
+                                       P2Align:$p2align), [],
+                     "i64.load16_u\t$dst, ${off}(${addr})${p2align}">;
+def LOAD32_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
+                                       P2Align:$p2align), [],
+                     "i64.load32_s\t$dst, ${off}(${addr})${p2align}">;
+def LOAD32_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
+                                       P2Align:$p2align), [],
+                     "i64.load32_u\t$dst, ${off}(${addr})${p2align}">;
 
 } // Defs = [ARGUMENTS]
 
 // Select extending loads with no constant offset.
-def : Pat<(i32 (sextloadi8 I32:$addr)), (LOAD8_S_I32 0, $addr)>;
-def : Pat<(i32 (zextloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr)>;
-def : Pat<(i32 (sextloadi16 I32:$addr)), (LOAD16_S_I32 0, $addr)>;
-def : Pat<(i32 (zextloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr)>;
-def : Pat<(i64 (sextloadi8 I32:$addr)), (LOAD8_S_I64 0, $addr)>;
-def : Pat<(i64 (zextloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr)>;
-def : Pat<(i64 (sextloadi16 I32:$addr)), (LOAD16_S_I64 0, $addr)>;
-def : Pat<(i64 (zextloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>;
-def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, $addr)>;
-def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>;
+def : Pat<(i32 (sextloadi8 I32:$addr)), (LOAD8_S_I32 0, $addr, 0)>;
+def : Pat<(i32 (zextloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr, 0)>;
+def : Pat<(i32 (sextloadi16 I32:$addr)), (LOAD16_S_I32 0, $addr, 0)>;
+def : Pat<(i32 (zextloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr, 0)>;
+def : Pat<(i64 (sextloadi8 I32:$addr)), (LOAD8_S_I64 0, $addr, 0)>;
+def : Pat<(i64 (zextloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr, 0)>;
+def : Pat<(i64 (sextloadi16 I32:$addr)), (LOAD16_S_I64 0, $addr, 0)>;
+def : Pat<(i64 (zextloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr, 0)>;
+def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, $addr, 0)>;
+def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr, 0)>;
 
 // Select extending loads with a constant offset.
 def : Pat<(i32 (sextloadi8 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD8_S_I32 imm:$off, $addr)>;
+          (LOAD8_S_I32 imm:$off, $addr, 0)>;
 def : Pat<(i32 (zextloadi8 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD8_U_I32 imm:$off, $addr)>;
+          (LOAD8_U_I32 imm:$off, $addr, 0)>;
 def : Pat<(i32 (sextloadi16 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD16_S_I32 imm:$off, $addr)>;
+          (LOAD16_S_I32 imm:$off, $addr, 0)>;
 def : Pat<(i32 (zextloadi16 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD16_U_I32 imm:$off, $addr)>;
+          (LOAD16_U_I32 imm:$off, $addr, 0)>;
 def : Pat<(i64 (sextloadi8 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD8_S_I64 imm:$off, $addr)>;
+          (LOAD8_S_I64 imm:$off, $addr, 0)>;
 def : Pat<(i64 (zextloadi8 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD8_U_I64 imm:$off, $addr)>;
+          (LOAD8_U_I64 imm:$off, $addr, 0)>;
 def : Pat<(i64 (sextloadi16 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD16_S_I64 imm:$off, $addr)>;
+          (LOAD16_S_I64 imm:$off, $addr, 0)>;
 def : Pat<(i64 (zextloadi16 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD16_U_I64 imm:$off, $addr)>;
+          (LOAD16_U_I64 imm:$off, $addr, 0)>;
 def : Pat<(i64 (sextloadi32 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD32_S_I64 imm:$off, $addr)>;
+          (LOAD32_S_I64 imm:$off, $addr, 0)>;
 def : Pat<(i64 (zextloadi32 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD32_U_I64 imm:$off, $addr)>;
+          (LOAD32_U_I64 imm:$off, $addr, 0)>;
 def : Pat<(i32 (sextloadi8 (regPlusGA I32:$addr,
                                       (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD8_S_I32 tglobaladdr:$off, $addr)>;
+          (LOAD8_S_I32 tglobaladdr:$off, $addr, 0)>;
 def : Pat<(i32 (zextloadi8 (regPlusGA I32:$addr,
                                       (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD8_U_I32 tglobaladdr:$off, $addr)>;
+          (LOAD8_U_I32 tglobaladdr:$off, $addr, 0)>;
 def : Pat<(i32 (sextloadi16 (regPlusGA I32:$addr,
                                        (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD16_S_I32 tglobaladdr:$off, $addr)>;
+          (LOAD16_S_I32 tglobaladdr:$off, $addr, 0)>;
 def : Pat<(i32 (zextloadi16 (regPlusGA I32:$addr,
                                        (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD16_U_I32 tglobaladdr:$off, $addr)>;
+          (LOAD16_U_I32 tglobaladdr:$off, $addr, 0)>;
 def : Pat<(i64 (sextloadi8 (regPlusGA I32:$addr,
                                       (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD8_S_I64 tglobaladdr:$off, $addr)>;
+          (LOAD8_S_I64 tglobaladdr:$off, $addr, 0)>;
 def : Pat<(i64 (zextloadi8 (regPlusGA I32:$addr,
                                       (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD8_U_I64 tglobaladdr:$off, $addr)>;
+          (LOAD8_U_I64 tglobaladdr:$off, $addr, 0)>;
 def : Pat<(i64 (sextloadi16 (regPlusGA I32:$addr,
                                        (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD16_S_I64 tglobaladdr:$off, $addr)>;
+          (LOAD16_S_I64 tglobaladdr:$off, $addr, 0)>;
 def : Pat<(i64 (zextloadi16 (regPlusGA I32:$addr,
                                        (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD16_U_I64 tglobaladdr:$off, $addr)>;
+          (LOAD16_U_I64 tglobaladdr:$off, $addr, 0)>;
 def : Pat<(i64 (sextloadi32 (regPlusGA I32:$addr,
                                        (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD32_S_I64 tglobaladdr:$off, $addr)>;
+          (LOAD32_S_I64 tglobaladdr:$off, $addr, 0)>;
 def : Pat<(i64 (zextloadi32 (regPlusGA I32:$addr,
                                        (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD32_U_I64 tglobaladdr:$off, $addr)>;
+          (LOAD32_U_I64 tglobaladdr:$off, $addr, 0)>;
 def : Pat<(i32 (sextloadi8 (add I32:$addr,
                                 (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD8_S_I32 texternalsym:$off, $addr)>;
+          (LOAD8_S_I32 texternalsym:$off, $addr, 0)>;
 def : Pat<(i32 (zextloadi8 (add I32:$addr,
                                 (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD8_U_I32 texternalsym:$off, $addr)>;
+          (LOAD8_U_I32 texternalsym:$off, $addr, 0)>;
 def : Pat<(i32 (sextloadi16 (add I32:$addr,
                                  (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD16_S_I32 texternalsym:$off, $addr)>;
+          (LOAD16_S_I32 texternalsym:$off, $addr, 0)>;
 def : Pat<(i32 (zextloadi16 (add I32:$addr,
                                  (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD16_U_I32 texternalsym:$off, $addr)>;
+          (LOAD16_U_I32 texternalsym:$off, $addr, 0)>;
 def : Pat<(i64 (sextloadi8 (add I32:$addr,
                                 (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD8_S_I64 texternalsym:$off, $addr)>;
+          (LOAD8_S_I64 texternalsym:$off, $addr, 0)>;
 def : Pat<(i64 (zextloadi8 (add I32:$addr,
                                 (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD8_U_I64 texternalsym:$off, $addr)>;
+          (LOAD8_U_I64 texternalsym:$off, $addr, 0)>;
 def : Pat<(i64 (sextloadi16 (add I32:$addr,
                                  (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD16_S_I64 texternalsym:$off, $addr)>;
+          (LOAD16_S_I64 texternalsym:$off, $addr, 0)>;
 def : Pat<(i64 (zextloadi16 (add I32:$addr,
                                  (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD16_U_I64 texternalsym:$off, $addr)>;
+          (LOAD16_U_I64 texternalsym:$off, $addr, 0)>;
 def : Pat<(i64 (sextloadi32 (add I32:$addr,
                                  (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD32_S_I64 texternalsym:$off, $addr)>;
+          (LOAD32_S_I64 texternalsym:$off, $addr, 0)>;
 def : Pat<(i64 (zextloadi32 (add I32:$addr,
                                  (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD32_U_I64 texternalsym:$off, $addr)>;
+          (LOAD32_U_I64 texternalsym:$off, $addr, 0)>;
 
 // Select extending loads with just a constant offset.
-def : Pat<(i32 (sextloadi8 imm:$off)), (LOAD8_S_I32 imm:$off, (CONST_I32 0))>;
-def : Pat<(i32 (zextloadi8 imm:$off)), (LOAD8_U_I32 imm:$off, (CONST_I32 0))>;
-def : Pat<(i32 (sextloadi16 imm:$off)), (LOAD16_S_I32 imm:$off, (CONST_I32 0))>;
-def : Pat<(i32 (zextloadi16 imm:$off)), (LOAD16_U_I32 imm:$off, (CONST_I32 0))>;
-def : Pat<(i64 (sextloadi8 imm:$off)), (LOAD8_S_I64 imm:$off, (CONST_I32 0))>;
-def : Pat<(i64 (zextloadi8 imm:$off)), (LOAD8_U_I64 imm:$off, (CONST_I32 0))>;
-def : Pat<(i64 (sextloadi16 imm:$off)), (LOAD16_S_I64 imm:$off, (CONST_I32 0))>;
-def : Pat<(i64 (zextloadi16 imm:$off)), (LOAD16_U_I64 imm:$off, (CONST_I32 0))>;
-def : Pat<(i64 (sextloadi32 imm:$off)), (LOAD32_S_I64 imm:$off, (CONST_I32 0))>;
-def : Pat<(i64 (zextloadi32 imm:$off)), (LOAD32_U_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i32 (sextloadi8 imm:$off)),
+          (LOAD8_S_I32 imm:$off, (CONST_I32 0), 0)>;
+def : Pat<(i32 (zextloadi8 imm:$off)),
+          (LOAD8_U_I32 imm:$off, (CONST_I32 0), 0)>;
+def : Pat<(i32 (sextloadi16 imm:$off)),
+          (LOAD16_S_I32 imm:$off, (CONST_I32 0), 0)>;
+def : Pat<(i32 (zextloadi16 imm:$off)),
+          (LOAD16_U_I32 imm:$off, (CONST_I32 0), 0)>;
+def : Pat<(i64 (sextloadi8 imm:$off)),
+          (LOAD8_S_I64 imm:$off, (CONST_I32 0), 0)>;
+def : Pat<(i64 (zextloadi8 imm:$off)),
+          (LOAD8_U_I64 imm:$off, (CONST_I32 0), 0)>;
+def : Pat<(i64 (sextloadi16 imm:$off)),
+          (LOAD16_S_I64 imm:$off, (CONST_I32 0), 0)>;
+def : Pat<(i64 (zextloadi16 imm:$off)),
+          (LOAD16_U_I64 imm:$off, (CONST_I32 0), 0)>;
+def : Pat<(i64 (sextloadi32 imm:$off)),
+          (LOAD32_S_I64 imm:$off, (CONST_I32 0), 0)>;
+def : Pat<(i64 (zextloadi32 imm:$off)),
+          (LOAD32_U_I64 imm:$off, (CONST_I32 0), 0)>;
 def : Pat<(i32 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD8_S_I32 tglobaladdr:$off, (CONST_I32 0))>;
+          (LOAD8_S_I32 tglobaladdr:$off, (CONST_I32 0), 0)>;
 def : Pat<(i32 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0))>;
+          (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0), 0)>;
 def : Pat<(i32 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD16_S_I32 tglobaladdr:$off, (CONST_I32 0))>;
+          (LOAD16_S_I32 tglobaladdr:$off, (CONST_I32 0), 0)>;
 def : Pat<(i32 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0))>;
+          (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0), 0)>;
 def : Pat<(i64 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD8_S_I64 tglobaladdr:$off, (CONST_I32 0))>;
+          (LOAD8_S_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
 def : Pat<(i64 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+          (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
 def : Pat<(i64 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD16_S_I64 tglobaladdr:$off, (CONST_I32 0))>;
+          (LOAD16_S_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
 def : Pat<(i64 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+          (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
 def : Pat<(i64 (sextloadi32 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD32_S_I64 tglobaladdr:$off, (CONST_I32 0))>;
+          (LOAD32_S_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
 def : Pat<(i64 (zextloadi32 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+          (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
 def : Pat<(i32 (sextloadi8 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD8_S_I32 texternalsym:$off, (CONST_I32 0))>;
+          (LOAD8_S_I32 texternalsym:$off, (CONST_I32 0), 0)>;
 def : Pat<(i32 (zextloadi8 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0))>;
+          (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0), 0)>;
 def : Pat<(i32 (sextloadi16 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD16_S_I32 texternalsym:$off, (CONST_I32 0))>;
+          (LOAD16_S_I32 texternalsym:$off, (CONST_I32 0), 0)>;
 def : Pat<(i32 (zextloadi16 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0))>;
+          (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0), 0)>;
 def : Pat<(i64 (sextloadi8 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD8_S_I64 texternalsym:$off, (CONST_I32 0))>;
+          (LOAD8_S_I64 texternalsym:$off, (CONST_I32 0), 0)>;
 def : Pat<(i64 (zextloadi8 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0))>;
+          (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0), 0)>;
 def : Pat<(i64 (sextloadi16 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD16_S_I64 texternalsym:$off, (CONST_I32 0))>;
+          (LOAD16_S_I64 texternalsym:$off, (CONST_I32 0), 0)>;
 def : Pat<(i64 (zextloadi16 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0))>;
+          (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0), 0)>;
 def : Pat<(i64 (sextloadi32 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD32_S_I64 texternalsym:$off, (CONST_I32 0))>;
+          (LOAD32_S_I64 texternalsym:$off, (CONST_I32 0), 0)>;
 def : Pat<(i64 (zextloadi32 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD32_U_I64 texternalsym:$off, (CONST_I32 0))>;
+          (LOAD32_U_I64 texternalsym:$off, (CONST_I32 0), 0)>;
 
 // Resolve "don't care" extending loads to zero-extending loads. This is
 // somewhat arbitrary, but zero-extending is conceptually simpler.
 
 // Select "don't care" extending loads with no constant offset.
-def : Pat<(i32 (extloadi8 I32:$addr)),  (LOAD8_U_I32 0, $addr)>;
-def : Pat<(i32 (extloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr)>;
-def : Pat<(i64 (extloadi8 I32:$addr)),  (LOAD8_U_I64 0, $addr)>;
-def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>;
-def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>;
+def : Pat<(i32 (extloadi8 I32:$addr)),  (LOAD8_U_I32 0, $addr, 0)>;
+def : Pat<(i32 (extloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr, 0)>;
+def : Pat<(i64 (extloadi8 I32:$addr)),  (LOAD8_U_I64 0, $addr, 0)>;
+def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr, 0)>;
+def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr, 0)>;
 
 // Select "don't care" extending loads with a constant offset.
 def : Pat<(i32 (extloadi8 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD8_U_I32 imm:$off, $addr)>;
+          (LOAD8_U_I32 imm:$off, $addr, 0)>;
 def : Pat<(i32 (extloadi16 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD16_U_I32 imm:$off, $addr)>;
+          (LOAD16_U_I32 imm:$off, $addr, 0)>;
 def : Pat<(i64 (extloadi8 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD8_U_I64 imm:$off, $addr)>;
+          (LOAD8_U_I64 imm:$off, $addr, 0)>;
 def : Pat<(i64 (extloadi16 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD16_U_I64 imm:$off, $addr)>;
+          (LOAD16_U_I64 imm:$off, $addr, 0)>;
 def : Pat<(i64 (extloadi32 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD32_U_I64 imm:$off, $addr)>;
+          (LOAD32_U_I64 imm:$off, $addr, 0)>;
 def : Pat<(i32 (extloadi8 (regPlusGA I32:$addr,
                                      (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD8_U_I32 tglobaladdr:$off, $addr)>;
+          (LOAD8_U_I32 tglobaladdr:$off, $addr, 0)>;
 def : Pat<(i32 (extloadi16 (regPlusGA I32:$addr,
                                       (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD16_U_I32 tglobaladdr:$off, $addr)>;
+          (LOAD16_U_I32 tglobaladdr:$off, $addr, 0)>;
 def : Pat<(i64 (extloadi8 (regPlusGA I32:$addr,
                                      (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD8_U_I64 tglobaladdr:$off, $addr)>;
+          (LOAD8_U_I64 tglobaladdr:$off, $addr, 0)>;
 def : Pat<(i64 (extloadi16 (regPlusGA I32:$addr,
                                       (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD16_U_I64 tglobaladdr:$off, $addr)>;
+          (LOAD16_U_I64 tglobaladdr:$off, $addr, 0)>;
 def : Pat<(i64 (extloadi32 (regPlusGA I32:$addr,
                                       (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD32_U_I64 tglobaladdr:$off, $addr)>;
+          (LOAD32_U_I64 tglobaladdr:$off, $addr, 0)>;
 def : Pat<(i32 (extloadi8 (add I32:$addr,
                                (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD8_U_I32 texternalsym:$off, $addr)>;
+          (LOAD8_U_I32 texternalsym:$off, $addr, 0)>;
 def : Pat<(i32 (extloadi16 (add I32:$addr,
                                 (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD16_U_I32 texternalsym:$off, $addr)>;
+          (LOAD16_U_I32 texternalsym:$off, $addr, 0)>;
 def : Pat<(i64 (extloadi8 (add I32:$addr,
                                (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD8_U_I64 texternalsym:$off, $addr)>;
+          (LOAD8_U_I64 texternalsym:$off, $addr, 0)>;
 def : Pat<(i64 (extloadi16 (add I32:$addr,
                                 (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD16_U_I64 texternalsym:$off, $addr)>;
+          (LOAD16_U_I64 texternalsym:$off, $addr, 0)>;
 def : Pat<(i64 (extloadi32 (add I32:$addr,
                                 (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD32_U_I64 texternalsym:$off, $addr)>;
+          (LOAD32_U_I64 texternalsym:$off, $addr, 0)>;
 
 // Select "don't care" extending loads with just a constant offset.
-def : Pat<(i32 (extloadi8 imm:$off)), (LOAD8_U_I32 imm:$off, (CONST_I32 0))>;
-def : Pat<(i32 (extloadi16 imm:$off)), (LOAD16_U_I32 imm:$off, (CONST_I32 0))>;
-def : Pat<(i64 (extloadi8 imm:$off)), (LOAD8_U_I64 imm:$off, (CONST_I32 0))>;
-def : Pat<(i64 (extloadi16 imm:$off)), (LOAD16_U_I64 imm:$off, (CONST_I32 0))>;
-def : Pat<(i64 (extloadi32 imm:$off)), (LOAD32_U_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi8 imm:$off)),
+          (LOAD8_U_I32 imm:$off, (CONST_I32 0), 0)>;
+def : Pat<(i32 (extloadi16 imm:$off)),
+          (LOAD16_U_I32 imm:$off, (CONST_I32 0), 0)>;
+def : Pat<(i64 (extloadi8 imm:$off)),
+          (LOAD8_U_I64 imm:$off, (CONST_I32 0), 0)>;
+def : Pat<(i64 (extloadi16 imm:$off)),
+          (LOAD16_U_I64 imm:$off, (CONST_I32 0), 0)>;
+def : Pat<(i64 (extloadi32 imm:$off)),
+          (LOAD32_U_I64 imm:$off, (CONST_I32 0), 0)>;
 def : Pat<(i32 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0))>;
+          (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0), 0)>;
 def : Pat<(i32 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0))>;
+          (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0), 0)>;
 def : Pat<(i64 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+          (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
 def : Pat<(i64 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+          (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
 def : Pat<(i64 (extloadi32 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+          (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
 def : Pat<(i32 (extloadi8 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0))>;
+          (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0), 0)>;
 def : Pat<(i32 (extloadi16 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0))>;
+          (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0), 0)>;
 def : Pat<(i64 (extloadi8 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0))>;
+          (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0), 0)>;
 def : Pat<(i64 (extloadi16 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0))>;
+          (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0), 0)>;
 def : Pat<(i64 (extloadi32 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+          (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
 
 let Defs = [ARGUMENTS] in {
 
@@ -374,193 +403,202 @@ let Defs = [ARGUMENTS] in {
 // instruction definition patterns that don't reference all of the output
 // operands.
 // Note: WebAssembly inverts SelectionDAG's usual operand order.
-def STORE_I32  : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, I32:$val), [],
-                   "i32.store\t$dst, ${off}(${addr}), $val">;
-def STORE_I64  : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [],
-                   "i64.store\t$dst, ${off}(${addr}), $val">;
-def STORE_F32  : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr, F32:$val), [],
-                   "f32.store\t$dst, ${off}(${addr}), $val">;
-def STORE_F64  : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr, F64:$val), [],
-                   "f64.store\t$dst, ${off}(${addr}), $val">;
+def STORE_I32  : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr,
+                                     P2Align:$p2align, I32:$val), [],
+                   "i32.store\t$dst, ${off}(${addr})${p2align}, $val">;
+def STORE_I64  : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
+                                     P2Align:$p2align, I64:$val), [],
+                   "i64.store\t$dst, ${off}(${addr})${p2align}, $val">;
+def STORE_F32  : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr,
+                                     P2Align:$p2align, F32:$val), [],
+                   "f32.store\t$dst, ${off}(${addr})${p2align}, $val">;
+def STORE_F64  : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr,
+                                     P2Align:$p2align, F64:$val), [],
+                   "f64.store\t$dst, ${off}(${addr})${p2align}, $val">;
 
 } // Defs = [ARGUMENTS]
 
 // Select stores with no constant offset.
-def : Pat<(store I32:$val, I32:$addr), (STORE_I32 0, I32:$addr, I32:$val)>;
-def : Pat<(store I64:$val, I32:$addr), (STORE_I64 0, I32:$addr, I64:$val)>;
-def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, I32:$addr, F32:$val)>;
-def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, I32:$addr, F64:$val)>;
+def : Pat<(store I32:$val, I32:$addr), (STORE_I32 0, I32:$addr, 0, I32:$val)>;
+def : Pat<(store I64:$val, I32:$addr), (STORE_I64 0, I32:$addr, 0, I64:$val)>;
+def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, I32:$addr, 0, F32:$val)>;
+def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, I32:$addr, 0, F64:$val)>;
 
 // Select stores with a constant offset.
 def : Pat<(store I32:$val, (regPlusImm I32:$addr, imm:$off)),
-          (STORE_I32 imm:$off, I32:$addr, I32:$val)>;
+          (STORE_I32 imm:$off, I32:$addr, 0, I32:$val)>;
 def : Pat<(store I64:$val, (regPlusImm I32:$addr, imm:$off)),
-          (STORE_I64 imm:$off, I32:$addr, I64:$val)>;
+          (STORE_I64 imm:$off, I32:$addr, 0, I64:$val)>;
 def : Pat<(store F32:$val, (regPlusImm I32:$addr, imm:$off)),
-          (STORE_F32 imm:$off, I32:$addr, F32:$val)>;
+          (STORE_F32 imm:$off, I32:$addr, 0, F32:$val)>;
 def : Pat<(store F64:$val, (regPlusImm I32:$addr, imm:$off)),
-          (STORE_F64 imm:$off, I32:$addr, F64:$val)>;
+          (STORE_F64 imm:$off, I32:$addr, 0, F64:$val)>;
 def : Pat<(store I32:$val, (regPlusGA I32:$addr,
                                       (WebAssemblywrapper tglobaladdr:$off))),
-          (STORE_I32 tglobaladdr:$off, I32:$addr, I32:$val)>;
+          (STORE_I32 tglobaladdr:$off, I32:$addr, 0, I32:$val)>;
 def : Pat<(store I64:$val, (regPlusGA I32:$addr,
                                       (WebAssemblywrapper tglobaladdr:$off))),
-          (STORE_I64 tglobaladdr:$off, I32:$addr, I64:$val)>;
+          (STORE_I64 tglobaladdr:$off, I32:$addr, 0, I64:$val)>;
 def : Pat<(store F32:$val, (regPlusGA I32:$addr,
                                       (WebAssemblywrapper tglobaladdr:$off))),
-          (STORE_F32 tglobaladdr:$off, I32:$addr, F32:$val)>;
+          (STORE_F32 tglobaladdr:$off, I32:$addr, 0, F32:$val)>;
 def : Pat<(store F64:$val, (regPlusGA I32:$addr,
                                       (WebAssemblywrapper tglobaladdr:$off))),
-          (STORE_F64 tglobaladdr:$off, I32:$addr, F64:$val)>;
+          (STORE_F64 tglobaladdr:$off, I32:$addr, 0, F64:$val)>;
 def : Pat<(store I32:$val, (add I32:$addr,
                                 (WebAssemblywrapper texternalsym:$off))),
-          (STORE_I32 texternalsym:$off, I32:$addr, I32:$val)>;
+          (STORE_I32 texternalsym:$off, I32:$addr, 0, I32:$val)>;
 def : Pat<(store I64:$val, (add I32:$addr,
                                 (WebAssemblywrapper texternalsym:$off))),
-          (STORE_I64 texternalsym:$off, I32:$addr, I64:$val)>;
+          (STORE_I64 texternalsym:$off, I32:$addr, 0, I64:$val)>;
 def : Pat<(store F32:$val, (add I32:$addr,
                                 (WebAssemblywrapper texternalsym:$off))),
-          (STORE_F32 texternalsym:$off, I32:$addr, F32:$val)>;
+          (STORE_F32 texternalsym:$off, I32:$addr, 0, F32:$val)>;
 def : Pat<(store F64:$val, (add I32:$addr,
                                 (WebAssemblywrapper texternalsym:$off))),
-          (STORE_F64 texternalsym:$off, I32:$addr, F64:$val)>;
+          (STORE_F64 texternalsym:$off, I32:$addr, 0, F64:$val)>;
 
 // Select stores with just a constant offset.
 def : Pat<(store I32:$val, imm:$off),
-          (STORE_I32 imm:$off, (CONST_I32 0), I32:$val)>;
+          (STORE_I32 imm:$off, (CONST_I32 0), 0, I32:$val)>;
 def : Pat<(store I64:$val, imm:$off),
-          (STORE_I64 imm:$off, (CONST_I32 0), I64:$val)>;
+          (STORE_I64 imm:$off, (CONST_I32 0), 0, I64:$val)>;
 def : Pat<(store F32:$val, imm:$off),
-          (STORE_F32 imm:$off, (CONST_I32 0), F32:$val)>;
+          (STORE_F32 imm:$off, (CONST_I32 0), 0, F32:$val)>;
 def : Pat<(store F64:$val, imm:$off),
-          (STORE_F64 imm:$off, (CONST_I32 0), F64:$val)>;
+          (STORE_F64 imm:$off, (CONST_I32 0), 0, F64:$val)>;
 def : Pat<(store I32:$val, (WebAssemblywrapper tglobaladdr:$off)),
-          (STORE_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>;
+          (STORE_I32 tglobaladdr:$off, (CONST_I32 0), 0, I32:$val)>;
 def : Pat<(store I64:$val, (WebAssemblywrapper tglobaladdr:$off)),
-          (STORE_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>;
+          (STORE_I64 tglobaladdr:$off, (CONST_I32 0), 0, I64:$val)>;
 def : Pat<(store F32:$val, (WebAssemblywrapper tglobaladdr:$off)),
-          (STORE_F32 tglobaladdr:$off, (CONST_I32 0), F32:$val)>;
+          (STORE_F32 tglobaladdr:$off, (CONST_I32 0), 0, F32:$val)>;
 def : Pat<(store F64:$val, (WebAssemblywrapper tglobaladdr:$off)),
-          (STORE_F64 tglobaladdr:$off, (CONST_I32 0), F64:$val)>;
+          (STORE_F64 tglobaladdr:$off, (CONST_I32 0), 0, F64:$val)>;
 def : Pat<(store I32:$val, (WebAssemblywrapper texternalsym:$off)),
-          (STORE_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>;
+          (STORE_I32 texternalsym:$off, (CONST_I32 0), 0, I32:$val)>;
 def : Pat<(store I64:$val, (WebAssemblywrapper texternalsym:$off)),
-          (STORE_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>;
+          (STORE_I64 texternalsym:$off, (CONST_I32 0), 0, I64:$val)>;
 def : Pat<(store F32:$val, (WebAssemblywrapper texternalsym:$off)),
-          (STORE_F32 texternalsym:$off, (CONST_I32 0), F32:$val)>;
+          (STORE_F32 texternalsym:$off, (CONST_I32 0), 0, F32:$val)>;
 def : Pat<(store F64:$val, (WebAssemblywrapper texternalsym:$off)),
-          (STORE_F64 texternalsym:$off, (CONST_I32 0), F64:$val)>;
+          (STORE_F64 texternalsym:$off, (CONST_I32 0), 0, F64:$val)>;
 
 let Defs = [ARGUMENTS] in {
 
 // Truncating store.
-def STORE8_I32  : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, I32:$val), [],
-                    "i32.store8\t$dst, ${off}(${addr}), $val">;
-def STORE16_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, I32:$val), [],
-                    "i32.store16\t$dst, ${off}(${addr}), $val">;
-def STORE8_I64  : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [],
-                    "i64.store8\t$dst, ${off}(${addr}), $val">;
-def STORE16_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [],
-                    "i64.store16\t$dst, ${off}(${addr}), $val">;
-def STORE32_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [],
-                    "i64.store32\t$dst, ${off}(${addr}), $val">;
+def STORE8_I32  : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr,
+                                      P2Align:$p2align, I32:$val), [],
+                    "i32.store8\t$dst, ${off}(${addr})${p2align}, $val">;
+def STORE16_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr,
+                                      P2Align:$p2align, I32:$val), [],
+                    "i32.store16\t$dst, ${off}(${addr})${p2align}, $val">;
+def STORE8_I64  : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
+                                      P2Align:$p2align, I64:$val), [],
+                    "i64.store8\t$dst, ${off}(${addr})${p2align}, $val">;
+def STORE16_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
+                                      P2Align:$p2align, I64:$val), [],
+                    "i64.store16\t$dst, ${off}(${addr})${p2align}, $val">;
+def STORE32_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
+                                      P2Align:$p2align, I64:$val), [],
+                    "i64.store32\t$dst, ${off}(${addr})${p2align}, $val">;
 
 } // Defs = [ARGUMENTS]
 
 // Select truncating stores with no constant offset.
 def : Pat<(truncstorei8 I32:$val, I32:$addr),
-          (STORE8_I32 0, I32:$addr, I32:$val)>;
+          (STORE8_I32 0, I32:$addr, 0, I32:$val)>;
 def : Pat<(truncstorei16 I32:$val, I32:$addr),
-          (STORE16_I32 0, I32:$addr, I32:$val)>;
+          (STORE16_I32 0, I32:$addr, 0, I32:$val)>;
 def : Pat<(truncstorei8 I64:$val, I32:$addr),
-          (STORE8_I64 0, I32:$addr, I64:$val)>;
+          (STORE8_I64 0, I32:$addr, 0, I64:$val)>;
 def : Pat<(truncstorei16 I64:$val, I32:$addr),
-          (STORE16_I64 0, I32:$addr, I64:$val)>;
+          (STORE16_I64 0, I32:$addr, 0, I64:$val)>;
 def : Pat<(truncstorei32 I64:$val, I32:$addr),
-          (STORE32_I64 0, I32:$addr, I64:$val)>;
+          (STORE32_I64 0, I32:$addr, 0, I64:$val)>;
 
 // Select truncating stores with a constant offset.
 def : Pat<(truncstorei8 I32:$val, (regPlusImm I32:$addr, imm:$off)),
-          (STORE8_I32 imm:$off, I32:$addr, I32:$val)>;
+          (STORE8_I32 imm:$off, I32:$addr, 0, I32:$val)>;
 def : Pat<(truncstorei16 I32:$val, (regPlusImm I32:$addr, imm:$off)),
-          (STORE16_I32 imm:$off, I32:$addr, I32:$val)>;
+          (STORE16_I32 imm:$off, I32:$addr, 0, I32:$val)>;
 def : Pat<(truncstorei8 I64:$val, (regPlusImm I32:$addr, imm:$off)),
-          (STORE8_I64 imm:$off, I32:$addr, I64:$val)>;
+          (STORE8_I64 imm:$off, I32:$addr, 0, I64:$val)>;
 def : Pat<(truncstorei16 I64:$val, (regPlusImm I32:$addr, imm:$off)),
-          (STORE16_I64 imm:$off, I32:$addr, I64:$val)>;
+          (STORE16_I64 imm:$off, I32:$addr, 0, I64:$val)>;
 def : Pat<(truncstorei32 I64:$val, (regPlusImm I32:$addr, imm:$off)),
-          (STORE32_I64 imm:$off, I32:$addr, I64:$val)>;
+          (STORE32_I64 imm:$off, I32:$addr, 0, I64:$val)>;
 def : Pat<(truncstorei8 I32:$val,
                         (regPlusGA I32:$addr,
                                    (WebAssemblywrapper tglobaladdr:$off))),
-          (STORE8_I32 tglobaladdr:$off, I32:$addr, I32:$val)>;
+          (STORE8_I32 tglobaladdr:$off, I32:$addr, 0, I32:$val)>;
 def : Pat<(truncstorei16 I32:$val,
                          (regPlusGA I32:$addr,
                                     (WebAssemblywrapper tglobaladdr:$off))),
-          (STORE16_I32 tglobaladdr:$off, I32:$addr, I32:$val)>;
+          (STORE16_I32 tglobaladdr:$off, I32:$addr, 0, I32:$val)>;
 def : Pat<(truncstorei8 I64:$val,
                         (regPlusGA I32:$addr,
                                    (WebAssemblywrapper tglobaladdr:$off))),
-          (STORE8_I64 tglobaladdr:$off, I32:$addr, I64:$val)>;
+          (STORE8_I64 tglobaladdr:$off, I32:$addr, 0, I64:$val)>;
 def : Pat<(truncstorei16 I64:$val,
                          (regPlusGA I32:$addr,
                                     (WebAssemblywrapper tglobaladdr:$off))),
-          (STORE16_I64 tglobaladdr:$off, I32:$addr, I64:$val)>;
+          (STORE16_I64 tglobaladdr:$off, I32:$addr, 0, I64:$val)>;
 def : Pat<(truncstorei32 I64:$val,
                          (regPlusGA I32:$addr,
                                     (WebAssemblywrapper tglobaladdr:$off))),
-          (STORE32_I64 tglobaladdr:$off, I32:$addr, I64:$val)>;
+          (STORE32_I64 tglobaladdr:$off, I32:$addr, 0, I64:$val)>;
 def : Pat<(truncstorei8 I32:$val, (add I32:$addr,
                                        (WebAssemblywrapper texternalsym:$off))),
-          (STORE8_I32 texternalsym:$off, I32:$addr, I32:$val)>;
+          (STORE8_I32 texternalsym:$off, I32:$addr, 0, I32:$val)>;
 def : Pat<(truncstorei16 I32:$val,
                          (add I32:$addr,
                               (WebAssemblywrapper texternalsym:$off))),
-          (STORE16_I32 texternalsym:$off, I32:$addr, I32:$val)>;
+          (STORE16_I32 texternalsym:$off, I32:$addr, 0, I32:$val)>;
 def : Pat<(truncstorei8 I64:$val,
                         (add I32:$addr,
                              (WebAssemblywrapper texternalsym:$off))),
-          (STORE8_I64 texternalsym:$off, I32:$addr, I64:$val)>;
+          (STORE8_I64 texternalsym:$off, I32:$addr, 0, I64:$val)>;
 def : Pat<(truncstorei16 I64:$val,
                          (add I32:$addr,
                               (WebAssemblywrapper texternalsym:$off))),
-          (STORE16_I64 texternalsym:$off, I32:$addr, I64:$val)>;
+          (STORE16_I64 texternalsym:$off, I32:$addr, 0, I64:$val)>;
 def : Pat<(truncstorei32 I64:$val,
                          (add I32:$addr,
                               (WebAssemblywrapper texternalsym:$off))),
-          (STORE32_I64 texternalsym:$off, I32:$addr, I64:$val)>;
+          (STORE32_I64 texternalsym:$off, I32:$addr, 0, I64:$val)>;
 
 // Select truncating stores with just a constant offset.
 def : Pat<(truncstorei8 I32:$val, imm:$off),
-          (STORE8_I32 imm:$off, (CONST_I32 0), I32:$val)>;
+          (STORE8_I32 imm:$off, (CONST_I32 0), 0, I32:$val)>;
 def : Pat<(truncstorei16 I32:$val, imm:$off),
-          (STORE16_I32 imm:$off, (CONST_I32 0), I32:$val)>;
+          (STORE16_I32 imm:$off, (CONST_I32 0), 0, I32:$val)>;
 def : Pat<(truncstorei8 I64:$val, imm:$off),
-          (STORE8_I64 imm:$off, (CONST_I32 0), I64:$val)>;
+          (STORE8_I64 imm:$off, (CONST_I32 0), 0, I64:$val)>;
 def : Pat<(truncstorei16 I64:$val, imm:$off),
-          (STORE16_I64 imm:$off, (CONST_I32 0), I64:$val)>;
+          (STORE16_I64 imm:$off, (CONST_I32 0), 0, I64:$val)>;
 def : Pat<(truncstorei32 I64:$val, imm:$off),
-          (STORE32_I64 imm:$off, (CONST_I32 0), I64:$val)>;
+          (STORE32_I64 imm:$off, (CONST_I32 0), 0, I64:$val)>;
 def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper tglobaladdr:$off)),
-          (STORE8_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>;
+          (STORE8_I32 tglobaladdr:$off, (CONST_I32 0), 0, I32:$val)>;
 def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper tglobaladdr:$off)),
-          (STORE16_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>;
+          (STORE16_I32 tglobaladdr:$off, (CONST_I32 0), 0, I32:$val)>;
 def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper tglobaladdr:$off)),
-          (STORE8_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>;
+          (STORE8_I64 tglobaladdr:$off, (CONST_I32 0), 0, I64:$val)>;
 def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper tglobaladdr:$off)),
-          (STORE16_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>;
+          (STORE16_I64 tglobaladdr:$off, (CONST_I32 0), 0, I64:$val)>;
 def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper tglobaladdr:$off)),
-          (STORE32_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>;
+          (STORE32_I64 tglobaladdr:$off, (CONST_I32 0), 0, I64:$val)>;
 def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper texternalsym:$off)),
-          (STORE8_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>;
+          (STORE8_I32 texternalsym:$off, (CONST_I32 0), 0, I32:$val)>;
 def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper texternalsym:$off)),
-          (STORE16_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>;
+          (STORE16_I32 texternalsym:$off, (CONST_I32 0), 0, I32:$val)>;
 def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper texternalsym:$off)),
-          (STORE8_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>;
+          (STORE8_I64 texternalsym:$off, (CONST_I32 0), 0, I64:$val)>;
 def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper texternalsym:$off)),
-          (STORE16_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>;
+          (STORE16_I64 texternalsym:$off, (CONST_I32 0), 0, I64:$val)>;
 def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper texternalsym:$off)),
-          (STORE32_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>;
+          (STORE32_I64 texternalsym:$off, (CONST_I32 0), 0, I64:$val)>;
 
 let Defs = [ARGUMENTS] in {
 

Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h?rev=258779&r1=258778&r2=258779&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h Mon Jan 25 21:39:31 2016
@@ -58,6 +58,11 @@ public:
       VRegStackified.resize(TargetRegisterInfo::virtReg2Index(VReg) + 1);
     VRegStackified.set(TargetRegisterInfo::virtReg2Index(VReg));
   }
+  void unstackifyVReg(unsigned VReg) {
+    if (TargetRegisterInfo::virtReg2Index(VReg) >= VRegStackified.size())
+      return;
+    VRegStackified.reset(TargetRegisterInfo::virtReg2Index(VReg));
+  }
   bool isVRegStackified(unsigned VReg) const {
     if (TargetRegisterInfo::virtReg2Index(VReg) >= VRegStackified.size())
       return false;

Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyPeephole.cpp?rev=258779&r1=258778&r2=258779&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyPeephole.cpp (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyPeephole.cpp Mon Jan 25 21:39:31 2016
@@ -70,8 +70,10 @@ bool WebAssemblyPeephole::runOnMachineFu
         MachineOperand &MO = MI.getOperand(0);
         unsigned OldReg = MO.getReg();
         // TODO: Handle SP/physregs
-        if (OldReg == MI.getOperand(3).getReg() &&
-            TargetRegisterInfo::isVirtualRegister(MI.getOperand(3).getReg())) {
+        if (OldReg ==
+                MI.getOperand(WebAssembly::StoreValueOperandNo).getReg() &&
+            TargetRegisterInfo::isVirtualRegister(
+                MI.getOperand(WebAssembly::StoreValueOperandNo).getReg())) {
           Changed = true;
           unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
           MO.setReg(NewReg);
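
Both this hunk and the WebAssemblyStoreResults change further down replace the hard-coded operand index 3 with WebAssembly::StoreValueOperandNo, because the new p2align immediate shifts the stored-value operand. A rough sketch of the resulting operand order, inferred from the (STOREx off, addr, 0, val) patterns above and the p2align assert on operand 3 in the new pass below; the real constant is defined in WebAssemblyMCTargetDesc.h, which is not shown here, so the concrete index is an assumption:

    // Illustrative only: operand order for a WebAssembly store MachineInstr
    // after this patch. The value of WebAssembly::StoreValueOperandNo is an
    // assumption for illustration, not copied from the header.
    enum SketchStoreOperand {
      Result  = 0, // the store's result register
      Offset  = 1, // constant offset immediate
      Address = 2, // base address register
      P2Align = 3, // new log2-alignment immediate, written as 0 by ISel
      Value   = 4  // the stored value, previously at index 3
    };
    // unsigned FromReg = MI.getOperand(Value).getReg(); // rather than getOperand(3)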

Added: llvm/trunk/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp?rev=258779&view=auto
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp (added)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp Mon Jan 25 21:39:31 2016
@@ -0,0 +1,108 @@
+//=- WebAssemblySetP2AlignOperands.cpp - Set alignments on loads and stores -=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file sets the p2align operands on load and store instructions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-set-p2align-operands"
+
+namespace {
+class WebAssemblySetP2AlignOperands final : public MachineFunctionPass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  WebAssemblySetP2AlignOperands() : MachineFunctionPass(ID) {}
+
+  const char *getPassName() const override {
+    return "WebAssembly Set p2align Operands";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addPreserved<MachineBlockFrequencyInfo>();
+    AU.addPreservedID(MachineDominatorsID);
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // end anonymous namespace
+
+char WebAssemblySetP2AlignOperands::ID = 0;
+FunctionPass *llvm::createWebAssemblySetP2AlignOperands() {
+  return new WebAssemblySetP2AlignOperands();
+}
+
+bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG({
+    dbgs() << "********** Set p2align Operands **********\n"
+           << "********** Function: " << MF.getName() << '\n';
+  });
+
+  bool Changed = false;
+
+  for (auto &MBB : MF) {
+    for (auto &MI : MBB) {
+      switch (MI.getOpcode()) {
+      case WebAssembly::LOAD_I32:
+      case WebAssembly::LOAD_I64:
+      case WebAssembly::LOAD_F32:
+      case WebAssembly::LOAD_F64:
+      case WebAssembly::LOAD8_S_I32:
+      case WebAssembly::LOAD8_U_I32:
+      case WebAssembly::LOAD16_S_I32:
+      case WebAssembly::LOAD16_U_I32:
+      case WebAssembly::LOAD8_S_I64:
+      case WebAssembly::LOAD8_U_I64:
+      case WebAssembly::LOAD16_S_I64:
+      case WebAssembly::LOAD16_U_I64:
+      case WebAssembly::LOAD32_S_I64:
+      case WebAssembly::LOAD32_U_I64:
+      case WebAssembly::STORE_I32:
+      case WebAssembly::STORE_I64:
+      case WebAssembly::STORE_F32:
+      case WebAssembly::STORE_F64:
+      case WebAssembly::STORE8_I32:
+      case WebAssembly::STORE16_I32:
+      case WebAssembly::STORE8_I64:
+      case WebAssembly::STORE16_I64:
+      case WebAssembly::STORE32_I64:
+        assert(MI.getOperand(3).getImm() == 0 &&
+               "ISel should set p2align operands to 0");
+        assert(MI.hasOneMemOperand() &&
+               "Load and store instructions have exactly one mem operand");
+        assert((*MI.memoperands_begin())->getSize() ==
+                   (UINT64_C(1)
+                    << WebAssembly::GetDefaultP2Align(MI.getOpcode())) &&
+               "Default p2align value should be natural");
+        assert(MI.getDesc().OpInfo[3].OperandType ==
+                   WebAssembly::OPERAND_P2ALIGN &&
+               "Load and store instructions should have a p2align operand");
+        MI.getOperand(3).setImm(
+            Log2_64((*MI.memoperands_begin())->getAlignment()));
+        break;
+      default:
+        break;
+      }
+    }
+  }
+
+  return Changed;
+}
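
As a side note on the arithmetic in the pass above: it only rewrites operand 3 to the log2 of the memoperand's byte alignment, and the asserts encode that the 0 written by ISel corresponds to the access's natural alignment. A minimal standalone sketch of that conversion, with Log2OfAlign standing in for llvm::Log2_64:

    #include <cassert>
    #include <cstdint>

    // Stand-in for llvm::Log2_64 on a power-of-two byte alignment.
    static unsigned Log2OfAlign(uint64_t Align) {
      assert(Align != 0 && (Align & (Align - 1)) == 0 && "expected a power of two");
      unsigned P2 = 0;
      while ((UINT64_C(1) << P2) < Align)
        ++P2;
      return P2;
    }

    int main() {
      // align 1, 2, 4, 8 in the IR become p2align 0, 1, 2, 3 on the instruction.
      assert(Log2OfAlign(1) == 0);
      assert(Log2OfAlign(2) == 1);
      assert(Log2OfAlign(4) == 2);
      assert(Log2OfAlign(8) == 3);
      // The "natural" case the asserts check: a 4-byte access whose memoperand
      // size equals 1 << (default p2align of 2).
      assert((UINT64_C(1) << 2) == 4);
      return 0;
    }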

Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp?rev=258779&r1=258778&r2=258779&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp Mon Jan 25 21:39:31 2016
@@ -91,7 +91,8 @@ bool WebAssemblyStoreResults::runOnMachi
       case WebAssembly::STORE_I32:
       case WebAssembly::STORE_I64:
         unsigned ToReg = MI.getOperand(0).getReg();
-        unsigned FromReg = MI.getOperand(3).getReg();
+        unsigned FromReg =
+            MI.getOperand(WebAssembly::StoreValueOperandNo).getReg();
         for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) {
           MachineOperand &O = *I++;
           MachineInstr *Where = O.getParent();

Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp?rev=258779&r1=258778&r2=258779&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp Mon Jan 25 21:39:31 2016
@@ -154,6 +154,10 @@ bool WebAssemblyPassConfig::addInstSelec
   // so that we can fix up the ARGUMENT instructions before anything else
   // sees them in the wrong place.
   addPass(createWebAssemblyArgumentMove());
+  // Set the p2align operands. This information is present during ISel, however
+  // it's inconvenient to collect. Collect it now, and update the immediate
+  // operands.
+  addPass(createWebAssemblySetP2AlignOperands());
   return false;
 }
 

Added: llvm/trunk/test/CodeGen/WebAssembly/i32-load-store-alignment.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/WebAssembly/i32-load-store-alignment.ll?rev=258779&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/WebAssembly/i32-load-store-alignment.ll (added)
+++ llvm/trunk/test/CodeGen/WebAssembly/i32-load-store-alignment.ll Mon Jan 25 21:39:31 2016
@@ -0,0 +1,210 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test loads and stores with custom alignment values.
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: ldi32_a1:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.load $push[[NUM:[0-9]+]]=, 0($0):p2align=0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @ldi32_a1(i32 *%p) {
+  %v = load i32, i32* %p, align 1
+  ret i32 %v
+}
+
+; CHECK-LABEL: ldi32_a2:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.load $push[[NUM:[0-9]+]]=, 0($0):p2align=1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @ldi32_a2(i32 *%p) {
+  %v = load i32, i32* %p, align 2
+  ret i32 %v
+}
+
+; 4 is the default alignment for i32 so no attribute is needed.
+
+; CHECK-LABEL: ldi32_a4:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.load $push[[NUM:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @ldi32_a4(i32 *%p) {
+  %v = load i32, i32* %p, align 4
+  ret i32 %v
+}
+
+; The default alignment in LLVM is the same as the default alignment in wasm.
+
+; CHECK-LABEL: ldi32:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.load $push[[NUM:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @ldi32(i32 *%p) {
+  %v = load i32, i32* %p
+  ret i32 %v
+}
+
+; CHECK-LABEL: ldi32_a8:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.load $push[[NUM:[0-9]+]]=, 0($0):p2align=3{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @ldi32_a8(i32 *%p) {
+  %v = load i32, i32* %p, align 8
+  ret i32 %v
+}
+
+; Extending loads.
+
+; CHECK-LABEL: ldi8_a1:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.load8_u $push[[NUM:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i8 @ldi8_a1(i8 *%p) {
+  %v = load i8, i8* %p, align 1
+  ret i8 %v
+}
+
+; CHECK-LABEL: ldi8_a2:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.load8_u $push[[NUM:[0-9]+]]=, 0($0):p2align=1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i8 @ldi8_a2(i8 *%p) {
+  %v = load i8, i8* %p, align 2
+  ret i8 %v
+}
+
+; CHECK-LABEL: ldi16_a1:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.load16_u $push[[NUM:[0-9]+]]=, 0($0):p2align=0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i16 @ldi16_a1(i16 *%p) {
+  %v = load i16, i16* %p, align 1
+  ret i16 %v
+}
+
+; CHECK-LABEL: ldi16_a2:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.load16_u $push[[NUM:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i16 @ldi16_a2(i16 *%p) {
+  %v = load i16, i16* %p, align 2
+  ret i16 %v
+}
+
+; CHECK-LABEL: ldi16_a4:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.load16_u $push[[NUM:[0-9]+]]=, 0($0):p2align=2{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i16 @ldi16_a4(i16 *%p) {
+  %v = load i16, i16* %p, align 4
+  ret i16 %v
+}
+
+; Stores.
+
+; CHECK-LABEL: sti32_a1:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: i32.store $discard=, 0($0):p2align=0, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti32_a1(i32 *%p, i32 %v) {
+  store i32 %v, i32* %p, align 1
+  ret void
+}
+
+; CHECK-LABEL: sti32_a2:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: i32.store $discard=, 0($0):p2align=1, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti32_a2(i32 *%p, i32 %v) {
+  store i32 %v, i32* %p, align 2
+  ret void
+}
+
+; 4 is the default alignment for i32 so no attribute is needed.
+
+; CHECK-LABEL: sti32_a4:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: i32.store $discard=, 0($0), $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti32_a4(i32 *%p, i32 %v) {
+  store i32 %v, i32* %p, align 4
+  ret void
+}
+
+; The default alignment in LLVM is the same as the default alignment in wasm.
+
+; CHECK-LABEL: sti32:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: i32.store $discard=, 0($0), $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti32(i32 *%p, i32 %v) {
+  store i32 %v, i32* %p
+  ret void
+}
+
+; CHECK-LABEL: sti32_a8:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: i32.store $discard=, 0($0):p2align=3, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti32_a8(i32 *%p, i32 %v) {
+  store i32 %v, i32* %p, align 8
+  ret void
+}
+
+; Truncating stores.
+
+; CHECK-LABEL: sti8_a1:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: i32.store8 $discard=, 0($0), $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti8_a1(i8 *%p, i8 %v) {
+  store i8 %v, i8* %p, align 1
+  ret void
+}
+
+; CHECK-LABEL: sti8_a2:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: i32.store8 $discard=, 0($0):p2align=1, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti8_a2(i8 *%p, i8 %v) {
+  store i8 %v, i8* %p, align 2
+  ret void
+}
+
+; CHECK-LABEL: sti16_a1:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: i32.store16 $discard=, 0($0):p2align=0, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti16_a1(i16 *%p, i16 %v) {
+  store i16 %v, i16* %p, align 1
+  ret void
+}
+
+; CHECK-LABEL: sti16_a2:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: i32.store16 $discard=, 0($0), $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti16_a2(i16 *%p, i16 %v) {
+  store i16 %v, i16* %p, align 2
+  ret void
+}
+
+; CHECK-LABEL: sti16_a4:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: i32.store16 $discard=, 0($0):p2align=2, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti16_a4(i16 *%p, i16 %v) {
+  store i16 %v, i16* %p, align 4
+  ret void
+}
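
A compact way to read the checks in this test: the :p2align= suffix appears only when the log2 of the IR alignment differs from the access's natural alignment (0 for 8-bit, 1 for 16-bit, 2 for 32-bit accesses), and it also appears for over-alignment such as align 8 on an i32. A small sketch of that rule; the helper name is made up for illustration and is not the printer's actual API:

    #include <cstdint>
    #include <string>

    // Hypothetical helper mirroring what the CHECK lines above expect: omit the
    // suffix at natural alignment, otherwise print the log2 of the alignment.
    std::string p2AlignSuffix(uint64_t AlignInBytes, unsigned NaturalP2Align) {
      unsigned P2 = 0;
      while ((UINT64_C(1) << P2) < AlignInBytes)
        ++P2;
      if (P2 == NaturalP2Align)
        return ""; // e.g. i32.load with align 4, i32.load16_u with align 2
      return ":p2align=" + std::to_string(P2);
    }
    // p2AlignSuffix(1, 2) == ":p2align=0"  (ldi32_a1)
    // p2AlignSuffix(8, 2) == ":p2align=3"  (ldi32_a8)
    // p2AlignSuffix(2, 1) == ""            (ldi16_a2)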

Added: llvm/trunk/test/CodeGen/WebAssembly/i64-load-store-alignment.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/WebAssembly/i64-load-store-alignment.ll?rev=258779&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/WebAssembly/i64-load-store-alignment.ll (added)
+++ llvm/trunk/test/CodeGen/WebAssembly/i64-load-store-alignment.ll Mon Jan 25 21:39:31 2016
@@ -0,0 +1,323 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test loads and stores with custom alignment values.
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: ldi64_a1:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load $push[[NUM:[0-9]+]]=, 0($0):p2align=0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi64_a1(i64 *%p) {
+  %v = load i64, i64* %p, align 1
+  ret i64 %v
+}
+
+; CHECK-LABEL: ldi64_a2:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load $push[[NUM:[0-9]+]]=, 0($0):p2align=1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi64_a2(i64 *%p) {
+  %v = load i64, i64* %p, align 2
+  ret i64 %v
+}
+
+; CHECK-LABEL: ldi64_a4:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load $push[[NUM:[0-9]+]]=, 0($0):p2align=2{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi64_a4(i64 *%p) {
+  %v = load i64, i64* %p, align 4
+  ret i64 %v
+}
+
+; 8 is the default alignment for i64 so no attribute is needed.
+
+; CHECK-LABEL: ldi64_a8:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load $push[[NUM:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi64_a8(i64 *%p) {
+  %v = load i64, i64* %p, align 8
+  ret i64 %v
+}
+
+; The default alignment in LLVM is the same as the default alignment in wasm.
+
+; CHECK-LABEL: ldi64:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load $push[[NUM:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi64(i64 *%p) {
+  %v = load i64, i64* %p
+  ret i64 %v
+}
+
+; CHECK-LABEL: ldi64_a16:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load $push[[NUM:[0-9]+]]=, 0($0):p2align=4{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi64_a16(i64 *%p) {
+  %v = load i64, i64* %p, align 16
+  ret i64 %v
+}
+
+; Extending loads.
+
+; CHECK-LABEL: ldi8_a1:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load8_u $push[[NUM:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi8_a1(i8 *%p) {
+  %v = load i8, i8* %p, align 1
+  %w = zext i8 %v to i64
+  ret i64 %w
+}
+
+; CHECK-LABEL: ldi8_a2:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load8_u $push[[NUM:[0-9]+]]=, 0($0):p2align=1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi8_a2(i8 *%p) {
+  %v = load i8, i8* %p, align 2
+  %w = zext i8 %v to i64
+  ret i64 %w
+}
+
+; CHECK-LABEL: ldi16_a1:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load16_u $push[[NUM:[0-9]+]]=, 0($0):p2align=0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi16_a1(i16 *%p) {
+  %v = load i16, i16* %p, align 1
+  %w = zext i16 %v to i64
+  ret i64 %w
+}
+
+; CHECK-LABEL: ldi16_a2:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load16_u $push[[NUM:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi16_a2(i16 *%p) {
+  %v = load i16, i16* %p, align 2
+  %w = zext i16 %v to i64
+  ret i64 %w
+}
+
+; CHECK-LABEL: ldi16_a4:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load16_u $push[[NUM:[0-9]+]]=, 0($0):p2align=2{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi16_a4(i16 *%p) {
+  %v = load i16, i16* %p, align 4
+  %w = zext i16 %v to i64
+  ret i64 %w
+}
+
+; CHECK-LABEL: ldi32_a1:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load32_u $push[[NUM:[0-9]+]]=, 0($0):p2align=0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi32_a1(i32 *%p) {
+  %v = load i32, i32* %p, align 1
+  %w = zext i32 %v to i64
+  ret i64 %w
+}
+
+; CHECK-LABEL: ldi32_a2:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load32_u $push[[NUM:[0-9]+]]=, 0($0):p2align=1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi32_a2(i32 *%p) {
+  %v = load i32, i32* %p, align 2
+  %w = zext i32 %v to i64
+  ret i64 %w
+}
+
+; CHECK-LABEL: ldi32_a4:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load32_u $push[[NUM:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi32_a4(i32 *%p) {
+  %v = load i32, i32* %p, align 4
+  %w = zext i32 %v to i64
+  ret i64 %w
+}
+
+; CHECK-LABEL: ldi32_a8:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load32_u $push[[NUM:[0-9]+]]=, 0($0):p2align=3{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi32_a8(i32 *%p) {
+  %v = load i32, i32* %p, align 8
+  %w = zext i32 %v to i64
+  ret i64 %w
+}
+
+; Stores.
+
+; CHECK-LABEL: sti64_a1:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store $discard=, 0($0):p2align=0, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti64_a1(i64 *%p, i64 %v) {
+  store i64 %v, i64* %p, align 1
+  ret void
+}
+
+; CHECK-LABEL: sti64_a2:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store $discard=, 0($0):p2align=1, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti64_a2(i64 *%p, i64 %v) {
+  store i64 %v, i64* %p, align 2
+  ret void
+}
+
+; CHECK-LABEL: sti64_a4:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store $discard=, 0($0):p2align=2, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti64_a4(i64 *%p, i64 %v) {
+  store i64 %v, i64* %p, align 4
+  ret void
+}
+
+; 8 is the default alignment for i64 so no attribute is needed.
+
+; CHECK-LABEL: sti64_a8:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store $discard=, 0($0), $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti64_a8(i64 *%p, i64 %v) {
+  store i64 %v, i64* %p, align 8
+  ret void
+}
+
+; The default alignment in LLVM is the same as the default alignment in wasm.
+
+; CHECK-LABEL: sti64:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store $discard=, 0($0), $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti64(i64 *%p, i64 %v) {
+  store i64 %v, i64* %p
+  ret void
+}
+
+; CHECK-LABEL: sti64_a16:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store $discard=, 0($0):p2align=4, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti64_a16(i64 *%p, i64 %v) {
+  store i64 %v, i64* %p, align 16
+  ret void
+}
+
+; Truncating stores.
+
+; CHECK-LABEL: sti8_a1:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store8 $discard=, 0($0), $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti8_a1(i8 *%p, i64 %w) {
+  %v = trunc i64 %w to i8
+  store i8 %v, i8* %p, align 1
+  ret void
+}
+
+; CHECK-LABEL: sti8_a2:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store8 $discard=, 0($0):p2align=1, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti8_a2(i8 *%p, i64 %w) {
+  %v = trunc i64 %w to i8
+  store i8 %v, i8* %p, align 2
+  ret void
+}
+
+; CHECK-LABEL: sti16_a1:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store16 $discard=, 0($0):p2align=0, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti16_a1(i16 *%p, i64 %w) {
+  %v = trunc i64 %w to i16
+  store i16 %v, i16* %p, align 1
+  ret void
+}
+
+; CHECK-LABEL: sti16_a2:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store16 $discard=, 0($0), $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti16_a2(i16 *%p, i64 %w) {
+  %v = trunc i64 %w to i16
+  store i16 %v, i16* %p, align 2
+  ret void
+}
+
+; CHECK-LABEL: sti16_a4:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store16 $discard=, 0($0):p2align=2, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti16_a4(i16 *%p, i64 %w) {
+  %v = trunc i64 %w to i16
+  store i16 %v, i16* %p, align 4
+  ret void
+}
+
+; CHECK-LABEL: sti32_a1:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store32 $discard=, 0($0):p2align=0, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti32_a1(i32 *%p, i64 %w) {
+  %v = trunc i64 %w to i32
+  store i32 %v, i32* %p, align 1
+  ret void
+}
+
+; CHECK-LABEL: sti32_a2:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store32 $discard=, 0($0):p2align=1, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti32_a2(i32 *%p, i64 %w) {
+  %v = trunc i64 %w to i32
+  store i32 %v, i32* %p, align 2
+  ret void
+}
+
+; CHECK-LABEL: sti32_a4:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store32 $discard=, 0($0), $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti32_a4(i32 *%p, i64 %w) {
+  %v = trunc i64 %w to i32
+  store i32 %v, i32* %p, align 4
+  ret void
+}
+
+; CHECK-LABEL: sti32_a8:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store32 $discard=, 0($0):p2align=3, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti32_a8(i32 *%p, i64 %w) {
+  %v = trunc i64 %w to i32
+  store i32 %v, i32* %p, align 8
+  ret void
+}

Modified: llvm/trunk/test/CodeGen/WebAssembly/offset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/WebAssembly/offset.ll?rev=258779&r1=258778&r2=258779&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/WebAssembly/offset.ll (original)
+++ llvm/trunk/test/CodeGen/WebAssembly/offset.ll Mon Jan 25 21:39:31 2016
@@ -372,14 +372,27 @@ define void @aggregate_load_store({i32,i
   ret void
 }
 
-; Fold the offsets when lowering aggregate return values.
+; Fold the offsets when lowering aggregate return values. The stores get
+; merged into i64 stores.
 
 ; CHECK-LABEL: aggregate_return:
-; CHECK: i32.const   $push0=, 0{{$}}
-; CHECK: i32.store   $push1=, 12($0), $pop0{{$}}
-; CHECK: i32.store   $push2=, 8($0), $pop1{{$}}
-; CHECK: i32.store   $push3=, 4($0), $pop2{{$}}
-; CHECK: i32.store   $discard=, 0($0), $pop3{{$}}
+; CHECK: i64.const   $push0=, 0{{$}}
+; CHECK: i64.store   $push1=, 8($0):p2align=2, $pop0{{$}}
+; CHECK: i64.store   $discard=, 0($0):p2align=2, $pop1{{$}}
 define {i32,i32,i32,i32} @aggregate_return() {
   ret {i32,i32,i32,i32} zeroinitializer
 }
+
+; Fold the offsets when lowering aggregate return values. The stores are not
+; merged.
+
+; CHECK-LABEL: aggregate_return_without_merge:
+; CHECK: i32.const   $push0=, 0{{$}}
+; CHECK: i32.store8  $push1=, 14($0), $pop0{{$}}
+; CHECK: i32.store16 $push2=, 12($0), $pop1{{$}}
+; CHECK: i32.store   $discard=, 8($0), $pop2{{$}}
+; CHECK: i64.const   $push3=, 0{{$}}
+; CHECK: i64.store   $discard=, 0($0), $pop3{{$}}
+define {i64,i32,i16,i8} @aggregate_return_without_merge() {
+  ret {i64,i32,i16,i8} zeroinitializer
+}
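
For the aggregate_return case above, the four i32 zero stores at offsets 0, 4, 8 and 12 are merged into two 8-byte stores at offsets 0 and 8; since the returned-aggregate pointer is only known to be 4-byte aligned, the merged i64 stores keep p2align=2. A minimal sketch of the equivalent memory effect, with the function name invented for illustration:

    #include <cstdint>
    #include <cstring>

    // Sketch of the merged-store layout: two i64 zero stores at offsets 0 and 8
    // over a buffer that only guarantees 4-byte alignment (hence p2align=2).
    void aggregateReturnSketch(uint32_t *Ret) {
      uint64_t Zero = 0;
      std::memcpy(reinterpret_cast<char *>(Ret) + 0, &Zero, sizeof(Zero));
      std::memcpy(reinterpret_cast<char *>(Ret) + 8, &Zero, sizeof(Zero));
    }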

Modified: llvm/trunk/test/CodeGen/WebAssembly/userstack.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/WebAssembly/userstack.ll?rev=258779&r1=258778&r2=258779&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/WebAssembly/userstack.ll (original)
+++ llvm/trunk/test/CodeGen/WebAssembly/userstack.ll Mon Jan 25 21:39:31 2016
@@ -57,7 +57,7 @@ define void @allocarray() {
  ; CHECK-NEXT: i32.store [[SP]]=, 0([[L2]]), [[SP]]
  %r = alloca [5 x i32]
 
- ; CHECK-NEXT: i32.const $push[[L4:.+]]=, 4
+ ; CHECK-NEXT: i32.const $push[[L4:.+]]=, 12
  ; CHECK-NEXT: i32.const [[L5:.+]]=, 12
  ; CHECK-NEXT: i32.add [[L5]]=, [[SP]], [[L5]]
  ; CHECK-NEXT: i32.add $push[[L6:.+]]=, [[L5]], $pop[[L4]]
@@ -66,7 +66,7 @@ define void @allocarray() {
  ; CHECK-NEXT: i32.store $discard=, 0($pop3), $pop[[L10]]{{$}}
  %p = getelementptr [5 x i32], [5 x i32]* %r, i32 0, i32 0
  store i32 1, i32* %p
- %p2 = getelementptr [5 x i32], [5 x i32]* %r, i32 0, i32 1
+ %p2 = getelementptr [5 x i32], [5 x i32]* %r, i32 0, i32 3
  store i32 1, i32* %p2
 
  ; CHECK-NEXT: i32.const [[L7:.+]]=, 32
@@ -89,8 +89,8 @@ define void @allocarray_inbounds() {
  %p = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 0
  store i32 1, i32* %p
  ; This store should have both the GEP and the FI folded into it.
- ; CHECK-NEXT: i32.store {{.*}}=, 16([[SP]]), $pop
- %p2 = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 1
+ ; CHECK-NEXT: i32.store {{.*}}=, 24([[SP]]), $pop
+ %p2 = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 3
  store i32 1, i32* %p2
  ; CHECK: i32.const [[L7:.+]]=, 32
  ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], [[L7]]



