[llvm] r359621 - [AMDGPU] gfx1010 VMEM and SMEM implementation

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 30 15:08:23 PDT 2019


Author: rampitec
Date: Tue Apr 30 15:08:23 2019
New Revision: 359621

URL: http://llvm.org/viewvc/llvm-project?rev=359621&view=rev
Log:
[AMDGPU] gfx1010 VMEM and SMEM implementation

This adds the gfx1010 changes to the VMEM (MUBUF, MTBUF, FLAT) and SMEM
instruction definitions: a new DLC cache-policy operand and encoding bit,
the narrower gfx10 FLAT immediate offset ranges plus a check for the
flat-segment offset bug, the buffer_gl0_inv and buffer_gl1_inv cache
invalidation instructions, assembler support for the dlc modifier, and
the new int_amdgcn_s_get_waveid_in_workgroup intrinsic.

Differential Revision: https://reviews.llvm.org/D61330

Added:
    llvm/trunk/test/CodeGen/AMDGPU/flat-offset-bug.ll
    llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll
    llvm/trunk/test/MC/AMDGPU/flat-gfx10.s
Modified:
    llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
    llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
    llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
    llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td
    llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
    llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
    llvm/trunk/lib/Target/AMDGPU/SIFixupVectorISel.cpp
    llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
    llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
    llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
    llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
    llvm/trunk/lib/Target/AMDGPU/SMInstructions.td
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir
    llvm/trunk/test/CodeGen/AMDGPU/break-smem-soft-clauses.mir
    llvm/trunk/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir
    llvm/trunk/test/CodeGen/AMDGPU/clamp-omod-special-case.mir
    llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir
    llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads.mir
    llvm/trunk/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir
    llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir
    llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir
    llvm/trunk/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
    llvm/trunk/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir
    llvm/trunk/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir
    llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.mir
    llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
    llvm/trunk/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir
    llvm/trunk/test/CodeGen/AMDGPU/dce-disjoint-intervals.mir
    llvm/trunk/test/CodeGen/AMDGPU/dead-lane.mir
    llvm/trunk/test/CodeGen/AMDGPU/dead-mi-use-same-intr.mir
    llvm/trunk/test/CodeGen/AMDGPU/dead_copy.mir
    llvm/trunk/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
    llvm/trunk/test/CodeGen/AMDGPU/endpgm-dce.mir
    llvm/trunk/test/CodeGen/AMDGPU/flat-load-clustering.mir
    llvm/trunk/test/CodeGen/AMDGPU/fold-imm-copy.mir
    llvm/trunk/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir
    llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir
    llvm/trunk/test/CodeGen/AMDGPU/fold-multiple.mir
    llvm/trunk/test/CodeGen/AMDGPU/global-load-store-atomics.mir
    llvm/trunk/test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir
    llvm/trunk/test/CodeGen/AMDGPU/hazard-inlineasm.mir
    llvm/trunk/test/CodeGen/AMDGPU/hazard-kill.mir
    llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-term.ll
    llvm/trunk/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir
    llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir
    llvm/trunk/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir
    llvm/trunk/test/CodeGen/AMDGPU/limit-coalesce.mir
    llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
    llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir
    llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-local.mir
    llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir
    llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir
    llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir
    llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-region.mir
    llvm/trunk/test/CodeGen/AMDGPU/memory_clause.mir
    llvm/trunk/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir
    llvm/trunk/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir
    llvm/trunk/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir
    llvm/trunk/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
    llvm/trunk/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
    llvm/trunk/test/CodeGen/AMDGPU/readlane_exec0.mir
    llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir
    llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join.mir
    llvm/trunk/test/CodeGen/AMDGPU/regcoalesce-dbg.mir
    llvm/trunk/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir
    llvm/trunk/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir
    llvm/trunk/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir
    llvm/trunk/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
    llvm/trunk/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
    llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure.mir
    llvm/trunk/test/CodeGen/AMDGPU/sdwa-gfx9.mir
    llvm/trunk/test/CodeGen/AMDGPU/sdwa-ops.mir
    llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
    llvm/trunk/test/CodeGen/AMDGPU/sdwa-preserve.mir
    llvm/trunk/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir
    llvm/trunk/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
    llvm/trunk/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir
    llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir
    llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
    llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow.mir
    llvm/trunk/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir
    llvm/trunk/test/CodeGen/AMDGPU/smrd-fold-offset.mir
    llvm/trunk/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir
    llvm/trunk/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
    llvm/trunk/test/CodeGen/AMDGPU/syncscopes.ll
    llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
    llvm/trunk/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir
    llvm/trunk/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir
    llvm/trunk/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir
    llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-irreducible.mir
    llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-single-basic-block.mir
    llvm/trunk/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
    llvm/trunk/test/CodeGen/AMDGPU/waitcnt.mir
    llvm/trunk/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir
    llvm/trunk/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir
    llvm/trunk/test/CodeGen/MIR/AMDGPU/syncscopes.mir
    llvm/trunk/test/CodeGen/MIR/AMDGPU/target-index-operands.mir
    llvm/trunk/test/MC/AMDGPU/flat-global.s
    llvm/trunk/test/MC/AMDGPU/flat-scratch-instructions.s

Modified: llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td Tue Apr 30 15:08:23 2019
@@ -1431,6 +1431,14 @@ def int_amdgcn_ds_bpermute :
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
 
 //===----------------------------------------------------------------------===//
+// GFX10 Intrinsics
+//===----------------------------------------------------------------------===//
+
+def int_amdgcn_s_get_waveid_in_workgroup :
+  GCCBuiltin<"__builtin_amdgcn_s_get_waveid_in_workgroup">,
+  Intrinsic<[llvm_i32_ty], [], [IntrReadMem]>;
+
+//===----------------------------------------------------------------------===//
 // Deep learning intrinsics.
 //===----------------------------------------------------------------------===//
 

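For context, the new intrinsic is exposed to source code through the clang
builtin named in the GCCBuiltin binding above. A minimal device-side usage
sketch (the function and pointer names are hypothetical; assumes a gfx1010
compile where clang recognizes the builtin):

    // Hedged sketch: store this wave's ID within its workgroup.
    // The intrinsic is marked IntrReadMem, so it only reads state and
    // has no side effects.
    extern "C" void write_waveid(int *out) {   // hypothetical helper
      *out = __builtin_amdgcn_s_get_waveid_in_workgroup();
    }
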
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp Tue Apr 30 15:08:23 2019
@@ -125,10 +125,10 @@ private:
   bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                    SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                    SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
-                   SDValue &TFE) const;
+                   SDValue &TFE, SDValue &DLC) const;
   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &Offset, SDValue &GLC,
-                         SDValue &SLC, SDValue &TFE) const;
+                         SDValue &SLC, SDValue &TFE, SDValue &DLC) const;
   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                          SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                          SDValue &SLC) const;
@@ -141,19 +141,19 @@ private:
 
   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                          SDValue &Offset, SDValue &GLC, SDValue &SLC,
-                         SDValue &TFE) const;
+                         SDValue &TFE, SDValue &DLC) const;
   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                          SDValue &Offset, SDValue &SLC) const;
   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                          SDValue &Offset) const;
 
-  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
+  bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr,
                         SDValue &Offset, SDValue &SLC) const;
-  bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
+  bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr,
                               SDValue &Offset, SDValue &SLC) const;
 
   template <bool IsSigned>
-  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
+  bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
                         SDValue &Offset, SDValue &SLC) const;
 
   bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
@@ -1221,7 +1221,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDV
                                      SDValue &Offset, SDValue &Offen,
                                      SDValue &Idxen, SDValue &Addr64,
                                      SDValue &GLC, SDValue &SLC,
-                                     SDValue &TFE) const {
+                                     SDValue &TFE, SDValue &DLC) const {
   // Subtarget prefers to use flat instruction
   if (Subtarget->useFlatForGlobal())
     return false;
@@ -1233,6 +1233,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDV
   if (!SLC.getNode())
     SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
   TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
+  DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
 
   Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
   Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
@@ -1311,7 +1312,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDV
 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                            SDValue &VAddr, SDValue &SOffset,
                                            SDValue &Offset, SDValue &GLC,
-                                           SDValue &SLC, SDValue &TFE) const {
+                                           SDValue &SLC, SDValue &TFE,
+                                           SDValue &DLC) const {
   SDValue Ptr, Offen, Idxen, Addr64;
 
   // addr64 bit was removed for volcanic islands.
@@ -1319,7 +1321,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr
     return false;
 
   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
-              GLC, SLC, TFE))
+              GLC, SLC, TFE, DLC))
     return false;
 
   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
@@ -1341,9 +1343,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr
                                            SDValue &Offset,
                                            SDValue &SLC) const {
   SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
-  SDValue GLC, TFE;
+  SDValue GLC, TFE, DLC;
 
-  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
+  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC);
 }
 
 static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
@@ -1468,13 +1470,13 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScra
 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                            SDValue &SOffset, SDValue &Offset,
                                            SDValue &GLC, SDValue &SLC,
-                                           SDValue &TFE) const {
+                                           SDValue &TFE, SDValue &DLC) const {
   SDValue Ptr, VAddr, Offen, Idxen, Addr64;
   const SIInstrInfo *TII =
     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
 
   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
-              GLC, SLC, TFE))
+              GLC, SLC, TFE, DLC))
     return false;
 
   if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
@@ -1496,57 +1498,42 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffs
 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                            SDValue &Soffset, SDValue &Offset
                                            ) const {
-  SDValue GLC, SLC, TFE;
+  SDValue GLC, SLC, TFE, DLC;
 
-  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
+  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
 }
 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                            SDValue &Soffset, SDValue &Offset,
                                            SDValue &SLC) const {
-  SDValue GLC, TFE;
+  SDValue GLC, TFE, DLC;
 
-  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
+  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
 }
 
 template <bool IsSigned>
-bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
+bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
+                                          SDValue Addr,
                                           SDValue &VAddr,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
-  int64_t OffsetVal = 0;
-
-  if (Subtarget->hasFlatInstOffsets() &&
-      CurDAG->isBaseWithConstantOffset(Addr)) {
-    SDValue N0 = Addr.getOperand(0);
-    SDValue N1 = Addr.getOperand(1);
-    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
-
-    if ((IsSigned && isInt<13>(COffsetVal)) ||
-        (!IsSigned && isUInt<12>(COffsetVal))) {
-      Addr = N0;
-      OffsetVal = COffsetVal;
-    }
-  }
-
-  VAddr = Addr;
-  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
-  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
-
-  return true;
+  return static_cast<const SITargetLowering*>(getTargetLowering())->
+    SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC);
 }
 
-bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
+bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
+                                          SDValue Addr,
                                           SDValue &VAddr,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
-  return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
+  return SelectFlatOffset<false>(N, Addr, VAddr, Offset, SLC);
 }
 
-bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
+bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
+                                          SDValue Addr,
                                           SDValue &VAddr,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
-  return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
+  return SelectFlatOffset<true>(N, Addr, VAddr, Offset, SLC);
 }
 
 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,

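The extra SDNode parameter threaded through these selectors is what lets
selection reach the underlying memory access: the gfx1010 flat-segment
offset workaround in AMDGPUISelLowering.cpp below must know the address
space of the MemSDNode, and the matched pattern root is not always that
node. A hedged illustration of the lookup this enables (findMemSDNode is
defined in the next file; the helper here is hypothetical):

    // A d16 or packed-load pattern root can be a bitconvert or a
    // build_vector wrapping the actual load, so walk down to the
    // MemSDNode before asking for its address space.
    static bool isFlatSegAccess(SDNode *N) {   // hypothetical helper
      return findMemSDNode(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
    }
    // On subtargets with the bug, no immediate offset may be folded
    // when isFlatSegAccess(N) is true.
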
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Tue Apr 30 15:08:23 2019
@@ -2886,6 +2886,61 @@ bool AMDGPUTargetLowering::shouldCombine
   return true;
 }
 
+// Find the load or store from the corresponding pattern root.
+// Roots may be build_vector, bitconvert, or combinations thereof.
+static MemSDNode* findMemSDNode(SDNode *N) {
+  N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
+  if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
+    return MN;
+  assert(isa<BuildVectorSDNode>(N));
+  for (SDValue V : N->op_values())
+    if (MemSDNode *MN =
+          dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
+      return MN;
+  llvm_unreachable("cannot find MemSDNode in the pattern!");
+}
+
+bool AMDGPUTargetLowering::SelectFlatOffset(bool IsSigned,
+                                            SelectionDAG &DAG,
+                                            SDNode *N,
+                                            SDValue Addr,
+                                            SDValue &VAddr,
+                                            SDValue &Offset,
+                                            SDValue &SLC) const {
+  const GCNSubtarget &ST =
+        DAG.getMachineFunction().getSubtarget<GCNSubtarget>();
+  int64_t OffsetVal = 0;
+
+  if (ST.hasFlatInstOffsets() &&
+      (!ST.hasFlatSegmentOffsetBug() ||
+       findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) &&
+      DAG.isBaseWithConstantOffset(Addr)) {
+    SDValue N0 = Addr.getOperand(0);
+    SDValue N1 = Addr.getOperand(1);
+    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
+
+    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
+      if ((IsSigned && isInt<12>(COffsetVal)) ||
+          (!IsSigned && isUInt<11>(COffsetVal))) {
+        Addr = N0;
+        OffsetVal = COffsetVal;
+      }
+    } else {
+      if ((IsSigned && isInt<13>(COffsetVal)) ||
+          (!IsSigned && isUInt<12>(COffsetVal))) {
+        Addr = N0;
+        OffsetVal = COffsetVal;
+      }
+    }
+  }
+
+  VAddr = Addr;
+  Offset = DAG.getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
+  SLC = DAG.getTargetConstant(0, SDLoc(), MVT::i1);
+
+  return true;
+}
+
 // Replace load of an illegal type with a store of a bitcast to a friendlier
 // type.
 SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,

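For reference, the per-generation immediate ranges checked above work out
as follows; a minimal standalone sketch (the helper name is hypothetical,
while llvm::isInt/llvm::isUInt are the real MathExtras.h predicates used
in the patch):

    #include "llvm/Support/MathExtras.h"
    #include <cstdint>

    // IsSigned is true for the global/scratch instruction flavors and
    // false for the FLAT segment, whose offset MSB is forced to zero
    // on gfx10.
    static bool fitsFlatImmOffset(bool IsSigned, bool IsGFX10, int64_t Off) {
      if (IsGFX10)
        return IsSigned ? llvm::isInt<12>(Off)    // [-2048, 2047]
                        : llvm::isUInt<11>(Off);  // [0, 2047]
      return IsSigned ? llvm::isInt<13>(Off)      // [-4096, 4095]
                      : llvm::isUInt<12>(Off);    // [0, 4095]
    }

For example, a constant offset of 2048 still folds into a gfx9 global
access but falls back to address arithmetic on gfx1010.
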
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h Tue Apr 30 15:08:23 2019
@@ -323,6 +323,10 @@ public:
   }
 
   AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
+
+  bool SelectFlatOffset(bool IsSigned, SelectionDAG &DAG, SDNode *N,
+                        SDValue Addr, SDValue &VAddr, SDValue &Offset,
+                        SDValue &SLC) const;
 };
 
 namespace AMDGPUISD {

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp Tue Apr 30 15:08:23 2019
@@ -356,7 +356,8 @@ bool AMDGPUInstructionSelector::selectG_
           .add(I.getOperand(0))
           .addImm(0)  // offset
           .addImm(0)  // glc
-          .addImm(0); // slc
+          .addImm(0)  // slc
+          .addImm(0); // dlc
 
 
   // Now that we selected an opcode, we need to constrain the register
@@ -532,7 +533,8 @@ bool AMDGPUInstructionSelector::selectG_
                                .addReg(PtrReg)
                                .addImm(0)  // offset
                                .addImm(0)  // glc
-                               .addImm(0); // slc
+                               .addImm(0)  // slc
+                               .addImm(0); // dlc
 
   bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
   I.eraseFromParent();

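With these changes every selected FLAT memory instruction carries four
trailing immediate operands. A hedged sketch of the resulting builder
sequence for a load (register and insertion-point names are hypothetical;
this mirrors the code above rather than quoting it):

    auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::FLAT_LOAD_DWORD), DstReg)
                   .addReg(PtrReg)
                   .addImm(0)   // offset
                   .addImm(0)   // glc
                   .addImm(0)   // slc
                   .addImm(0);  // dlc, the new gfx10 cache-policy bit
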
Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Tue Apr 30 15:08:23 2019
@@ -139,6 +139,7 @@ public:
     ImmTyInstOffset,
     ImmTyOffset0,
     ImmTyOffset1,
+    ImmTyDLC,
     ImmTyGLC,
     ImmTySLC,
     ImmTyTFE,
@@ -314,6 +315,7 @@ public:
   bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
   bool isGDS() const { return isImmTy(ImmTyGDS); }
   bool isLDS() const { return isImmTy(ImmTyLDS); }
+  bool isDLC() const { return isImmTy(ImmTyDLC); }
   bool isGLC() const { return isImmTy(ImmTyGLC); }
   bool isSLC() const { return isImmTy(ImmTySLC); }
   bool isTFE() const { return isImmTy(ImmTyTFE); }
@@ -676,6 +678,7 @@ public:
     case ImmTyInstOffset: OS << "InstOffset"; break;
     case ImmTyOffset0: OS << "Offset0"; break;
     case ImmTyOffset1: OS << "Offset1"; break;
+    case ImmTyDLC: OS << "DLC"; break;
     case ImmTyGLC: OS << "GLC"; break;
     case ImmTySLC: OS << "SLC"; break;
     case ImmTyTFE: OS << "TFE"; break;
@@ -1184,6 +1187,7 @@ public:
   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
 
+  AMDGPUOperand::Ptr defaultDLC() const;
   AMDGPUOperand::Ptr defaultGLC() const;
   AMDGPUOperand::Ptr defaultSLC() const;
 
@@ -2303,13 +2307,26 @@ unsigned AMDGPUAsmParser::checkTargetMat
     }
   }
 
-  if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
+  if (TSFlags & SIInstrFlags::FLAT) {
     // FIXME: Produces error without correct column reported.
-    auto OpNum =
-        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
+    auto Opcode = Inst.getOpcode();
+    auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
+
     const auto &Op = Inst.getOperand(OpNum);
-    if (Op.getImm() != 0)
+    if (!hasFlatOffsets() && Op.getImm() != 0)
       return Match_InvalidOperand;
+
+    // GFX10: the address offset is a 12-bit signed byte offset. For the FLAT
+    // segment it must be non-negative; the MSB is ignored and forced to zero.
+    if (isGFX10()) {
+      if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
+        if (!isInt<12>(Op.getImm()))
+          return Match_InvalidOperand;
+      } else {
+        if (!isUInt<11>(Op.getImm()))
+          return Match_InvalidOperand;
+      }
+    }
   }
 
   return Match_Success;
@@ -3887,6 +3904,9 @@ AMDGPUAsmParser::parseNamedBit(const cha
     }
   }
 
+  if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
+    return MatchOperand_ParseFail;
+
   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
   return MatchOperand_Success;
 }
@@ -5101,6 +5121,10 @@ AMDGPUAsmParser::parseSOppBrTarget(Opera
 // mubuf
 //===----------------------------------------------------------------------===//
 
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
+}
+
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
 }
@@ -5177,6 +5201,9 @@ void AMDGPUAsmParser::cvtMubufImpl(MCIns
   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
   }
+
+  if (isGFX10())
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
 }
 
 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
@@ -5214,6 +5241,9 @@ void AMDGPUAsmParser::cvtMtbuf(MCInst &I
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
+
+  if (isGFX10())
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
 }
 
 //===----------------------------------------------------------------------===//
@@ -5249,8 +5279,12 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &In
     }
   }
 
+  bool IsGFX10 = isGFX10();
+
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
+  if (IsGFX10)
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
@@ -5353,6 +5387,7 @@ static const OptionalOperand AMDGPUOptio
   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
+  {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
   {"dfmt",    AMDGPUOperand::ImmTyFORMAT, false, nullptr},
   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
@@ -5581,7 +5616,7 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &In
     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
   }
 
-  // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906):
+  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
   // it has src2 register operand that is tied to dst operand
   // we don't allow modifiers for this operand in assembler so src2_modifiers
   // should be 0.
@@ -6031,7 +6066,8 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &In
       break;
 
     case SIInstrFlags::VOPC:
-      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
+      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
+        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
       break;

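On the intrinsic path the dlc request travels in the same auxiliary
cachepolicy immediate as glc and slc; the extract_glc/extract_slc
SDNodeXForms in BUFInstructions.td below gain a matching extract_dlc.
A hedged C++ rendering of that bit layout (the helper is hypothetical):

    #include <cstdint>

    struct CachePolicyBits { bool GLC, SLC, DLC; };

    // Mirrors extract_glc / extract_slc / extract_dlc: bit 0 is glc,
    // bit 1 is slc, and bit 2 is the new gfx10 dlc bit.
    static CachePolicyBits decodeCachePolicy(uint64_t CachePolicy) {
      return {(CachePolicy & 1) != 0,
              ((CachePolicy >> 1) & 1) != 0,
              ((CachePolicy >> 2) & 1) != 0};
    }
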
Modified: llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td Tue Apr 30 15:08:23 2019
@@ -7,13 +7,13 @@
 //===----------------------------------------------------------------------===//
 
 def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
-def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
+def MUBUFAddr64 : ComplexPattern<i64, 8, "SelectMUBUFAddr64">;
 def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
 
 def MUBUFScratchOffen : ComplexPattern<i64, 4, "SelectMUBUFScratchOffen", [], [SDNPWantParent]>;
 def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [], [SDNPWantParent], 20>;
 
-def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
+def MUBUFOffset : ComplexPattern<i64, 7, "SelectMUBUFOffset">;
 def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
 def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
 
@@ -96,7 +96,9 @@ class MTBUF_Pseudo <string opName, dag o
   bits<1> has_vdata   = 1;
   bits<1> has_vaddr   = 1;
   bits<1> has_glc     = 1;
+  bits<1> has_dlc     = 1;
   bits<1> glc_value   = 0; // the value for glc if no such operand
+  bits<1> dlc_value   = 0; // the value for dlc if no such operand
   bits<1> has_srsrc   = 1;
   bits<1> has_soffset = 1;
   bits<1> has_offset  = 1;
@@ -119,6 +121,7 @@ class MTBUF_Real <MTBUF_Pseudo ps> :
 
   bits<12> offset;
   bits<1>  glc;
+  bits<1>  dlc;
   bits<7>  format;
   bits<8>  vaddr;
   bits<8>  vdata;
@@ -137,17 +140,17 @@ class getMTBUFInsDA<list<RegisterClass>
   RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
   dag InsNoData = !if(!empty(vaddrList),
     (ins                    SReg_128:$srsrc, SCSrc_b32:$soffset,
-         offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe),
+         offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc),
     (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
-         offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe)
+         offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc)
   );
   dag InsData = !if(!empty(vaddrList),
     (ins vdataClass:$vdata,                    SReg_128:$srsrc,
          SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
-         SLC:$slc, TFE:$tfe),
+         SLC:$slc, TFE:$tfe, DLC:$dlc),
     (ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
          SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
-         SLC:$slc, TFE:$tfe)
+         SLC:$slc, TFE:$tfe, DLC:$dlc)
   );
   dag ret = !if(!empty(vdataList), InsNoData, InsData);
 }
@@ -198,7 +201,7 @@ class MTBUF_Load_Pseudo <string opName,
   : MTBUF_Pseudo<opName,
                  (outs vdataClass:$vdata),
                  getMTBUFIns<addrKindCopy>.ret,
-                 " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe",
+                 " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc",
                  pattern>,
     MTBUF_SetupAddr<addrKindCopy> {
   let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@@ -213,13 +216,13 @@ multiclass MTBUF_Pseudo_Loads<string opN
   def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
     [(set load_vt:$vdata,
      (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$format,
-                      i1:$glc, i1:$slc, i1:$tfe)))]>,
+                      i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))]>,
     MTBUFAddr64Table<0, NAME>;
 
   def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
     [(set load_vt:$vdata,
      (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset,
-                      i8:$format, i1:$glc, i1:$slc, i1:$tfe)))]>,
+                      i8:$format, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))]>,
     MTBUFAddr64Table<1, NAME>;
 
   def _OFFEN  : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
@@ -244,7 +247,7 @@ class MTBUF_Store_Pseudo <string opName,
   : MTBUF_Pseudo<opName,
                  (outs),
                  getMTBUFIns<addrKindCopy, [vdataClassCopy]>.ret,
-                 " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe",
+                 " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc",
                  pattern>,
     MTBUF_SetupAddr<addrKindCopy> {
   let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@@ -259,13 +262,13 @@ multiclass MTBUF_Pseudo_Stores<string op
   def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
     [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
                                        i16:$offset, i8:$format, i1:$glc,
-                                       i1:$slc, i1:$tfe))]>,
+                                       i1:$slc, i1:$tfe, i1:$dlc))]>,
     MTBUFAddr64Table<0, NAME>;
 
   def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
     [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
                                        i16:$offset, i8:$format, i1:$glc,
-                                       i1:$slc, i1:$tfe))]>,
+                                       i1:$slc, i1:$tfe, i1:$dlc))]>,
     MTBUFAddr64Table<1, NAME>;
 
   def _OFFEN  : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
@@ -323,7 +326,9 @@ class MUBUF_Pseudo <string opName, dag o
   bits<1> has_vdata   = 1;
   bits<1> has_vaddr   = 1;
   bits<1> has_glc     = 1;
+  bits<1> has_dlc     = 1;
   bits<1> glc_value   = 0; // the value for glc if no such operand
+  bits<1> dlc_value   = 0; // the value for dlc if no such operand
   bits<1> has_srsrc   = 1;
   bits<1> has_soffset = 1;
   bits<1> has_offset  = 1;
@@ -332,7 +337,7 @@ class MUBUF_Pseudo <string opName, dag o
   bits<4> dwords      = 0;
 }
 
-class MUBUF_Real <bits<7> op, MUBUF_Pseudo ps> :
+class MUBUF_Real <MUBUF_Pseudo ps> :
   InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> {
 
   let isPseudo = 0;
@@ -347,6 +352,7 @@ class MUBUF_Real <bits<7> op, MUBUF_Pseu
 
   bits<12> offset;
   bits<1>  glc;
+  bits<1>  dlc;
   bits<8>  vaddr;
   bits<8>  vdata;
   bits<7>  srsrc;
@@ -357,7 +363,7 @@ class MUBUF_Real <bits<7> op, MUBUF_Pseu
 
 
 // For cache invalidation instructions.
-class MUBUF_Invalidate <string opName, SDPatternOperator node> :
+class MUBUF_Invalidate <string opName, SDPatternOperator node = null_frag> :
   MUBUF_Pseudo<opName, (outs), (ins), "", [(node)]> {
 
   let AsmMatchConverter = "";
@@ -372,7 +378,9 @@ class MUBUF_Invalidate <string opName, S
   let has_vdata   = 0;
   let has_vaddr   = 0;
   let has_glc     = 0;
+  let has_dlc     = 0;
   let glc_value   = 0;
+  let dlc_value   = 0;
   let has_srsrc   = 0;
   let has_soffset = 0;
   let has_offset  = 0;
@@ -399,7 +407,7 @@ class getMUBUFInsDA<list<RegisterClass>
   );
   dag ret = !con(
               !if(!empty(vdataList), InsNoData, InsData),
-              !if(isLds, (ins), (ins TFE:$tfe))
+              !if(isLds, (ins DLC:$dlc), (ins TFE:$tfe, DLC:$dlc))
              );
 }
 
@@ -459,7 +467,7 @@ class MUBUF_Load_Pseudo <string opName,
                  !con(getMUBUFIns<addrKindCopy, [], isLds>.ret,
                       !if(HasTiedDest, (ins vdataClass:$vdata_in), (ins))),
                  " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc" #
-                   !if(isLds, " lds", "$tfe"),
+                   !if(isLds, " lds", "$tfe") # "$dlc",
                  pattern>,
     MUBUF_SetupAddr<addrKindCopy> {
   let PseudoInstr = opName # !if(isLds, "_lds", "") #
@@ -489,7 +497,7 @@ multiclass MUBUF_Pseudo_Loads<string opN
     !if(isLds,
         [],
         [(set load_vt:$vdata,
-         (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe)))])>,
+         (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))])>,
     MUBUFAddr64Table<0, NAME # !if(isLds, "_LDS", "")>;
 
   def _ADDR64 : MUBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
@@ -497,7 +505,7 @@ multiclass MUBUF_Pseudo_Loads<string opN
     !if(isLds,
         [],
         [(set load_vt:$vdata,
-         (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe)))])>,
+         (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))])>,
     MUBUFAddr64Table<1, NAME # !if(isLds, "_LDS", "")>;
 
   def _OFFEN  : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, TiedDest, isLds>;
@@ -530,7 +538,7 @@ class MUBUF_Store_Pseudo <string opName,
   : MUBUF_Pseudo<opName,
                  (outs),
                  getMUBUFIns<addrKindCopy, [vdataClassCopy]>.ret,
-                 " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe",
+                 " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc",
                  pattern>,
     MUBUF_SetupAddr<addrKindCopy> {
   let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@@ -546,12 +554,12 @@ multiclass MUBUF_Pseudo_Stores<string op
 
   def _OFFSET : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
     [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
-                                       i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>,
+                                       i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))]>,
     MUBUFAddr64Table<0, NAME>;
 
   def _ADDR64 : MUBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
     [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
-                                       i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>,
+                                       i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))]>,
     MUBUFAddr64Table<1, NAME>;
 
   def _OFFEN  : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
@@ -637,6 +645,7 @@ class MUBUF_Atomic_Pseudo<string opName,
   let hasSideEffects = 1;
   let DisableWQM = 1;
   let has_glc = 0;
+  let has_dlc = 0;
   let has_tfe = 0;
   let maybeAtomic = 1;
 }
@@ -655,6 +664,7 @@ class MUBUF_AtomicNoRet_Pseudo<string op
     AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 0> {
   let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
   let glc_value = 0;
+  let dlc_value = 0;
   let AsmMatchConverter = "cvtMubufAtomic";
 }
 
@@ -672,6 +682,7 @@ class MUBUF_AtomicRet_Pseudo<string opNa
     AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 1> {
   let PseudoInstr = opName # "_rtn_" # getAddrName<addrKindCopy>.ret;
   let glc_value = 1;
+  let dlc_value = 0;
   let Constraints = "$vdata = $vdata_in";
   let DisableEncoding = "$vdata_in";
   let AsmMatchConverter = "cvtMubufAtomicReturn";
@@ -1051,6 +1062,11 @@ def BUFFER_WBINVL1_VOL : MUBUF_Invalidat
 
 } // End let SubtargetPredicate = isGFX7Plus
 
+let SubtargetPredicate = isGFX10Plus in {
+  def BUFFER_GL0_INV : MUBUF_Invalidate<"buffer_gl0_inv">;
+  def BUFFER_GL1_INV : MUBUF_Invalidate<"buffer_gl1_inv">;
+} // End SubtargetPredicate = isGFX10Plus
+
 //===----------------------------------------------------------------------===//
 // MUBUF Patterns
 //===----------------------------------------------------------------------===//
@@ -1063,6 +1079,10 @@ def extract_slc : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant((N->getZExtValue() >> 1) & 1, SDLoc(N), MVT::i8);
 }]>;
 
+def extract_dlc : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8);
+}]>;
+
 //===----------------------------------------------------------------------===//
 // buffer_load/store_format patterns
 //===----------------------------------------------------------------------===//
@@ -1073,21 +1093,21 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatt
     (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
               imm:$cachepolicy, 0)),
     (!cast<MUBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
     (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
               imm:$cachepolicy, 0)),
     (!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
     (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
               imm:$cachepolicy, imm)),
     (!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
@@ -1096,7 +1116,7 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatt
     (!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
       (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
       $rsrc, $soffset, (as_i16imm $offset),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 }
 
@@ -1144,21 +1164,23 @@ multiclass MUBUF_StoreIntrinsicPat<SDPat
     (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
               imm:$cachepolicy, 0),
     (!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
     (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
               imm:$cachepolicy, 0),
     (!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
-      (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (as_i16imm $offset), (extract_glc $cachepolicy),
+      (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
     (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
               imm:$cachepolicy, imm),
     (!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
-      (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (as_i16imm $offset), (extract_glc $cachepolicy),
+      (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
@@ -1167,8 +1189,8 @@ multiclass MUBUF_StoreIntrinsicPat<SDPat
     (!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact)
       $vdata,
       (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
-      $rsrc, $soffset, (as_i16imm $offset),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy),
+      (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 }
 
@@ -1322,8 +1344,8 @@ def : GCNPat<
 class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt,
                               PatFrag constant_ld> : GCNPat <
      (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
-                                   i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
-     (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
+                                   i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
+     (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
   >;
 
 multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
@@ -1331,12 +1353,12 @@ multiclass MUBUFLoad_Atomic_Pattern <MUB
   def : GCNPat <
      (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
                                    i16:$offset, i1:$slc))),
-     (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0)
+     (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0)
   >;
 
   def : GCNPat <
     (vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))),
-    (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
+    (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0)
   >;
 }
 
@@ -1355,8 +1377,8 @@ multiclass MUBUFLoad_Pattern <MUBUF_Pseu
 
   def : GCNPat <
     (vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset,
-                          i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
-    (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe)
+                          i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
+    (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
   >;
 }
 
@@ -1377,12 +1399,12 @@ multiclass MUBUFScratchLoadPat <MUBUF_Ps
   def : GCNPat <
     (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
                                i32:$soffset, u16imm:$offset))),
-    (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
+    (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0)
   >;
 
   def : GCNPat <
     (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))),
-    (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0)
+    (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0)
   >;
 }
 
@@ -1392,12 +1414,12 @@ multiclass MUBUFScratchLoadPat_D16 <MUBU
                                 ValueType vt, PatFrag ld_frag> {
   def : GCNPat <
     (ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset), vt:$in),
-    (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $in)
+    (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, $in)
   >;
 
   def : GCNPat <
     (ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset), vt:$in),
-    (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $in)
+    (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, $in)
   >;
 }
 
@@ -1435,12 +1457,12 @@ multiclass MUBUFStore_Atomic_Pattern <MU
   def : GCNPat <
      (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
                                    i16:$offset, i1:$slc), vt:$val),
-     (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0)
+     (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0)
   >;
 
   def : GCNPat <
     (atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
-    (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
+    (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0)
   >;
 }
 let SubtargetPredicate = isGFX6GFX7 in {
@@ -1454,8 +1476,8 @@ multiclass MUBUFStore_Pattern <MUBUF_Pse
 
   def : GCNPat <
     (st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
-                                      i16:$offset, i1:$glc, i1:$slc, i1:$tfe)),
-    (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
+                                      i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)),
+    (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
   >;
 }
 
@@ -1468,13 +1490,13 @@ multiclass MUBUFScratchStorePat <MUBUF_P
   def : GCNPat <
     (st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
                                       i32:$soffset, u16imm:$offset)),
-    (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
+    (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0)
   >;
 
   def : GCNPat <
     (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset,
                                        u16imm:$offset)),
-    (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0)
+    (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0, 0)
   >;
 }
 
@@ -1512,7 +1534,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatt
               imm:$format, imm:$cachepolicy, 0)),
     (!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
       (as_i8imm $format),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
@@ -1520,7 +1542,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatt
               imm:$format, imm:$cachepolicy, imm)),
     (!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
       (as_i8imm $format),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
@@ -1528,7 +1550,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatt
               imm:$format, imm:$cachepolicy, 0)),
     (!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
       (as_i8imm $format),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
@@ -1538,7 +1560,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatt
       (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
       $rsrc, $soffset, (as_i16imm $offset),
       (as_i8imm $format),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 }
 
@@ -1570,7 +1592,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPat
           imm:$format, imm:$cachepolicy, 0),
     (!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset,
       (as_i16imm $offset), (as_i8imm $format),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
@@ -1578,7 +1600,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPat
           imm:$format, imm:$cachepolicy, imm),
     (!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
       (as_i16imm $offset), (as_i8imm $format),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
@@ -1586,7 +1608,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPat
           imm:$format, imm:$cachepolicy, 0),
     (!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
       (as_i16imm $offset), (as_i8imm $format),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
@@ -1596,7 +1618,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPat
       $vdata,
       (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
       $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 }
 
@@ -1626,24 +1648,18 @@ let SubtargetPredicate = HasPackedD16VMe
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
-// Base ENC_MUBUF for GFX6, GFX7.
+// Base ENC_MUBUF for GFX6, GFX7, GFX10.
 //===----------------------------------------------------------------------===//
 
-class MUBUF_Real_si <bits<7> op, MUBUF_Pseudo ps> :
-  MUBUF_Real<op, ps>,
-  Enc64,
-  SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI> {
-  let AssemblerPredicate=isGFX6GFX7;
-  let DecoderNamespace="GFX6GFX7";
-
+class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef> :
+    MUBUF_Real<ps>, Enc64, SIMCInstr<ps.PseudoInstr, ef> {
   let Inst{11-0}  = !if(ps.has_offset, offset, ?);
   let Inst{12}    = ps.offen;
   let Inst{13}    = ps.idxen;
   let Inst{14}    = !if(ps.has_glc, glc, ps.glc_value);
-  let Inst{15}    = ps.addr64;
   let Inst{16}    = !if(ps.lds, 1, 0);
   let Inst{24-18} = op;
-  let Inst{31-26} = 0x38; //encoding
+  let Inst{31-26} = 0x38;
   let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
   let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
   let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
@@ -1652,125 +1668,250 @@ class MUBUF_Real_si <bits<7> op, MUBUF_P
   let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
 }
 
-multiclass MUBUF_Real_AllAddr_si<bits<7> op> {
-  def _OFFSET_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
-  def _ADDR64_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>;
-  def _OFFEN_si  : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
-  def _IDXEN_si  : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
-  def _BOTHEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
-}
-
-multiclass MUBUF_Real_AllAddr_Lds_si<bits<7> op> {
-
-  def _OFFSET_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
-                   MUBUFLdsTable<0, NAME # "_OFFSET_si">;
-  def _ADDR64_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>,
-                   MUBUFLdsTable<0, NAME # "_ADDR64_si">;
-  def _OFFEN_si  : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
-                   MUBUFLdsTable<0, NAME # "_OFFEN_si">;
-  def _IDXEN_si  : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
-                   MUBUFLdsTable<0, NAME # "_IDXEN_si">;
-  def _BOTHEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
-                   MUBUFLdsTable<0, NAME # "_BOTHEN_si">;
-
-  def _LDS_OFFSET_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>,
-                       MUBUFLdsTable<1, NAME # "_OFFSET_si">;
-  def _LDS_ADDR64_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_ADDR64")>,
-                       MUBUFLdsTable<1, NAME # "_ADDR64_si">;
-  def _LDS_OFFEN_si  : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>,
-                       MUBUFLdsTable<1, NAME # "_OFFEN_si">;
-  def _LDS_IDXEN_si  : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>,
-                       MUBUFLdsTable<1, NAME # "_IDXEN_si">;
-  def _LDS_BOTHEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>,
-                       MUBUFLdsTable<1, NAME # "_BOTHEN_si">;
-}
-
-multiclass MUBUF_Real_Atomic_si<bits<7> op> : MUBUF_Real_AllAddr_si<op> {
-  def _OFFSET_RTN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
-  def _ADDR64_RTN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64_RTN")>;
-  def _OFFEN_RTN_si  : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
-  def _IDXEN_RTN_si  : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
-  def _BOTHEN_RTN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>;
-}
-
-defm BUFFER_LOAD_FORMAT_X       : MUBUF_Real_AllAddr_Lds_si <0x00>;
-defm BUFFER_LOAD_FORMAT_XY      : MUBUF_Real_AllAddr_si <0x01>;
-defm BUFFER_LOAD_FORMAT_XYZ     : MUBUF_Real_AllAddr_si <0x02>;
-defm BUFFER_LOAD_FORMAT_XYZW    : MUBUF_Real_AllAddr_si <0x03>;
-defm BUFFER_STORE_FORMAT_X      : MUBUF_Real_AllAddr_si <0x04>;
-defm BUFFER_STORE_FORMAT_XY     : MUBUF_Real_AllAddr_si <0x05>;
-defm BUFFER_STORE_FORMAT_XYZ    : MUBUF_Real_AllAddr_si <0x06>;
-defm BUFFER_STORE_FORMAT_XYZW   : MUBUF_Real_AllAddr_si <0x07>;
-defm BUFFER_LOAD_UBYTE          : MUBUF_Real_AllAddr_Lds_si <0x08>;
-defm BUFFER_LOAD_SBYTE          : MUBUF_Real_AllAddr_Lds_si <0x09>;
-defm BUFFER_LOAD_USHORT         : MUBUF_Real_AllAddr_Lds_si <0x0a>;
-defm BUFFER_LOAD_SSHORT         : MUBUF_Real_AllAddr_Lds_si <0x0b>;
-defm BUFFER_LOAD_DWORD          : MUBUF_Real_AllAddr_Lds_si <0x0c>;
-defm BUFFER_LOAD_DWORDX2        : MUBUF_Real_AllAddr_si <0x0d>;
-defm BUFFER_LOAD_DWORDX4        : MUBUF_Real_AllAddr_si <0x0e>;
-defm BUFFER_LOAD_DWORDX3        : MUBUF_Real_AllAddr_si <0x0f>;
-defm BUFFER_STORE_BYTE          : MUBUF_Real_AllAddr_si <0x18>;
-defm BUFFER_STORE_SHORT         : MUBUF_Real_AllAddr_si <0x1a>;
-defm BUFFER_STORE_DWORD         : MUBUF_Real_AllAddr_si <0x1c>;
-defm BUFFER_STORE_DWORDX2       : MUBUF_Real_AllAddr_si <0x1d>;
-defm BUFFER_STORE_DWORDX4       : MUBUF_Real_AllAddr_si <0x1e>;
-defm BUFFER_STORE_DWORDX3       : MUBUF_Real_AllAddr_si <0x1f>;
-
-defm BUFFER_ATOMIC_SWAP         : MUBUF_Real_Atomic_si <0x30>;
-defm BUFFER_ATOMIC_CMPSWAP      : MUBUF_Real_Atomic_si <0x31>;
-defm BUFFER_ATOMIC_ADD          : MUBUF_Real_Atomic_si <0x32>;
-defm BUFFER_ATOMIC_SUB          : MUBUF_Real_Atomic_si <0x33>;
-//defm BUFFER_ATOMIC_RSUB         : MUBUF_Real_Atomic_si <0x34>;    // isn't on CI & VI
-defm BUFFER_ATOMIC_SMIN         : MUBUF_Real_Atomic_si <0x35>;
-defm BUFFER_ATOMIC_UMIN         : MUBUF_Real_Atomic_si <0x36>;
-defm BUFFER_ATOMIC_SMAX         : MUBUF_Real_Atomic_si <0x37>;
-defm BUFFER_ATOMIC_UMAX         : MUBUF_Real_Atomic_si <0x38>;
-defm BUFFER_ATOMIC_AND          : MUBUF_Real_Atomic_si <0x39>;
-defm BUFFER_ATOMIC_OR           : MUBUF_Real_Atomic_si <0x3a>;
-defm BUFFER_ATOMIC_XOR          : MUBUF_Real_Atomic_si <0x3b>;
-defm BUFFER_ATOMIC_INC          : MUBUF_Real_Atomic_si <0x3c>;
-defm BUFFER_ATOMIC_DEC          : MUBUF_Real_Atomic_si <0x3d>;
-
-//defm BUFFER_ATOMIC_FCMPSWAP     : MUBUF_Real_Atomic_si <0x3e>;    // isn't on VI
-//defm BUFFER_ATOMIC_FMIN         : MUBUF_Real_Atomic_si <0x3f>;    // isn't on VI
-//defm BUFFER_ATOMIC_FMAX         : MUBUF_Real_Atomic_si <0x40>;    // isn't on VI
-defm BUFFER_ATOMIC_SWAP_X2      : MUBUF_Real_Atomic_si <0x50>;
-defm BUFFER_ATOMIC_CMPSWAP_X2   : MUBUF_Real_Atomic_si <0x51>;
-defm BUFFER_ATOMIC_ADD_X2       : MUBUF_Real_Atomic_si <0x52>;
-defm BUFFER_ATOMIC_SUB_X2       : MUBUF_Real_Atomic_si <0x53>;
-//defm BUFFER_ATOMIC_RSUB_X2      : MUBUF_Real_Atomic_si <0x54>;    // isn't on CI & VI
-defm BUFFER_ATOMIC_SMIN_X2      : MUBUF_Real_Atomic_si <0x55>;
-defm BUFFER_ATOMIC_UMIN_X2      : MUBUF_Real_Atomic_si <0x56>;
-defm BUFFER_ATOMIC_SMAX_X2      : MUBUF_Real_Atomic_si <0x57>;
-defm BUFFER_ATOMIC_UMAX_X2      : MUBUF_Real_Atomic_si <0x58>;
-defm BUFFER_ATOMIC_AND_X2       : MUBUF_Real_Atomic_si <0x59>;
-defm BUFFER_ATOMIC_OR_X2        : MUBUF_Real_Atomic_si <0x5a>;
-defm BUFFER_ATOMIC_XOR_X2       : MUBUF_Real_Atomic_si <0x5b>;
-defm BUFFER_ATOMIC_INC_X2       : MUBUF_Real_Atomic_si <0x5c>;
-defm BUFFER_ATOMIC_DEC_X2       : MUBUF_Real_Atomic_si <0x5d>;
-// FIXME: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on CI.
-//defm BUFFER_ATOMIC_FCMPSWAP_X2  : MUBUF_Real_Atomic_si <0x5e">;   // isn't on VI
-//defm BUFFER_ATOMIC_FMIN_X2      : MUBUF_Real_Atomic_si <0x5f>;    // isn't on VI
-//defm BUFFER_ATOMIC_FMAX_X2      : MUBUF_Real_Atomic_si <0x60>;    // isn't on VI
+class MUBUF_Real_gfx10<bits<8> op, MUBUF_Pseudo ps> :
+    Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.GFX10> {
+  let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value);
+  let Inst{25} = op{7};
+}
 
-def BUFFER_WBINVL1_SC_si        : MUBUF_Real_si <0x70, BUFFER_WBINVL1_SC>;
-def BUFFER_WBINVL1_si           : MUBUF_Real_si <0x71, BUFFER_WBINVL1>;
+class MUBUF_Real_gfx6_gfx7<bits<8> op, MUBUF_Pseudo ps> :
+    Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI> {
+  let Inst{15} = ps.addr64;
+}
 
-class MTBUF_Real_si <bits<3> op, MTBUF_Pseudo ps> :
-  MTBUF_Real<ps>,
-  Enc64,
-  SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI> {
-  let AssemblerPredicate=isGFX6GFX7;
-  let DecoderNamespace="GFX6GFX7";
+//===----------------------------------------------------------------------===//
+// MUBUF - GFX10.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+  multiclass MUBUF_Real_gfx10_with_name<bits<8> op, string opName,
+                                        string asmName> {
+    def _gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(opName)> {
+      MUBUF_Pseudo ps = !cast<MUBUF_Pseudo>(opName);
+      let AsmString = asmName # ps.AsmOperands;
+    }
+  }
+  multiclass MUBUF_Real_AllAddr_gfx10<bits<8> op> {
+    def _BOTHEN_gfx10 :
+      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
+    def _IDXEN_gfx10 :
+      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
+    def _OFFEN_gfx10 :
+      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
+    def _OFFSET_gfx10 :
+      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
+  }
+  multiclass MUBUF_Real_AllAddr_Lds_gfx10<bits<8> op> {
+    def _OFFSET_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
+                        MUBUFLdsTable<0, NAME # "_OFFSET_gfx10">;
+    def _OFFEN_gfx10  : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
+                        MUBUFLdsTable<0, NAME # "_OFFEN_gfx10">;
+    def _IDXEN_gfx10  : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
+                        MUBUFLdsTable<0, NAME # "_IDXEN_gfx10">;
+    def _BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
+                        MUBUFLdsTable<0, NAME # "_BOTHEN_gfx10">;
+
+    def _LDS_OFFSET_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>,
+                            MUBUFLdsTable<1, NAME # "_OFFSET_gfx10">;
+    def _LDS_OFFEN_gfx10  : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>,
+                            MUBUFLdsTable<1, NAME # "_OFFEN_gfx10">;
+    def _LDS_IDXEN_gfx10  : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>,
+                            MUBUFLdsTable<1, NAME # "_IDXEN_gfx10">;
+    def _LDS_BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>,
+                            MUBUFLdsTable<1, NAME # "_BOTHEN_gfx10">;
+  }
+  multiclass MUBUF_Real_Atomics_gfx10<bits<8> op> :
+      MUBUF_Real_AllAddr_gfx10<op> {
+    def _BOTHEN_RTN_gfx10 :
+      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>;
+    def _IDXEN_RTN_gfx10 :
+      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
+    def _OFFEN_RTN_gfx10 :
+      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
+    def _OFFSET_RTN_gfx10 :
+      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
+  }
+} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+
+defm BUFFER_STORE_BYTE_D16_HI     : MUBUF_Real_AllAddr_gfx10<0x019>;
+defm BUFFER_STORE_SHORT_D16_HI    : MUBUF_Real_AllAddr_gfx10<0x01b>;
+defm BUFFER_LOAD_UBYTE_D16        : MUBUF_Real_AllAddr_gfx10<0x020>;
+defm BUFFER_LOAD_UBYTE_D16_HI     : MUBUF_Real_AllAddr_gfx10<0x021>;
+defm BUFFER_LOAD_SBYTE_D16        : MUBUF_Real_AllAddr_gfx10<0x022>;
+defm BUFFER_LOAD_SBYTE_D16_HI     : MUBUF_Real_AllAddr_gfx10<0x023>;
+defm BUFFER_LOAD_SHORT_D16        : MUBUF_Real_AllAddr_gfx10<0x024>;
+defm BUFFER_LOAD_SHORT_D16_HI     : MUBUF_Real_AllAddr_gfx10<0x025>;
+// FIXME-GFX10: Add following instructions:
+//defm BUFFER_LOAD_FORMAT_D16_HI_X  : MUBUF_Real_AllAddr_gfx10<0x026>;
+//defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx10<0x027>;
+defm BUFFER_LOAD_FORMAT_D16_X     : MUBUF_Real_AllAddr_gfx10<0x080>;
+defm BUFFER_LOAD_FORMAT_D16_XY    : MUBUF_Real_AllAddr_gfx10<0x081>;
+defm BUFFER_LOAD_FORMAT_D16_XYZ   : MUBUF_Real_AllAddr_gfx10<0x082>;
+defm BUFFER_LOAD_FORMAT_D16_XYZW  : MUBUF_Real_AllAddr_gfx10<0x083>;
+defm BUFFER_STORE_FORMAT_D16_X    : MUBUF_Real_AllAddr_gfx10<0x084>;
+defm BUFFER_STORE_FORMAT_D16_XY   : MUBUF_Real_AllAddr_gfx10<0x085>;
+defm BUFFER_STORE_FORMAT_D16_XYZ  : MUBUF_Real_AllAddr_gfx10<0x086>;
+defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_gfx10<0x087>;
+
+def BUFFER_GL0_INV_gfx10 :
+  MUBUF_Real_gfx10<0x071, BUFFER_GL0_INV>;
+def BUFFER_GL1_INV_gfx10 :
+  MUBUF_Real_gfx10<0x072, BUFFER_GL1_INV>;
+
+//===----------------------------------------------------------------------===//
+// MUBUF - GFX6, GFX7, GFX10.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX6, DecoderNamespace = "GFX6" in {
+  multiclass MUBUF_Real_gfx6<bits<8> op> {
+    def _gfx6 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME)>;
+  }
+} // End AssemblerPredicate = isGFX6, DecoderNamespace = "GFX6"
+
+let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
+  multiclass MUBUF_Real_gfx7<bits<8> op> {
+    def _gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME)>;
+  }
+} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
 
+let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
+  multiclass MUBUF_Real_AllAddr_gfx6_gfx7<bits<8> op> {
+    def _ADDR64_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>;
+    def _BOTHEN_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
+    def _IDXEN_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
+    def _OFFEN_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
+    def _OFFSET_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
+  }
+  multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7<bits<8> op> {
+    def _OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
+                            MUBUFLdsTable<0, NAME # "_OFFSET_gfx6_gfx7">;
+    def _ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>,
+                            MUBUFLdsTable<0, NAME # "_ADDR64_gfx6_gfx7">;
+    def _OFFEN_gfx6_gfx7  : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
+                            MUBUFLdsTable<0, NAME # "_OFFEN_gfx6_gfx7">;
+    def _IDXEN_gfx6_gfx7  : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
+                            MUBUFLdsTable<0, NAME # "_IDXEN_gfx6_gfx7">;
+    def _BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
+                            MUBUFLdsTable<0, NAME # "_BOTHEN_gfx6_gfx7">;
+
+    def _LDS_OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>,
+                                MUBUFLdsTable<1, NAME # "_OFFSET_gfx6_gfx7">;
+    def _LDS_ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_ADDR64")>,
+                                MUBUFLdsTable<1, NAME # "_ADDR64_gfx6_gfx7">;
+    def _LDS_OFFEN_gfx6_gfx7  : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>,
+                                MUBUFLdsTable<1, NAME # "_OFFEN_gfx6_gfx7">;
+    def _LDS_IDXEN_gfx6_gfx7  : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>,
+                                MUBUFLdsTable<1, NAME # "_IDXEN_gfx6_gfx7">;
+    def _LDS_BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>,
+                                MUBUFLdsTable<1, NAME # "_BOTHEN_gfx6_gfx7">;
+  }
+  multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op> :
+      MUBUF_Real_AllAddr_gfx6_gfx7<op> {
+    def _ADDR64_RTN_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64_RTN")>;
+    def _BOTHEN_RTN_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>;
+    def _IDXEN_RTN_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
+    def _OFFEN_RTN_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
+    def _OFFSET_RTN_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
+  }
+} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
+
+multiclass MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<bits<8> op> :
+  MUBUF_Real_AllAddr_gfx6_gfx7<op>, MUBUF_Real_AllAddr_gfx10<op>;
+
+multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<bits<8> op> :
+  MUBUF_Real_AllAddr_Lds_gfx6_gfx7<op>, MUBUF_Real_AllAddr_Lds_gfx10<op>;
+
+multiclass MUBUF_Real_Atomics_gfx6_gfx7_gfx10<bits<8> op> :
+  MUBUF_Real_Atomics_gfx6_gfx7<op>, MUBUF_Real_Atomics_gfx10<op>;
+
+// FIXME-GFX6: Following instructions are available only on GFX6.
+//defm BUFFER_ATOMIC_RSUB         : MUBUF_Real_Atomics_gfx6 <0x034>;
+//defm BUFFER_ATOMIC_RSUB_X2      : MUBUF_Real_Atomics_gfx6 <0x054>;
+
+defm BUFFER_LOAD_FORMAT_X     : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x000>;
+defm BUFFER_LOAD_FORMAT_XY    : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x001>;
+defm BUFFER_LOAD_FORMAT_XYZ   : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x002>;
+defm BUFFER_LOAD_FORMAT_XYZW  : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x003>;
+defm BUFFER_STORE_FORMAT_X    : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x004>;
+defm BUFFER_STORE_FORMAT_XY   : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x005>;
+defm BUFFER_STORE_FORMAT_XYZ  : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x006>;
+defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x007>;
+defm BUFFER_LOAD_UBYTE        : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x008>;
+defm BUFFER_LOAD_SBYTE        : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x009>;
+defm BUFFER_LOAD_USHORT       : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00a>;
+defm BUFFER_LOAD_SSHORT       : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00b>;
+defm BUFFER_LOAD_DWORD        : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00c>;
+defm BUFFER_LOAD_DWORDX2      : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00d>;
+defm BUFFER_LOAD_DWORDX4      : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00e>;
+defm BUFFER_LOAD_DWORDX3      : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00f>;
+defm BUFFER_STORE_BYTE        : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x018>;
+defm BUFFER_STORE_SHORT       : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01a>;
+defm BUFFER_STORE_DWORD       : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01c>;
+defm BUFFER_STORE_DWORDX2     : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01d>;
+defm BUFFER_STORE_DWORDX4     : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01e>;
+defm BUFFER_STORE_DWORDX3     : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01f>;
+
+defm BUFFER_ATOMIC_SWAP        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x030>;
+defm BUFFER_ATOMIC_CMPSWAP     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x031>;
+defm BUFFER_ATOMIC_ADD         : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x032>;
+defm BUFFER_ATOMIC_SUB         : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x033>;
+defm BUFFER_ATOMIC_SMIN        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x035>;
+defm BUFFER_ATOMIC_UMIN        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x036>;
+defm BUFFER_ATOMIC_SMAX        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x037>;
+defm BUFFER_ATOMIC_UMAX        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x038>;
+defm BUFFER_ATOMIC_AND         : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x039>;
+defm BUFFER_ATOMIC_OR          : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03a>;
+defm BUFFER_ATOMIC_XOR         : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03b>;
+defm BUFFER_ATOMIC_INC         : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03c>;
+defm BUFFER_ATOMIC_DEC         : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03d>;
+// FIXME-GFX6-GFX7-GFX10: Add following instructions:
+//defm BUFFER_ATOMIC_FCMPSWAP    : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03e>;
+//defm BUFFER_ATOMIC_FMIN        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03f>;
+//defm BUFFER_ATOMIC_FMAX        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x040>;
+defm BUFFER_ATOMIC_SWAP_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x050>;
+defm BUFFER_ATOMIC_CMPSWAP_X2  : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x051>;
+defm BUFFER_ATOMIC_ADD_X2      : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x052>;
+defm BUFFER_ATOMIC_SUB_X2      : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x053>;
+defm BUFFER_ATOMIC_SMIN_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x055>;
+defm BUFFER_ATOMIC_UMIN_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x056>;
+defm BUFFER_ATOMIC_SMAX_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x057>;
+defm BUFFER_ATOMIC_UMAX_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x058>;
+defm BUFFER_ATOMIC_AND_X2      : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x059>;
+defm BUFFER_ATOMIC_OR_X2       : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05a>;
+defm BUFFER_ATOMIC_XOR_X2      : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05b>;
+defm BUFFER_ATOMIC_INC_X2      : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05c>;
+defm BUFFER_ATOMIC_DEC_X2      : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05d>;
+// FIXME-GFX7: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on GFX7.
+// FIXME-GFX6-GFX7-GFX10: Add following instructions:
+//defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
+//defm BUFFER_ATOMIC_FMIN_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>;
+//defm BUFFER_ATOMIC_FMAX_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>;
+
+defm BUFFER_WBINVL1_SC        : MUBUF_Real_gfx6<0x070>;
+defm BUFFER_WBINVL1_VOL       : MUBUF_Real_gfx7<0x070>;
+def  BUFFER_WBINVL1_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<0x071, BUFFER_WBINVL1>;
+
+//===----------------------------------------------------------------------===//
+// Base ENC_MTBUF for GFX6, GFX7, GFX10.
+//===----------------------------------------------------------------------===//
+
+class Base_MTBUF_Real_gfx6_gfx7_gfx10<bits<3> op, MTBUF_Pseudo ps, int ef> :
+    MTBUF_Real<ps>, Enc64, SIMCInstr<ps.PseudoInstr, ef> {
   let Inst{11-0}  = !if(ps.has_offset, offset, ?);
   let Inst{12}    = ps.offen;
   let Inst{13}    = ps.idxen;
   let Inst{14}    = !if(ps.has_glc, glc, ps.glc_value);
-  let Inst{15}    = ps.addr64;
   let Inst{18-16} = op;
-  let Inst{22-19} = dfmt;
-  let Inst{25-23} = nfmt;
   let Inst{31-26} = 0x3a; //encoding
   let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
   let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
@@ -1780,43 +1921,83 @@ class MTBUF_Real_si <bits<3> op, MTBUF_P
   let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
 }
 
-multiclass MTBUF_Real_AllAddr_si<bits<3> op> {
-  def _OFFSET_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
-  def _ADDR64_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_ADDR64")>;
-  def _OFFEN_si  : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
-  def _IDXEN_si  : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
-  def _BOTHEN_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
-}
+//===----------------------------------------------------------------------===//
+// MTBUF - GFX10.
+//===----------------------------------------------------------------------===//
+
+class MTBUF_Real_gfx10<bits<4> op, MTBUF_Pseudo ps> :
+    Base_MTBUF_Real_gfx6_gfx7_gfx10<op{2-0}, ps, SIEncodingFamily.GFX10> {
+  let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value);
+  let Inst{25-19} = format;
+  let Inst{53} = op{3};
+}
+
+let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+  multiclass MTBUF_Real_AllAddr_gfx10<bits<4> op> {
+    def _BOTHEN_gfx10 :
+      MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
+    def _IDXEN_gfx10 :
+      MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
+    def _OFFEN_gfx10 :
+      MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
+    def _OFFSET_gfx10 :
+      MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
+  }
+} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
 
-defm TBUFFER_LOAD_FORMAT_X     : MTBUF_Real_AllAddr_si <0>;
-defm TBUFFER_LOAD_FORMAT_XY    : MTBUF_Real_AllAddr_si <1>;
-defm TBUFFER_LOAD_FORMAT_XYZ   : MTBUF_Real_AllAddr_si <2>;
-defm TBUFFER_LOAD_FORMAT_XYZW  : MTBUF_Real_AllAddr_si <3>;
-defm TBUFFER_STORE_FORMAT_X    : MTBUF_Real_AllAddr_si <4>;
-defm TBUFFER_STORE_FORMAT_XY   : MTBUF_Real_AllAddr_si <5>;
-defm TBUFFER_STORE_FORMAT_XYZ  : MTBUF_Real_AllAddr_si <6>;
-defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_si <7>;
+defm TBUFFER_LOAD_FORMAT_D16_X     : MTBUF_Real_AllAddr_gfx10<0x008>;
+defm TBUFFER_LOAD_FORMAT_D16_XY    : MTBUF_Real_AllAddr_gfx10<0x009>;
+defm TBUFFER_LOAD_FORMAT_D16_XYZ   : MTBUF_Real_AllAddr_gfx10<0x00a>;
+defm TBUFFER_LOAD_FORMAT_D16_XYZW  : MTBUF_Real_AllAddr_gfx10<0x00b>;
+defm TBUFFER_STORE_FORMAT_D16_X    : MTBUF_Real_AllAddr_gfx10<0x00c>;
+defm TBUFFER_STORE_FORMAT_D16_XY   : MTBUF_Real_AllAddr_gfx10<0x00d>;
+defm TBUFFER_STORE_FORMAT_D16_XYZ  : MTBUF_Real_AllAddr_gfx10<0x00e>;
+defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_gfx10<0x00f>;
 
 //===----------------------------------------------------------------------===//
-// CI
-// MTBUF - GFX6, GFX7.
+// MTBUF - GFX6, GFX7, GFX10.
 //===----------------------------------------------------------------------===//
 
-class MUBUF_Real_ci <bits<7> op, MUBUF_Pseudo ps> :
-  MUBUF_Real_si<op, ps> {
-  let AssemblerPredicate = isGFX7Only;
-  let DecoderNamespace = "GFX7";
+class MTBUF_Real_gfx6_gfx7<bits<4> op, MTBUF_Pseudo ps> :
+    Base_MTBUF_Real_gfx6_gfx7_gfx10<op{2-0}, ps, SIEncodingFamily.SI> {
+  let Inst{15} = ps.addr64;
+  let Inst{22-19} = dfmt;
+  let Inst{25-23} = nfmt;
 }
 
-def BUFFER_WBINVL1_VOL_ci : MUBUF_Real_ci <0x70, BUFFER_WBINVL1_VOL>;
+let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
+  multiclass MTBUF_Real_AllAddr_gfx6_gfx7<bits<4> op> {
+    def _ADDR64_gfx6_gfx7 :
+      MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_ADDR64")>;
+    def _BOTHEN_gfx6_gfx7 :
+      MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
+    def _IDXEN_gfx6_gfx7 :
+      MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
+    def _OFFEN_gfx6_gfx7 :
+      MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
+    def _OFFSET_gfx6_gfx7 :
+      MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
+  }
+} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
+
+multiclass MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<bits<4> op> :
+  MTBUF_Real_AllAddr_gfx6_gfx7<op>, MTBUF_Real_AllAddr_gfx10<op>;
 
+defm TBUFFER_LOAD_FORMAT_X     : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x000>;
+defm TBUFFER_LOAD_FORMAT_XY    : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x001>;
+defm TBUFFER_LOAD_FORMAT_XYZ   : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x002>;
+defm TBUFFER_LOAD_FORMAT_XYZW  : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x003>;
+defm TBUFFER_STORE_FORMAT_X    : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x004>;
+defm TBUFFER_STORE_FORMAT_XY   : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x005>;
+defm TBUFFER_STORE_FORMAT_XYZ  : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x006>;
+defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x007>;
 
 //===----------------------------------------------------------------------===//
-//  GFX8, GFX9 (VI).
+// GFX8, GFX9 (VI).
 //===----------------------------------------------------------------------===//
 
 class MUBUF_Real_vi <bits<7> op, MUBUF_Pseudo ps> :
-  MUBUF_Real<op, ps>,
+  MUBUF_Real<ps>,
   Enc64,
   SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI> {
   let AssemblerPredicate = isGFX8GFX9;
@@ -1866,7 +2047,7 @@ multiclass MUBUF_Real_AllAddr_Lds_vi<bit
 }
 
 class MUBUF_Real_gfx80 <bits<7> op, MUBUF_Pseudo ps> :
-  MUBUF_Real<op, ps>,
+  MUBUF_Real<ps>,
   Enc64,
   SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX80> {
   let AssemblerPredicate=HasUnpackedD16VMem;
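
Both buffer encodings make the same generational split: the MUBUF classes at
the top of this section swap bit 15 between dlc (GFX10) and addr64
(GFX6/GFX7) and place the widened opcode's bit 7 at Inst{25}, while ENC_MTBUF
additionally merges dfmt/nfmt into a single 7-bit format field and grows a
fourth opcode bit at Inst{53}. Below is a self-contained C++ sketch of the
MTBUF word with the field positions copied from the let-statements above; the
field() helper and the function names are illustrative, not part of the
backend.

  #include <cassert>
  #include <cstdint>

  // Illustrative helper: place Val into bits [Hi:Lo] of the 64-bit word.
  static uint64_t field(uint64_t Val, unsigned Hi, unsigned Lo) {
    assert(Hi >= Lo && Hi < 64 && Val < (1ull << (Hi - Lo + 1)));
    return Val << Lo;
  }

  // Fields shared by all generations (Base_MTBUF_Real_gfx6_gfx7_gfx10).
  uint64_t encodeMTBUFCommon(unsigned OpLo3, unsigned Offset, bool Offen,
                             bool Idxen, bool Glc, unsigned VAddr,
                             unsigned VData, unsigned SOffset) {
    uint64_t Inst = 0;
    Inst |= field(Offset, 11, 0);
    Inst |= field(Offen, 12, 12);
    Inst |= field(Idxen, 13, 13);
    Inst |= field(Glc, 14, 14);
    Inst |= field(OpLo3, 18, 16);
    Inst |= field(0x3a, 31, 26); // ENC_MTBUF encoding
    Inst |= field(VAddr, 39, 32);
    Inst |= field(VData, 47, 40);
    Inst |= field(SOffset, 63, 56);
    return Inst;
  }

  // GFX10: bit 15 becomes dlc, the format is unified, and op{3} sits at 53.
  uint64_t addGfx10Bits(uint64_t Inst, bool Dlc, unsigned Format, unsigned Op) {
    return Inst | field(Dlc, 15, 15) | field(Format, 25, 19) |
           field((Op >> 3) & 1, 53, 53);
  }

  // GFX6/GFX7: bit 15 is addr64 and the format stays split as dfmt/nfmt.
  uint64_t addGfx6Gfx7Bits(uint64_t Inst, bool Addr64, unsigned Dfmt,
                           unsigned Nfmt) {
    return Inst | field(Addr64, 15, 15) | field(Dfmt, 22, 19) |
           field(Nfmt, 25, 23);
  }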

Modified: llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td Tue Apr 30 15:08:23 2019
@@ -6,11 +6,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [], -10>;
-def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [], -10>;
+def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [SDNPWantRoot], -10>;
+def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [SDNPWantRoot], -10>;
 
-def FLATOffsetSigned : ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [], -10>;
-def FLATSignedAtomic : ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [], -10>;
+def FLATOffsetSigned : ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;
+def FLATSignedAtomic : ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [SDNPWantRoot], -10>;
 
 //===----------------------------------------------------------------------===//
 // FLAT classes
@@ -51,6 +51,8 @@ class FLAT_Pseudo<string opName, dag out
   bits<1> has_data = 1;
   bits<1> has_glc  = 1;
   bits<1> glcValue = 0;
+  bits<1> has_dlc  = 1;
+  bits<1> dlcValue = 0;
 
   let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts,
     !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));
@@ -88,6 +90,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo
 
   bits<1> slc;
   bits<1> glc;
+  bits<1> dlc;
 
   // Only valid on gfx9
   bits<1> lds = 0; // XXX - What does this actually do?
@@ -141,9 +144,9 @@ class FLAT_Load_Pseudo <string opName, R
         !con((ins VReg_64:$vaddr),
           !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
             (ins !if(HasSignedOffset,offset_s13,offset_u12):$offset)),
-            (ins GLC:$glc, SLC:$slc)),
+            (ins GLC:$glc, SLC:$slc, DLC:$dlc)),
             !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
-  " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc"> {
+  " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
   let has_data = 0;
   let mayLoad = 1;
   let has_saddr = HasSaddr;
@@ -164,8 +167,8 @@ class FLAT_Store_Pseudo <string opName,
       !con((ins VReg_64:$vaddr, vdataClass:$vdata),
         !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
           (ins !if(HasSignedOffset,offset_s13,offset_u12):$offset)),
-          (ins GLC:$glc, SLC:$slc)),
-  " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc"> {
+          (ins GLC:$glc, SLC:$slc, DLC:$dlc)),
+  " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
   let mayLoad  = 0;
   let mayStore = 1;
   let has_vdst = 0;
@@ -198,9 +201,9 @@ class FLAT_Scratch_Load_Pseudo <string o
   opName,
   (outs regClass:$vdst),
   !if(EnableSaddr,
-      (ins SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc),
-      (ins VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc)),
-  " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc"> {
+      (ins SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
+      (ins VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
+  " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc$dlc"> {
   let has_data = 0;
   let mayLoad = 1;
   let has_saddr = 1;
@@ -214,9 +217,9 @@ class FLAT_Scratch_Store_Pseudo <string
   opName,
   (outs),
   !if(EnableSaddr,
-    (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc),
-    (ins vdataClass:$vdata, VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc)),
-  " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc"> {
+    (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
+    (ins vdataClass:$vdata, VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
+  " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
   let mayLoad  = 0;
   let mayStore = 1;
   let has_vdst = 0;
@@ -248,6 +251,8 @@ class FLAT_AtomicNoRet_Pseudo<string opN
     let mayStore = 1;
     let has_glc  = 0;
     let glcValue = 0;
+    let has_dlc  = 0;
+    let dlcValue = 0;
     let has_vdst = 0;
     let maybeAtomic = 1;
 }
@@ -258,6 +263,7 @@ class FLAT_AtomicRet_Pseudo<string opNam
   let hasPostISelHook = 1;
   let has_vdst = 1;
   let glcValue = 1;
+  let dlcValue = 0;
   let PseudoInstr = NAME # "_RTN";
 }
 
@@ -492,8 +498,8 @@ defm FLAT_ATOMIC_INC_X2     : FLAT_Atomi
 defm FLAT_ATOMIC_DEC_X2     : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
                                 VReg_64, i64, atomic_dec_flat>;
 
-// GFX7-only flat instructions.
-let SubtargetPredicate = isGFX7Only in {
+// GFX7-, GFX10-only flat instructions.
+let SubtargetPredicate = isGFX7GFX10 in {
 
 defm FLAT_ATOMIC_FCMPSWAP    : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap",
                                 VGPR_32, f32, null_frag, v2f32, VReg_64>;
@@ -513,7 +519,7 @@ defm FLAT_ATOMIC_FMIN_X2     : FLAT_Atom
 defm FLAT_ATOMIC_FMAX_X2     : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
                                 VReg_64, f64>;
 
-} // End SubtargetPredicate = isGFX7Only
+} // End SubtargetPredicate = isGFX7GFX10
 
 let SubtargetPredicate = HasFlatGlobalInsts in {
 defm GLOBAL_LOAD_UBYTE    : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
@@ -656,6 +662,22 @@ defm SCRATCH_STORE_SHORT_D16_HI : FLAT_S
 
 } // End SubtargetPredicate = HasFlatScratchInsts
 
+let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
+  defm GLOBAL_ATOMIC_FCMPSWAP :
+    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32>;
+  defm GLOBAL_ATOMIC_FMIN :
+    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
+  defm GLOBAL_ATOMIC_FMAX :
+    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
+  defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
+    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64>;
+  defm GLOBAL_ATOMIC_FMIN_X2 :
+    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
+  defm GLOBAL_ATOMIC_FMAX_X2 :
+    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
+} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1
+
+
 //===----------------------------------------------------------------------===//
 // Flat Patterns
 //===----------------------------------------------------------------------===//
@@ -663,51 +685,51 @@ defm SCRATCH_STORE_SHORT_D16_HI : FLAT_S
 // Patterns for global loads with no offset.
 class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
   (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))),
-  (inst $vaddr, $offset, 0, $slc)
+  (inst $vaddr, $offset, 0, 0, $slc)
 >;
 
 class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
   (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc), vt:$in),
-  (inst $vaddr, $offset, 0, $slc, $in)
+  (inst $vaddr, $offset, 0, 0, $slc, $in)
 >;
 
 class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
   (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc), vt:$in),
-  (inst $vaddr, $offset, 0, $slc, $in)
+  (inst $vaddr, $offset, 0, 0, $slc, $in)
 >;
 
 class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
   (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
-  (inst $vaddr, $offset, 0, $slc)
+  (inst $vaddr, $offset, 0, 0, $slc)
 >;
 
 class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
   (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc))),
-  (inst $vaddr, $offset, 0, $slc)
+  (inst $vaddr, $offset, 0, 0, $slc)
 >;
 
 class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
   (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)),
-  (inst $vaddr, $data, $offset, 0, $slc)
+  (inst $vaddr, $data, $offset, 0, 0, $slc)
 >;
 
 class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
   (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)),
-  (inst $vaddr, $data, $offset, 0, $slc)
+  (inst $vaddr, $data, $offset, 0, 0, $slc)
 >;
 
 class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
   // atomic store follows atomic binop convention so the address comes
   // first.
   (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
-  (inst $vaddr, $data, $offset, 0, $slc)
+  (inst $vaddr, $data, $offset, 0, 0, $slc)
 >;
 
 class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
   // atomic store follows atomic binop convention so the address comes
   // first.
   (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
-  (inst $vaddr, $data, $offset, 0, $slc)
+  (inst $vaddr, $data, $offset, 0, 0, $slc)
 >;
 
 class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
@@ -1108,3 +1130,193 @@ defm SCRATCH_STORE_DWORD        : FLAT_R
 defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_vi <0x1d>;
 defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_vi <0x1e>;
 defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_vi <0x1f>;
+
+
+//===----------------------------------------------------------------------===//
+// GFX10.
+//===----------------------------------------------------------------------===//
+
+class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> :
+    FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> {
+  let AssemblerPredicate = isGFX10Plus;
+  let DecoderNamespace = "GFX10";
+
+  let Inst{11-0}  = {offset{12}, offset{10-0}};
+  let Inst{12}    = !if(ps.has_dlc, dlc, ps.dlcValue);
+  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d);
+  let Inst{55}    = 0;
+}
+
+
+multiclass FLAT_Real_Base_gfx10<bits<7> op> {
+  def _gfx10 :
+    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>;
+}
+
+multiclass FLAT_Real_RTN_gfx10<bits<7> op> {
+  def _RTN_gfx10 :
+    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
+}
+
+multiclass FLAT_Real_SADDR_gfx10<bits<7> op> {
+  def _SADDR_gfx10 :
+    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
+}
+
+multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> {
+  def _SADDR_RTN_gfx10 :
+    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
+}
+
+
+multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> :
+  FLAT_Real_Base_gfx10<op>,
+  FLAT_Real_SADDR_gfx10<op>;
+
+multiclass FLAT_Real_Atomics_gfx10<bits<7> op> :
+  FLAT_Real_Base_gfx10<op>,
+  FLAT_Real_RTN_gfx10<op>;
+
+multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> :
+  FLAT_Real_AllAddr_gfx10<op>,
+  FLAT_Real_RTN_gfx10<op>,
+  FLAT_Real_SADDR_RTN_gfx10<op>;
+
+
+// ENC_FLAT.
+defm FLAT_LOAD_UBYTE            : FLAT_Real_Base_gfx10<0x008>;
+defm FLAT_LOAD_SBYTE            : FLAT_Real_Base_gfx10<0x009>;
+defm FLAT_LOAD_USHORT           : FLAT_Real_Base_gfx10<0x00a>;
+defm FLAT_LOAD_SSHORT           : FLAT_Real_Base_gfx10<0x00b>;
+defm FLAT_LOAD_DWORD            : FLAT_Real_Base_gfx10<0x00c>;
+defm FLAT_LOAD_DWORDX2          : FLAT_Real_Base_gfx10<0x00d>;
+defm FLAT_LOAD_DWORDX4          : FLAT_Real_Base_gfx10<0x00e>;
+defm FLAT_LOAD_DWORDX3          : FLAT_Real_Base_gfx10<0x00f>;
+defm FLAT_STORE_BYTE            : FLAT_Real_Base_gfx10<0x018>;
+defm FLAT_STORE_BYTE_D16_HI     : FLAT_Real_Base_gfx10<0x019>;
+defm FLAT_STORE_SHORT           : FLAT_Real_Base_gfx10<0x01a>;
+defm FLAT_STORE_SHORT_D16_HI    : FLAT_Real_Base_gfx10<0x01b>;
+defm FLAT_STORE_DWORD           : FLAT_Real_Base_gfx10<0x01c>;
+defm FLAT_STORE_DWORDX2         : FLAT_Real_Base_gfx10<0x01d>;
+defm FLAT_STORE_DWORDX4         : FLAT_Real_Base_gfx10<0x01e>;
+defm FLAT_STORE_DWORDX3         : FLAT_Real_Base_gfx10<0x01f>;
+defm FLAT_LOAD_UBYTE_D16        : FLAT_Real_Base_gfx10<0x020>;
+defm FLAT_LOAD_UBYTE_D16_HI     : FLAT_Real_Base_gfx10<0x021>;
+defm FLAT_LOAD_SBYTE_D16        : FLAT_Real_Base_gfx10<0x022>;
+defm FLAT_LOAD_SBYTE_D16_HI     : FLAT_Real_Base_gfx10<0x023>;
+defm FLAT_LOAD_SHORT_D16        : FLAT_Real_Base_gfx10<0x024>;
+defm FLAT_LOAD_SHORT_D16_HI     : FLAT_Real_Base_gfx10<0x025>;
+defm FLAT_ATOMIC_SWAP           : FLAT_Real_Atomics_gfx10<0x030>;
+defm FLAT_ATOMIC_CMPSWAP        : FLAT_Real_Atomics_gfx10<0x031>;
+defm FLAT_ATOMIC_ADD            : FLAT_Real_Atomics_gfx10<0x032>;
+defm FLAT_ATOMIC_SUB            : FLAT_Real_Atomics_gfx10<0x033>;
+defm FLAT_ATOMIC_SMIN           : FLAT_Real_Atomics_gfx10<0x035>;
+defm FLAT_ATOMIC_UMIN           : FLAT_Real_Atomics_gfx10<0x036>;
+defm FLAT_ATOMIC_SMAX           : FLAT_Real_Atomics_gfx10<0x037>;
+defm FLAT_ATOMIC_UMAX           : FLAT_Real_Atomics_gfx10<0x038>;
+defm FLAT_ATOMIC_AND            : FLAT_Real_Atomics_gfx10<0x039>;
+defm FLAT_ATOMIC_OR             : FLAT_Real_Atomics_gfx10<0x03a>;
+defm FLAT_ATOMIC_XOR            : FLAT_Real_Atomics_gfx10<0x03b>;
+defm FLAT_ATOMIC_INC            : FLAT_Real_Atomics_gfx10<0x03c>;
+defm FLAT_ATOMIC_DEC            : FLAT_Real_Atomics_gfx10<0x03d>;
+defm FLAT_ATOMIC_FCMPSWAP       : FLAT_Real_Atomics_gfx10<0x03e>;
+defm FLAT_ATOMIC_FMIN           : FLAT_Real_Atomics_gfx10<0x03f>;
+defm FLAT_ATOMIC_FMAX           : FLAT_Real_Atomics_gfx10<0x040>;
+defm FLAT_ATOMIC_SWAP_X2        : FLAT_Real_Atomics_gfx10<0x050>;
+defm FLAT_ATOMIC_CMPSWAP_X2     : FLAT_Real_Atomics_gfx10<0x051>;
+defm FLAT_ATOMIC_ADD_X2         : FLAT_Real_Atomics_gfx10<0x052>;
+defm FLAT_ATOMIC_SUB_X2         : FLAT_Real_Atomics_gfx10<0x053>;
+defm FLAT_ATOMIC_SMIN_X2        : FLAT_Real_Atomics_gfx10<0x055>;
+defm FLAT_ATOMIC_UMIN_X2        : FLAT_Real_Atomics_gfx10<0x056>;
+defm FLAT_ATOMIC_SMAX_X2        : FLAT_Real_Atomics_gfx10<0x057>;
+defm FLAT_ATOMIC_UMAX_X2        : FLAT_Real_Atomics_gfx10<0x058>;
+defm FLAT_ATOMIC_AND_X2         : FLAT_Real_Atomics_gfx10<0x059>;
+defm FLAT_ATOMIC_OR_X2          : FLAT_Real_Atomics_gfx10<0x05a>;
+defm FLAT_ATOMIC_XOR_X2         : FLAT_Real_Atomics_gfx10<0x05b>;
+defm FLAT_ATOMIC_INC_X2         : FLAT_Real_Atomics_gfx10<0x05c>;
+defm FLAT_ATOMIC_DEC_X2         : FLAT_Real_Atomics_gfx10<0x05d>;
+defm FLAT_ATOMIC_FCMPSWAP_X2    : FLAT_Real_Atomics_gfx10<0x05e>;
+defm FLAT_ATOMIC_FMIN_X2        : FLAT_Real_Atomics_gfx10<0x05f>;
+defm FLAT_ATOMIC_FMAX_X2        : FLAT_Real_Atomics_gfx10<0x060>;
+
+
+// ENC_FLAT_GLBL.
+defm GLOBAL_LOAD_UBYTE          : FLAT_Real_AllAddr_gfx10<0x008>;
+defm GLOBAL_LOAD_SBYTE          : FLAT_Real_AllAddr_gfx10<0x009>;
+defm GLOBAL_LOAD_USHORT         : FLAT_Real_AllAddr_gfx10<0x00a>;
+defm GLOBAL_LOAD_SSHORT         : FLAT_Real_AllAddr_gfx10<0x00b>;
+defm GLOBAL_LOAD_DWORD          : FLAT_Real_AllAddr_gfx10<0x00c>;
+defm GLOBAL_LOAD_DWORDX2        : FLAT_Real_AllAddr_gfx10<0x00d>;
+defm GLOBAL_LOAD_DWORDX4        : FLAT_Real_AllAddr_gfx10<0x00e>;
+defm GLOBAL_LOAD_DWORDX3        : FLAT_Real_AllAddr_gfx10<0x00f>;
+defm GLOBAL_STORE_BYTE          : FLAT_Real_AllAddr_gfx10<0x018>;
+defm GLOBAL_STORE_BYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x019>;
+defm GLOBAL_STORE_SHORT         : FLAT_Real_AllAddr_gfx10<0x01a>;
+defm GLOBAL_STORE_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x01b>;
+defm GLOBAL_STORE_DWORD         : FLAT_Real_AllAddr_gfx10<0x01c>;
+defm GLOBAL_STORE_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x01d>;
+defm GLOBAL_STORE_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x01e>;
+defm GLOBAL_STORE_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x01f>;
+defm GLOBAL_LOAD_UBYTE_D16      : FLAT_Real_AllAddr_gfx10<0x020>;
+defm GLOBAL_LOAD_UBYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x021>;
+defm GLOBAL_LOAD_SBYTE_D16      : FLAT_Real_AllAddr_gfx10<0x022>;
+defm GLOBAL_LOAD_SBYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x023>;
+defm GLOBAL_LOAD_SHORT_D16      : FLAT_Real_AllAddr_gfx10<0x024>;
+defm GLOBAL_LOAD_SHORT_D16_HI   : FLAT_Real_AllAddr_gfx10<0x025>;
+defm GLOBAL_ATOMIC_SWAP         : FLAT_Real_GlblAtomics_gfx10<0x030>;
+defm GLOBAL_ATOMIC_CMPSWAP      : FLAT_Real_GlblAtomics_gfx10<0x031>;
+defm GLOBAL_ATOMIC_ADD          : FLAT_Real_GlblAtomics_gfx10<0x032>;
+defm GLOBAL_ATOMIC_SUB          : FLAT_Real_GlblAtomics_gfx10<0x033>;
+defm GLOBAL_ATOMIC_SMIN         : FLAT_Real_GlblAtomics_gfx10<0x035>;
+defm GLOBAL_ATOMIC_UMIN         : FLAT_Real_GlblAtomics_gfx10<0x036>;
+defm GLOBAL_ATOMIC_SMAX         : FLAT_Real_GlblAtomics_gfx10<0x037>;
+defm GLOBAL_ATOMIC_UMAX         : FLAT_Real_GlblAtomics_gfx10<0x038>;
+defm GLOBAL_ATOMIC_AND          : FLAT_Real_GlblAtomics_gfx10<0x039>;
+defm GLOBAL_ATOMIC_OR           : FLAT_Real_GlblAtomics_gfx10<0x03a>;
+defm GLOBAL_ATOMIC_XOR          : FLAT_Real_GlblAtomics_gfx10<0x03b>;
+defm GLOBAL_ATOMIC_INC          : FLAT_Real_GlblAtomics_gfx10<0x03c>;
+defm GLOBAL_ATOMIC_DEC          : FLAT_Real_GlblAtomics_gfx10<0x03d>;
+defm GLOBAL_ATOMIC_FCMPSWAP     : FLAT_Real_GlblAtomics_gfx10<0x03e>;
+defm GLOBAL_ATOMIC_FMIN         : FLAT_Real_GlblAtomics_gfx10<0x03f>;
+defm GLOBAL_ATOMIC_FMAX         : FLAT_Real_GlblAtomics_gfx10<0x040>;
+defm GLOBAL_ATOMIC_SWAP_X2      : FLAT_Real_GlblAtomics_gfx10<0x050>;
+defm GLOBAL_ATOMIC_CMPSWAP_X2   : FLAT_Real_GlblAtomics_gfx10<0x051>;
+defm GLOBAL_ATOMIC_ADD_X2       : FLAT_Real_GlblAtomics_gfx10<0x052>;
+defm GLOBAL_ATOMIC_SUB_X2       : FLAT_Real_GlblAtomics_gfx10<0x053>;
+defm GLOBAL_ATOMIC_SMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x055>;
+defm GLOBAL_ATOMIC_UMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x056>;
+defm GLOBAL_ATOMIC_SMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x057>;
+defm GLOBAL_ATOMIC_UMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x058>;
+defm GLOBAL_ATOMIC_AND_X2       : FLAT_Real_GlblAtomics_gfx10<0x059>;
+defm GLOBAL_ATOMIC_OR_X2        : FLAT_Real_GlblAtomics_gfx10<0x05a>;
+defm GLOBAL_ATOMIC_XOR_X2       : FLAT_Real_GlblAtomics_gfx10<0x05b>;
+defm GLOBAL_ATOMIC_INC_X2       : FLAT_Real_GlblAtomics_gfx10<0x05c>;
+defm GLOBAL_ATOMIC_DEC_X2       : FLAT_Real_GlblAtomics_gfx10<0x05d>;
+defm GLOBAL_ATOMIC_FCMPSWAP_X2  : FLAT_Real_GlblAtomics_gfx10<0x05e>;
+defm GLOBAL_ATOMIC_FMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x05f>;
+defm GLOBAL_ATOMIC_FMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x060>;
+
+
+// ENC_FLAT_SCRATCH.
+defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_gfx10<0x008>;
+defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_gfx10<0x009>;
+defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_gfx10<0x00a>;
+defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_gfx10<0x00b>;
+defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_gfx10<0x00c>;
+defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x00d>;
+defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x00e>;
+defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x00f>;
+defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_gfx10<0x018>;
+defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x019>;
+defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_gfx10<0x01a>;
+defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
+defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_gfx10<0x01c>;
+defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_gfx10<0x01d>;
+defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_gfx10<0x01e>;
+defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_gfx10<0x01f>;
+defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x020>;
+defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x021>;
+defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x022>;
+defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x023>;
+defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_gfx10<0x024>;
+defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x025>;
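
Two details of the FLAT_Real_gfx10 encoding are worth calling out. First,
when no saddr is in use, Inst{54-48} is filled with 0x7d, the GFX10 encoding
of the null scalar register. Second, Inst{11-0} = {offset{12}, offset{10-0}}
reflects that GFX10 FLAT offsets are 12-bit signed: the sign bit of the
13-bit pseudo operand lands in field bit 11 and operand bit 11 is dropped,
which is lossless because bits 11 and 12 of a 13-bit two's-complement value
agree throughout the 12-bit range. (The SDNPWantRoot additions at the top of
this file pass the root of the match to the offset selector, so it can
inspect the memory operation being selected.) A round-trip sketch, with
hypothetical helper names:

  #include <cassert>
  #include <cstdint>

  // Mirrors Inst{11-0} = {offset{12}, offset{10-0}} from FLAT_Real_gfx10.
  uint16_t encodeFlatOffsetGfx10(int32_t Off) {
    assert(Off >= -2048 && Off <= 2047 && "GFX10 FLAT offset is 12-bit signed");
    uint32_t U = static_cast<uint32_t>(Off);
    return static_cast<uint16_t>((((U >> 12) & 1) << 11) | (U & 0x7ff));
  }

  int32_t decodeFlatOffsetGfx10(uint16_t Field) {
    int32_t Low = Field & 0x7ff;
    return (Field & 0x800) ? Low - 2048 : Low; // field bit 11 is the sign
  }

  int main() {
    for (int32_t Off = -2048; Off <= 2047; ++Off)
      assert(decodeFlatOffsetGfx10(encodeFlatOffsetGfx10(Off)) == Off);
  }

This is also why the printer change below sign-extends from 12 bits instead
of 13 when the subtarget is GFX10.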

Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp Tue Apr 30 15:08:23 2019
@@ -72,8 +72,14 @@ void AMDGPUInstPrinter::printU16ImmDecOp
 }
 
 void AMDGPUInstPrinter::printS13ImmDecOperand(const MCInst *MI, unsigned OpNo,
+                                              const MCSubtargetInfo &STI,
                                               raw_ostream &O) {
-  O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm()));
+  // GFX10: Address offset is 12-bit signed byte offset.
+  if (AMDGPU::isGFX10(STI)) {
+    O << formatDec(SignExtend32<12>(MI->getOperand(OpNo).getImm()));
+  } else {
+    O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm()));
+  }
 }
 
 void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
@@ -128,7 +134,7 @@ void AMDGPUInstPrinter::printOffsetS13(c
   uint16_t Imm = MI->getOperand(OpNo).getImm();
   if (Imm != 0) {
     O << ((OpNo == 0)? "offset:" : " offset:");
-    printS13ImmDecOperand(MI, OpNo, O);
+    printS13ImmDecOperand(MI, OpNo, STI, O);
   }
 }
 
@@ -173,6 +179,12 @@ void AMDGPUInstPrinter::printGDS(const M
   printNamedBit(MI, OpNo, O, "gds");
 }
 
+void AMDGPUInstPrinter::printDLC(const MCInst *MI, unsigned OpNo,
+                                 const MCSubtargetInfo &STI, raw_ostream &O) {
+  if (AMDGPU::isGFX10(STI))
+    printNamedBit(MI, OpNo, O, "dlc");
+}
+
 void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo,
                                  const MCSubtargetInfo &STI, raw_ostream &O) {
   printNamedBit(MI, OpNo, O, "glc");

Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h Tue Apr 30 15:08:23 2019
@@ -41,7 +41,8 @@ private:
   void printU4ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printU8ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printS13ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printS13ImmDecOperand(const MCInst *MI, unsigned OpNo,
+                             const MCSubtargetInfo &STI, raw_ostream &O);
   void printU32ImmOperand(const MCInst *MI, unsigned OpNo,
                           const MCSubtargetInfo &STI, raw_ostream &O);
   void printNamedBit(const MCInst *MI, unsigned OpNo, raw_ostream &O,
@@ -67,6 +68,8 @@ private:
                               const MCSubtargetInfo &STI, raw_ostream &O);
   void printGDS(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                 raw_ostream &O);
+  void printDLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                raw_ostream &O);
   void printGLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                 raw_ostream &O);
   void printSLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,

Modified: llvm/trunk/lib/Target/AMDGPU/SIFixupVectorISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFixupVectorISel.cpp?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFixupVectorISel.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFixupVectorISel.cpp Tue Apr 30 15:08:23 2019
@@ -197,6 +197,11 @@ static bool fixupGlobalSaddr(MachineBasi
     // Atomics don't have a GLC, so omit the field if it is not there.
     if (Glc)
       NewGlob->addOperand(MF, *Glc);
+
+    MachineOperand *DLC = TII->getNamedOperand(MI, AMDGPU::OpName::dlc);
+    if (DLC)
+      NewGlob->addOperand(MF, *DLC);
+
     NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::slc));
     // _D16 instructions have a vdst_in operand; copy it in.
     MachineOperand *VDstInOp = TII->getNamedOperand(MI,

Modified: llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp Tue Apr 30 15:08:23 2019
@@ -70,6 +70,24 @@ void SIFrameLowering::emitFlatScratchIni
 
   // Do a 64-bit pointer add.
   if (ST.flatScratchIsPointer()) {
+    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
+      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
+        .addReg(FlatScrInitLo)
+        .addReg(ScratchWaveOffsetReg);
+      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
+        .addReg(FlatScrInitHi)
+        .addImm(0);
+      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
+        addReg(FlatScrInitLo).
+        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
+                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
+      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
+        addReg(FlatScrInitHi).
+        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
+                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
+      return;
+    }
+
     BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
       .addReg(FlatScrInitLo)
       .addReg(ScratchWaveOffsetReg);
@@ -80,6 +98,8 @@ void SIFrameLowering::emitFlatScratchIni
     return;
   }
 
+  assert(ST.getGeneration() < AMDGPUSubtarget::GFX10);
+
   // Copy the size in bytes.
   BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
     .addReg(FlatScrInitHi, RegState::Kill);
@@ -423,6 +443,7 @@ void SIFrameLowering::emitEntryFunctionS
       .addReg(Rsrc01)
       .addImm(EncodedOffset) // offset
       .addImm(0) // glc
+      .addImm(0) // dlc
       .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
       .addMemOperand(MMO);
     return;
@@ -463,6 +484,7 @@ void SIFrameLowering::emitEntryFunctionS
           .addReg(MFI->getImplicitBufferPtrUserSGPR())
           .addImm(0) // offset
           .addImm(0) // glc
+          .addImm(0) // dlc
           .addMemOperand(MMO)
           .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
       }
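
On GFX10 the flat scratch base is programmed through s_setreg_b32 instead of
by writing the FLAT_SCR register pair directly. The simm16 operand of
s_setreg packs {width-1, offset, id}; a sketch of that packing follows, where
the numeric hwreg id is an assumption used only for illustration:

  #include <cstdint>

  // s_setreg_b32 simm16 layout: bits 5:0 = hwreg id, bits 10:6 = bit offset,
  // bits 15:11 = width-1. Writing a full 32-bit value uses offset 0 and
  // width 32, i.e. ID | (31 << 11), which is exactly the
  // ID_FLAT_SCR_LO | (31 << WIDTH_M1_SHIFT_) expression above.
  constexpr uint16_t hwreg(unsigned Id, unsigned Offset, unsigned Width) {
    return static_cast<uint16_t>(Id | (Offset << 6) | ((Width - 1) << 11));
  }

  constexpr unsigned IdFlatScrLo = 20; // assumed GFX10 id of FLAT_SCR_LO

  static_assert(hwreg(IdFlatScrLo, 0, 32) == (IdFlatScrLo | (31u << 11)),
                "full-width write");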

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Tue Apr 30 15:08:23 2019
@@ -4166,6 +4166,10 @@ void SIInstrInfo::legalizeOperands(Machi
                 getNamedOperand(MI, AMDGPU::OpName::glc)) {
           MIB.addImm(GLC->getImm());
         }
+        if (const MachineOperand *DLC =
+                getNamedOperand(MI, AMDGPU::OpName::dlc)) {
+          MIB.addImm(DLC->getImm());
+        }
 
         MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc));
 

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Tue Apr 30 15:08:23 2019
@@ -830,6 +830,7 @@ def omod : NamedOperandU32<"OModSI", Nam
 def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>;
 def highmod : NamedOperandBit<"High", NamedMatchClass<"High">>;
 
+def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>;
 def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>;
 def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
 def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;

Modified: llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp Tue Apr 30 15:08:23 2019
@@ -131,6 +131,8 @@ class SILoadStoreOptimizer : public Mach
     bool GLC1;
     bool SLC0;
     bool SLC1;
+    bool DLC0;
+    bool DLC1;
     bool UseST64;
     SmallVector<MachineInstr *, 8> InstsToMove;
   };
@@ -323,7 +325,7 @@ bool SILoadStoreOptimizer::offsetsCanBeC
   if ((CI.InstClass != DS_READ) && (CI.InstClass != DS_WRITE)) {
     return (EltOffset0 + CI.Width0 == EltOffset1 ||
             EltOffset1 + CI.Width1 == EltOffset0) &&
-           CI.GLC0 == CI.GLC1 &&
+           CI.GLC0 == CI.GLC1 && CI.DLC0 == CI.DLC1 &&
            (CI.InstClass == S_BUFFER_LOAD_IMM || CI.SLC0 == CI.SLC1);
   }
 
@@ -637,6 +639,8 @@ bool SILoadStoreOptimizer::findMatchingI
           CI.SLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::slc)->getImm();
           CI.SLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::slc)->getImm();
         }
+        CI.DLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::dlc)->getImm();
+        CI.DLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::dlc)->getImm();
       }
 
       // Check both offsets fit in the reduced range.
@@ -857,6 +861,7 @@ SILoadStoreOptimizer::mergeSBufferLoadIm
       .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
       .addImm(MergedOffset) // offset
       .addImm(CI.GLC0)      // glc
+      .addImm(CI.DLC0)      // dlc
       .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
 
   std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
@@ -909,6 +914,7 @@ SILoadStoreOptimizer::mergeBufferLoadPai
       .addImm(CI.GLC0)      // glc
       .addImm(CI.SLC0)      // slc
       .addImm(0)            // tfe
+      .addImm(CI.DLC0)      // dlc
       .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
 
   std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
@@ -1088,9 +1094,10 @@ SILoadStoreOptimizer::mergeBufferStorePa
   MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
       .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
       .addImm(std::min(CI.Offset0, CI.Offset1)) // offset
-      .addImm(CI.GLC0)                          // glc
-      .addImm(CI.SLC0)                          // slc
-      .addImm(0)                                // tfe
+      .addImm(CI.GLC0)      // glc
+      .addImm(CI.SLC0)      // slc
+      .addImm(0)            // tfe
+      .addImm(CI.DLC0)      // dlc
       .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
 
   moveInstsAfter(MIB, CI.InstsToMove);
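
For the load/store optimizer, dlc behaves like glc: two accesses can only be
merged when their cache-policy bits agree, because the combined instruction
carries a single set of bits. A condensed sketch of the check in
offsetsCanBeCombined, with field names mirroring CombineInfo:

  struct CacheBits {
    bool Glc, Slc, Dlc;
  };

  // s_buffer_load_* has no slc operand, so slc is only compared for the
  // other instruction classes, matching the condition above.
  bool cachePolicyAllowsMerge(const CacheBits &A, const CacheBits &B,
                              bool IsSBufferLoadImm) {
    return A.Glc == B.Glc && A.Dlc == B.Dlc &&
           (IsSBufferLoadImm || A.Slc == B.Slc);
  }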

Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp Tue Apr 30 15:08:23 2019
@@ -536,6 +536,7 @@ static bool buildMUBUFOffsetLoadStore(co
           .addImm(0) // glc
           .addImm(0) // slc
           .addImm(0) // tfe
+          .addImm(0) // dlc
           .cloneMemRefs(*MI);
 
   const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
@@ -639,6 +640,7 @@ void SIRegisterInfo::buildSpillLoadStore
       .addImm(0) // glc
       .addImm(0) // slc
       .addImm(0) // tfe
+      .addImm(0) // dlc
       .addMemOperand(NewMMO);
 
     if (NumSubRegs > 1)
@@ -769,6 +771,7 @@ bool SIRegisterInfo::spillSGPR(MachineBa
         .addReg(MFI->getScratchRSrcReg())        // sbase
         .addReg(OffsetReg, RegState::Kill)       // soff
         .addImm(0)                               // glc
+        .addImm(0)                               // dlc
         .addMemOperand(MMO);
 
       continue;
@@ -928,9 +931,10 @@ bool SIRegisterInfo::restoreSGPR(Machine
 
       auto MIB =
         BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
-        .addReg(MFI->getScratchRSrcReg()) // sbase
-        .addReg(OffsetReg, RegState::Kill)                // soff
-        .addImm(0)                        // glc
+        .addReg(MFI->getScratchRSrcReg())  // sbase
+        .addReg(OffsetReg, RegState::Kill) // soff
+        .addImm(0)                         // glc
+        .addImm(0)                         // dlc
         .addMemOperand(MMO);
 
       if (NumSubRegs > 1 && i == 0)

Modified: llvm/trunk/lib/Target/AMDGPU/SMInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SMInstructions.td?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SMInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SMInstructions.td Tue Apr 30 15:08:23 2019
@@ -40,6 +40,7 @@ class SM_Pseudo <string opName, dag outs
   bits<1> has_sbase = 1;
   bits<1> has_sdst = 1;
   bit has_glc = 0;
+  bit has_dlc = 0;
   bits<1> has_offset = 1;
   bits<1> offset_is_imm = 0;
 }
@@ -79,6 +80,7 @@ class SM_Load_Pseudo <string opName, dag
   let mayLoad = 1;
   let mayStore = 0;
   let has_glc = 1;
+  let has_dlc = 1;
 }
 
 class SM_Store_Pseudo <string opName, dag ins, string asmOps, list<dag> pattern = []>
@@ -88,6 +90,7 @@ class SM_Store_Pseudo <string opName, da
   let mayLoad = 0;
   let mayStore = 1;
   let has_glc = 1;
+  let has_dlc = 1;
   let ScalarStore = 1;
 }
 
@@ -108,21 +111,23 @@ multiclass SM_Pseudo_Loads<string opName
                            RegisterClass dstClass> {
   def _IMM  : SM_Load_Pseudo <opName,
                               (outs dstClass:$sdst),
-                              (ins baseClass:$sbase, i32imm:$offset, i1imm:$glc),
-                              " $sdst, $sbase, $offset$glc", []> {
+                              (ins baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc),
+                              " $sdst, $sbase, $offset$glc$dlc", []> {
     let offset_is_imm = 1;
     let BaseClass = baseClass;
     let PseudoInstr = opName # "_IMM";
     let has_glc = 1;
+    let has_dlc = 1;
   }
 
   def _SGPR  : SM_Load_Pseudo <opName,
                               (outs dstClass:$sdst),
-                              (ins baseClass:$sbase, SReg_32:$soff, i1imm:$glc),
-                              " $sdst, $sbase, $offset$glc", []> {
+                              (ins baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc),
+                              " $sdst, $sbase, $offset$glc$dlc", []> {
     let BaseClass = baseClass;
     let PseudoInstr = opName # "_SGPR";
     let has_glc = 1;
+    let has_dlc = 1;
   }
 }
 
@@ -130,8 +135,8 @@ multiclass SM_Pseudo_Stores<string opNam
                            RegisterClass baseClass,
                            RegisterClass srcClass> {
   def _IMM  : SM_Store_Pseudo <opName,
-    (ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, i1imm:$glc),
-    " $sdata, $sbase, $offset$glc", []> {
+    (ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc),
+    " $sdata, $sbase, $offset$glc$dlc", []> {
     let offset_is_imm = 1;
     let BaseClass = baseClass;
     let SrcClass = srcClass;
@@ -139,8 +144,8 @@ multiclass SM_Pseudo_Stores<string opNam
   }
 
   def _SGPR  : SM_Store_Pseudo <opName,
-    (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, i1imm:$glc),
-    " $sdata, $sbase, $offset$glc", []> {
+    (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc),
+    " $sdata, $sbase, $offset$glc$dlc", []> {
     let BaseClass = baseClass;
     let SrcClass = srcClass;
     let PseudoInstr = opName # "_SGPR";
@@ -184,6 +189,16 @@ multiclass SM_Pseudo_Probe<string opName
   def _SGPR : SM_Probe_Pseudo <opName, (ins i8imm:$sdata, baseClass:$sbase, SReg_32:$offset), 0>;
 }
 
+class SM_WaveId_Pseudo<string opName, SDPatternOperator node> : SM_Pseudo<
+  opName, (outs SReg_32_XM0_XEXEC:$sdst), (ins),
+  " $sdst", [(set i32:$sdst, (node))]> {
+  let hasSideEffects = 1;
+  let mayStore = 0;
+  let mayLoad = 1;
+  let has_sbase = 0;
+  let has_offset = 0;
+}
+
 //===----------------------------------------------------------------------===//
 // Scalar Atomic Memory Classes
 //===----------------------------------------------------------------------===//
@@ -197,6 +212,7 @@ class SM_Atomic_Pseudo <string opName,
   let mayLoad = 1;
   let mayStore = 1;
   let has_glc = 1;
+  let has_dlc = 1;
 
   // Should these be set?
   let ScalarStore = 1;
@@ -212,9 +228,9 @@ class SM_Pseudo_Atomic<string opName,
   SM_Atomic_Pseudo<opName,
                    !if(isRet, (outs dataClass:$sdst), (outs)),
                    !if(isImm,
-                       (ins dataClass:$sdata, baseClass:$sbase, smrd_offset_20:$offset),
-                       (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset)),
-                   !if(isRet, " $sdst", " $sdata") # ", $sbase, $offset" # !if(isRet, " glc", ""),
+                       (ins dataClass:$sdata, baseClass:$sbase, smrd_offset_20:$offset, DLC:$dlc),
+                       (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset, DLC:$dlc)),
+                   !if(isRet, " $sdst", " $sdata") # ", $sbase, $offset" # !if(isRet, " glc", "") # "$dlc",
                    isRet> {
   let offset_is_imm = isImm;
   let PseudoInstr = opName # !if(isImm,
@@ -272,6 +288,7 @@ defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_
   "s_buffer_load_dwordx16", SReg_128, SReg_512
 >;
 
+let SubtargetPredicate = HasScalarStores in {
 defm S_STORE_DWORD : SM_Pseudo_Stores <"s_store_dword", SReg_64, SReg_32_XM0_XEXEC>;
 defm S_STORE_DWORDX2 : SM_Pseudo_Stores <"s_store_dwordx2", SReg_64, SReg_64_XEXEC>;
 defm S_STORE_DWORDX4 : SM_Pseudo_Stores <"s_store_dwordx4", SReg_64, SReg_128>;
@@ -287,7 +304,7 @@ defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_
 defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores <
   "s_buffer_store_dwordx4", SReg_128, SReg_128
 >;
-
+} // End SubtargetPredicate = HasScalarStores
 
 def S_MEMTIME : SM_Time_Pseudo <"s_memtime", int_amdgcn_s_memtime>;
 def S_DCACHE_INV : SM_Inval_Pseudo <"s_dcache_inv", int_amdgcn_s_dcache_inv>;
@@ -297,15 +314,22 @@ def S_DCACHE_INV_VOL : SM_Inval_Pseudo <
 } // let SubtargetPredicate = isGFX7GFX8GFX9
 
 let SubtargetPredicate = isGFX8Plus in {
+let OtherPredicates = [HasScalarStores] in {
 def S_DCACHE_WB     : SM_Inval_Pseudo <"s_dcache_wb", int_amdgcn_s_dcache_wb>;
 def S_DCACHE_WB_VOL : SM_Inval_Pseudo <"s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
+} // End OtherPredicates = [HasScalarStores]
 def S_MEMREALTIME   : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>;
 
 defm S_ATC_PROBE        : SM_Pseudo_Probe <"s_atc_probe", SReg_64>;
 defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <"s_atc_probe_buffer", SReg_128>;
 } // SubtargetPredicate = isGFX8Plus
 
-let SubtargetPredicate = HasFlatScratchInsts, Uses = [FLAT_SCR] in {
+let SubtargetPredicate = isGFX10Plus in {
+def S_GL1_INV : SM_Inval_Pseudo<"s_gl1_inv">;
+def S_GET_WAVEID_IN_WORKGROUP : SM_WaveId_Pseudo <"s_get_waveid_in_workgroup", int_amdgcn_s_get_waveid_in_workgroup>;
+} // End SubtargetPredicate = isGFX10Plus
+
+let SubtargetPredicate = HasScalarFlatScratchInsts, Uses = [FLAT_SCR] in {
 defm S_SCRATCH_LOAD_DWORD    : SM_Pseudo_Loads <"s_scratch_load_dword",   SReg_64, SReg_32_XM0_XEXEC>;
 defm S_SCRATCH_LOAD_DWORDX2  : SM_Pseudo_Loads <"s_scratch_load_dwordx2", SReg_64, SReg_64_XEXEC>;
 defm S_SCRATCH_LOAD_DWORDX4  : SM_Pseudo_Loads <"s_scratch_load_dwordx4", SReg_64, SReg_128>;
@@ -313,7 +337,7 @@ defm S_SCRATCH_LOAD_DWORDX4  : SM_Pseudo
 defm S_SCRATCH_STORE_DWORD   : SM_Pseudo_Stores <"s_scratch_store_dword",   SReg_64, SReg_32_XM0_XEXEC>;
 defm S_SCRATCH_STORE_DWORDX2 : SM_Pseudo_Stores <"s_scratch_store_dwordx2", SReg_64, SReg_64_XEXEC>;
 defm S_SCRATCH_STORE_DWORDX4 : SM_Pseudo_Stores <"s_scratch_store_dwordx4", SReg_64, SReg_128>;
-} // SubtargetPredicate = HasFlatScratchInsts
+} // SubtargetPredicate = HasScalarFlatScratchInsts
 
 let SubtargetPredicate = HasScalarAtomics in {
 
@@ -375,7 +399,7 @@ defm S_ATOMIC_DEC_X2              : SM_P
 
 } // let SubtargetPredicate = HasScalarAtomics
 
-let SubtargetPredicate = isGFX9Only in {
+let SubtargetPredicate = HasScalarAtomics in {
 defm S_DCACHE_DISCARD    : SM_Pseudo_Discards <"s_dcache_discard">;
 defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards <"s_dcache_discard_x2">;
 }
@@ -411,13 +435,13 @@ multiclass SM_Real_Loads_si<bits<5> op,
                             SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
 
   def _IMM_si : SMRD_Real_si <op, immPs> {
-    let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, GLC:$glc);
+    let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, GLC:$glc, DLC:$dlc);
   }
 
   // FIXME: The operand name $offset is inconsistent with $soff used
   // in the pseudo
   def _SGPR_si : SMRD_Real_si <op, sgprPs> {
-    let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc);
+    let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
   }
 
 }
@@ -464,10 +488,10 @@ multiclass SM_Real_Loads_vi<bits<8> op,
                             SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
                             SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
   def _IMM_vi : SMEM_Real_vi <op, immPs> {
-    let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc);
+    let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
   }
   def _SGPR_vi : SMEM_Real_vi <op, sgprPs> {
-    let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc);
+    let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
   }
 }
 
@@ -485,11 +509,11 @@ multiclass SM_Real_Stores_vi<bits<8> op,
   // FIXME: The operand name $offset is inconsistent with $soff used
   // in the pseudo
   def _IMM_vi : SMEM_Real_Store_vi <op, immPs> {
-    let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc);
+    let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
   }
 
   def _SGPR_vi : SMEM_Real_Store_vi <op, sgprPs> {
-    let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc);
+    let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
   }
 }
 
@@ -638,7 +662,7 @@ class SMRD_Real_Load_IMM_ci <bits<5> op,
 
   let AssemblerPredicates = [isGFX7Only];
   let DecoderNamespace = "GFX7";
-  let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc);
+  let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc, DLC:$dlc);
 
   let LGKM_CNT = ps.LGKM_CNT;
   let SMRD = ps.SMRD;
@@ -718,26 +742,26 @@ multiclass SMRD_Pattern <string Instr, V
   // 1. IMM offset
   def : GCNPat <
     (smrd_load (SMRDImm i64:$sbase, i32:$offset)),
-    (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))
+    (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0, 0))
   >;
 
   // 2. 32-bit IMM offset on CI
   def : GCNPat <
     (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
-    (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> {
+    (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0, 0))> {
     let OtherPredicates = [isGFX7Only];
   }
 
   // 3. SGPR offset
   def : GCNPat <
     (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
-    (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0))
+    (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0, 0))
   >;
 
   // 4. No offset
   def : GCNPat <
     (vt (smrd_load (i64 SReg_64:$sbase))),
-    (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))
+    (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0, 0))
   >;
 }
 
@@ -745,20 +769,20 @@ multiclass SMLoad_Pattern <string Instr,
   // 1. Offset as an immediate
   def : GCNPat <
     (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), i1:$glc),
-    (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc)))
+    (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc), 0))
   >;
 
   // 2. 32-bit IMM offset on CI
   def : GCNPat <
     (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), i1:$glc)),
-    (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc))> {
+    (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc), 0)> {
     let OtherPredicates = [isGFX7Only];
   }
 
   // 3. Offset loaded in an 32bit SGPR
   def : GCNPat <
     (SIsbuffer_load v4i32:$sbase, i32:$offset, i1:$glc),
-    (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc)))
+    (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc), 0))
   >;
 }
 
@@ -801,3 +825,198 @@ def : GCNPat <
 >;
 
 } // let OtherPredicates = [isGFX8Plus]
+
+//===----------------------------------------------------------------------===//
+// GFX10.
+//===----------------------------------------------------------------------===//
+
+class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps> :
+    SM_Real<ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10>, Enc64 {
+  bit glc;
+  bit dlc;
+
+  let AssemblerPredicates = [isGFX10Plus];
+  let DecoderNamespace = "GFX10";
+
+  let Inst{5-0}   = !if(ps.has_sbase, sbase{6-1}, ?);
+  let Inst{12-6}  = !if(ps.has_sdst, sdst{6-0}, ?);
+  let Inst{14}    = !if(ps.has_dlc, dlc, ?);
+  let Inst{16}    = !if(ps.has_glc, glc, ?);
+  let Inst{25-18} = op;
+  let Inst{31-26} = 0x3d;
+  let Inst{51-32} = !if(ps.offset_is_imm, !if(ps.has_offset, offset{19-0}, ?), ?);
+  let Inst{63-57} = !if(ps.offset_is_imm, !cast<int>(SGPR_NULL.HWEncoding),
+                                          !if(ps.has_offset, offset{6-0}, ?));
+}
+
+multiclass SM_Real_Loads_gfx10<bits<8> op, string ps,
+                               SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
+                               SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
+  def _IMM_gfx10 : SMEM_Real_gfx10<op, immPs> {
+    let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
+  }
+  def _SGPR_gfx10 : SMEM_Real_gfx10<op, sgprPs> {
+    let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
+  }
+}
+
+class SMEM_Real_Store_gfx10<bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx10<op, ps> {
+  bits<7> sdata;
+
+  let sdst = ?;
+  let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
+}
+
+multiclass SM_Real_Stores_gfx10<bits<8> op, string ps,
+                                SM_Store_Pseudo immPs = !cast<SM_Store_Pseudo>(ps#_IMM),
+                                SM_Store_Pseudo sgprPs = !cast<SM_Store_Pseudo>(ps#_SGPR)> {
+  // FIXME: The operand name $offset is inconsistent with $soff used
+  // in the pseudo
+  def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, immPs> {
+    let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
+  }
+
+  def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs> {
+    let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
+  }
+}
+
+defm S_LOAD_DWORD            : SM_Real_Loads_gfx10<0x000, "S_LOAD_DWORD">;
+defm S_LOAD_DWORDX2          : SM_Real_Loads_gfx10<0x001, "S_LOAD_DWORDX2">;
+defm S_LOAD_DWORDX4          : SM_Real_Loads_gfx10<0x002, "S_LOAD_DWORDX4">;
+defm S_LOAD_DWORDX8          : SM_Real_Loads_gfx10<0x003, "S_LOAD_DWORDX8">;
+defm S_LOAD_DWORDX16         : SM_Real_Loads_gfx10<0x004, "S_LOAD_DWORDX16">;
+
+let SubtargetPredicate = HasScalarFlatScratchInsts in {
+defm S_SCRATCH_LOAD_DWORD    : SM_Real_Loads_gfx10<0x005, "S_SCRATCH_LOAD_DWORD">;
+defm S_SCRATCH_LOAD_DWORDX2  : SM_Real_Loads_gfx10<0x006, "S_SCRATCH_LOAD_DWORDX2">;
+defm S_SCRATCH_LOAD_DWORDX4  : SM_Real_Loads_gfx10<0x007, "S_SCRATCH_LOAD_DWORDX4">;
+} // End SubtargetPredicate = HasScalarFlatScratchInsts
+
+defm S_BUFFER_LOAD_DWORD     : SM_Real_Loads_gfx10<0x008, "S_BUFFER_LOAD_DWORD">;
+defm S_BUFFER_LOAD_DWORDX2   : SM_Real_Loads_gfx10<0x009, "S_BUFFER_LOAD_DWORDX2">;
+defm S_BUFFER_LOAD_DWORDX4   : SM_Real_Loads_gfx10<0x00a, "S_BUFFER_LOAD_DWORDX4">;
+defm S_BUFFER_LOAD_DWORDX8   : SM_Real_Loads_gfx10<0x00b, "S_BUFFER_LOAD_DWORDX8">;
+defm S_BUFFER_LOAD_DWORDX16  : SM_Real_Loads_gfx10<0x00c, "S_BUFFER_LOAD_DWORDX16">;
+
+let SubtargetPredicate = HasScalarStores in {
+defm S_STORE_DWORD           : SM_Real_Stores_gfx10<0x010, "S_STORE_DWORD">;
+defm S_STORE_DWORDX2         : SM_Real_Stores_gfx10<0x011, "S_STORE_DWORDX2">;
+defm S_STORE_DWORDX4         : SM_Real_Stores_gfx10<0x012, "S_STORE_DWORDX4">;
+let OtherPredicates = [HasScalarFlatScratchInsts] in {
+defm S_SCRATCH_STORE_DWORD   : SM_Real_Stores_gfx10<0x015, "S_SCRATCH_STORE_DWORD">;
+defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x016, "S_SCRATCH_STORE_DWORDX2">;
+defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x017, "S_SCRATCH_STORE_DWORDX4">;
+} // End OtherPredicates = [HasScalarFlatScratchInsts]
+defm S_BUFFER_STORE_DWORD    : SM_Real_Stores_gfx10<0x018, "S_BUFFER_STORE_DWORD">;
+defm S_BUFFER_STORE_DWORDX2  : SM_Real_Stores_gfx10<0x019, "S_BUFFER_STORE_DWORDX2">;
+defm S_BUFFER_STORE_DWORDX4  : SM_Real_Stores_gfx10<0x01a, "S_BUFFER_STORE_DWORDX4">;
+} // End SubtargetPredicate = HasScalarStores
+
+def S_MEMREALTIME_gfx10              : SMEM_Real_gfx10<0x025, S_MEMREALTIME>;
+def S_MEMTIME_gfx10                  : SMEM_Real_gfx10<0x024, S_MEMTIME>;
+def S_GL1_INV_gfx10                  : SMEM_Real_gfx10<0x01f, S_GL1_INV>;
+def S_GET_WAVEID_IN_WORKGROUP_gfx10  : SMEM_Real_gfx10<0x02a, S_GET_WAVEID_IN_WORKGROUP>;
+def S_DCACHE_INV_gfx10               : SMEM_Real_gfx10<0x020, S_DCACHE_INV>;
+
+let SubtargetPredicate = HasScalarStores in {
+def S_DCACHE_WB_gfx10                : SMEM_Real_gfx10<0x021, S_DCACHE_WB>;
+} // End SubtargetPredicate = HasScalarStores
+
+multiclass SM_Real_Probe_gfx10<bits<8> op, string ps> {
+  def _IMM_gfx10  : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>;
+  def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>;
+}
+
+defm S_ATC_PROBE        : SM_Real_Probe_gfx10 <0x26, "S_ATC_PROBE">;
+defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx10 <0x27, "S_ATC_PROBE_BUFFER">;
+
+class SMEM_Atomic_Real_gfx10 <bits<8> op, SM_Atomic_Pseudo ps>
+  : SMEM_Real_gfx10 <op, ps> {
+
+  bits<7> sdata;
+  bit dlc;
+
+  let Constraints = ps.Constraints;
+  let DisableEncoding = ps.DisableEncoding;
+
+  let glc = ps.glc;
+
+  let Inst{14} = !if(ps.has_dlc, dlc, 0);
+  let Inst{12-6} = !if(glc, sdst{6-0}, sdata{6-0});
+}
+
+multiclass SM_Real_Atomics_gfx10<bits<8> op, string ps> {
+  def _IMM_gfx10       : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
+  def _SGPR_gfx10      : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
+  def _IMM_RTN_gfx10   : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
+  def _SGPR_RTN_gfx10  : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
+}
+
+let SubtargetPredicate = HasScalarAtomics in {
+
+defm S_BUFFER_ATOMIC_SWAP         : SM_Real_Atomics_gfx10 <0x40, "S_BUFFER_ATOMIC_SWAP">;
+defm S_BUFFER_ATOMIC_CMPSWAP      : SM_Real_Atomics_gfx10 <0x41, "S_BUFFER_ATOMIC_CMPSWAP">;
+defm S_BUFFER_ATOMIC_ADD          : SM_Real_Atomics_gfx10 <0x42, "S_BUFFER_ATOMIC_ADD">;
+defm S_BUFFER_ATOMIC_SUB          : SM_Real_Atomics_gfx10 <0x43, "S_BUFFER_ATOMIC_SUB">;
+defm S_BUFFER_ATOMIC_SMIN         : SM_Real_Atomics_gfx10 <0x44, "S_BUFFER_ATOMIC_SMIN">;
+defm S_BUFFER_ATOMIC_UMIN         : SM_Real_Atomics_gfx10 <0x45, "S_BUFFER_ATOMIC_UMIN">;
+defm S_BUFFER_ATOMIC_SMAX         : SM_Real_Atomics_gfx10 <0x46, "S_BUFFER_ATOMIC_SMAX">;
+defm S_BUFFER_ATOMIC_UMAX         : SM_Real_Atomics_gfx10 <0x47, "S_BUFFER_ATOMIC_UMAX">;
+defm S_BUFFER_ATOMIC_AND          : SM_Real_Atomics_gfx10 <0x48, "S_BUFFER_ATOMIC_AND">;
+defm S_BUFFER_ATOMIC_OR           : SM_Real_Atomics_gfx10 <0x49, "S_BUFFER_ATOMIC_OR">;
+defm S_BUFFER_ATOMIC_XOR          : SM_Real_Atomics_gfx10 <0x4a, "S_BUFFER_ATOMIC_XOR">;
+defm S_BUFFER_ATOMIC_INC          : SM_Real_Atomics_gfx10 <0x4b, "S_BUFFER_ATOMIC_INC">;
+defm S_BUFFER_ATOMIC_DEC          : SM_Real_Atomics_gfx10 <0x4c, "S_BUFFER_ATOMIC_DEC">;
+
+defm S_BUFFER_ATOMIC_SWAP_X2      : SM_Real_Atomics_gfx10 <0x60, "S_BUFFER_ATOMIC_SWAP_X2">;
+defm S_BUFFER_ATOMIC_CMPSWAP_X2   : SM_Real_Atomics_gfx10 <0x61, "S_BUFFER_ATOMIC_CMPSWAP_X2">;
+defm S_BUFFER_ATOMIC_ADD_X2       : SM_Real_Atomics_gfx10 <0x62, "S_BUFFER_ATOMIC_ADD_X2">;
+defm S_BUFFER_ATOMIC_SUB_X2       : SM_Real_Atomics_gfx10 <0x63, "S_BUFFER_ATOMIC_SUB_X2">;
+defm S_BUFFER_ATOMIC_SMIN_X2      : SM_Real_Atomics_gfx10 <0x64, "S_BUFFER_ATOMIC_SMIN_X2">;
+defm S_BUFFER_ATOMIC_UMIN_X2      : SM_Real_Atomics_gfx10 <0x65, "S_BUFFER_ATOMIC_UMIN_X2">;
+defm S_BUFFER_ATOMIC_SMAX_X2      : SM_Real_Atomics_gfx10 <0x66, "S_BUFFER_ATOMIC_SMAX_X2">;
+defm S_BUFFER_ATOMIC_UMAX_X2      : SM_Real_Atomics_gfx10 <0x67, "S_BUFFER_ATOMIC_UMAX_X2">;
+defm S_BUFFER_ATOMIC_AND_X2       : SM_Real_Atomics_gfx10 <0x68, "S_BUFFER_ATOMIC_AND_X2">;
+defm S_BUFFER_ATOMIC_OR_X2        : SM_Real_Atomics_gfx10 <0x69, "S_BUFFER_ATOMIC_OR_X2">;
+defm S_BUFFER_ATOMIC_XOR_X2       : SM_Real_Atomics_gfx10 <0x6a, "S_BUFFER_ATOMIC_XOR_X2">;
+defm S_BUFFER_ATOMIC_INC_X2       : SM_Real_Atomics_gfx10 <0x6b, "S_BUFFER_ATOMIC_INC_X2">;
+defm S_BUFFER_ATOMIC_DEC_X2       : SM_Real_Atomics_gfx10 <0x6c, "S_BUFFER_ATOMIC_DEC_X2">;
+
+defm S_ATOMIC_SWAP                : SM_Real_Atomics_gfx10 <0x80, "S_ATOMIC_SWAP">;
+defm S_ATOMIC_CMPSWAP             : SM_Real_Atomics_gfx10 <0x81, "S_ATOMIC_CMPSWAP">;
+defm S_ATOMIC_ADD                 : SM_Real_Atomics_gfx10 <0x82, "S_ATOMIC_ADD">;
+defm S_ATOMIC_SUB                 : SM_Real_Atomics_gfx10 <0x83, "S_ATOMIC_SUB">;
+defm S_ATOMIC_SMIN                : SM_Real_Atomics_gfx10 <0x84, "S_ATOMIC_SMIN">;
+defm S_ATOMIC_UMIN                : SM_Real_Atomics_gfx10 <0x85, "S_ATOMIC_UMIN">;
+defm S_ATOMIC_SMAX                : SM_Real_Atomics_gfx10 <0x86, "S_ATOMIC_SMAX">;
+defm S_ATOMIC_UMAX                : SM_Real_Atomics_gfx10 <0x87, "S_ATOMIC_UMAX">;
+defm S_ATOMIC_AND                 : SM_Real_Atomics_gfx10 <0x88, "S_ATOMIC_AND">;
+defm S_ATOMIC_OR                  : SM_Real_Atomics_gfx10 <0x89, "S_ATOMIC_OR">;
+defm S_ATOMIC_XOR                 : SM_Real_Atomics_gfx10 <0x8a, "S_ATOMIC_XOR">;
+defm S_ATOMIC_INC                 : SM_Real_Atomics_gfx10 <0x8b, "S_ATOMIC_INC">;
+defm S_ATOMIC_DEC                 : SM_Real_Atomics_gfx10 <0x8c, "S_ATOMIC_DEC">;
+
+defm S_ATOMIC_SWAP_X2             : SM_Real_Atomics_gfx10 <0xa0, "S_ATOMIC_SWAP_X2">;
+defm S_ATOMIC_CMPSWAP_X2          : SM_Real_Atomics_gfx10 <0xa1, "S_ATOMIC_CMPSWAP_X2">;
+defm S_ATOMIC_ADD_X2              : SM_Real_Atomics_gfx10 <0xa2, "S_ATOMIC_ADD_X2">;
+defm S_ATOMIC_SUB_X2              : SM_Real_Atomics_gfx10 <0xa3, "S_ATOMIC_SUB_X2">;
+defm S_ATOMIC_SMIN_X2             : SM_Real_Atomics_gfx10 <0xa4, "S_ATOMIC_SMIN_X2">;
+defm S_ATOMIC_UMIN_X2             : SM_Real_Atomics_gfx10 <0xa5, "S_ATOMIC_UMIN_X2">;
+defm S_ATOMIC_SMAX_X2             : SM_Real_Atomics_gfx10 <0xa6, "S_ATOMIC_SMAX_X2">;
+defm S_ATOMIC_UMAX_X2             : SM_Real_Atomics_gfx10 <0xa7, "S_ATOMIC_UMAX_X2">;
+defm S_ATOMIC_AND_X2              : SM_Real_Atomics_gfx10 <0xa8, "S_ATOMIC_AND_X2">;
+defm S_ATOMIC_OR_X2               : SM_Real_Atomics_gfx10 <0xa9, "S_ATOMIC_OR_X2">;
+defm S_ATOMIC_XOR_X2              : SM_Real_Atomics_gfx10 <0xaa, "S_ATOMIC_XOR_X2">;
+defm S_ATOMIC_INC_X2              : SM_Real_Atomics_gfx10 <0xab, "S_ATOMIC_INC_X2">;
+defm S_ATOMIC_DEC_X2              : SM_Real_Atomics_gfx10 <0xac, "S_ATOMIC_DEC_X2">;
+
+multiclass SM_Real_Discard_gfx10<bits<8> op, string ps> {
+  def _IMM_gfx10  : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>;
+  def _SGPR_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>;
+}
+
+defm S_DCACHE_DISCARD    : SM_Real_Discard_gfx10 <0x28, "S_DCACHE_DISCARD">;
+defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_gfx10 <0x29, "S_DCACHE_DISCARD_X2">;
+
+} // End SubtargetPredicate = HasScalarAtomics
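
To make the new SMEM_Real_gfx10 layout above concrete, here is a hedged C++ rendering of the immediate-offset encoding. The field positions come straight from the Inst{...} assignments — sbase in [5:0], sdst in [12:6], dlc in bit 14, glc in bit 16, the opcode in [25:18], the 0x3d SMEM major opcode in [31:26], a 20-bit offset in [51:32], and SGPR_NULL's hardware encoding in [63:57]; the sgprNull parameter stands in for whatever SGPR_NULL.HWEncoding resolves to:

#include <cstdint>

// Sketch of the gfx10 SMEM encoding (immediate-offset form) per the
// SMEM_Real_gfx10 class above. Hand-written for illustration, not
// generated code; only the bit positions are taken from the patch.
uint64_t encodeSMEMImm(uint8_t op, uint8_t sbase, uint8_t sdst, bool glc,
                       bool dlc, uint32_t offset, uint8_t sgprNull) {
  uint64_t Inst = 0;
  Inst |= uint64_t(sbase >> 1) & 0x3f;        // Inst{5-0}   = sbase{6-1}
  Inst |= (uint64_t(sdst) & 0x7f) << 6;       // Inst{12-6}  = sdst{6-0}
  Inst |= uint64_t(dlc) << 14;                // Inst{14}    = dlc (new)
  Inst |= uint64_t(glc) << 16;                // Inst{16}    = glc
  Inst |= uint64_t(op) << 18;                 // Inst{25-18} = op
  Inst |= uint64_t(0x3d) << 26;               // Inst{31-26} = SMEM encoding
  Inst |= (uint64_t(offset) & 0xfffff) << 32; // Inst{51-32} = offset{19-0}
  Inst |= (uint64_t(sgprNull) & 0x7f) << 57;  // Inst{63-57} = SGPR_NULL
  return Inst;
}

The SGPR-offset form differs only in the top field: Inst{63-57} carries offset{6-0} instead of the null register, as the !if(ps.offset_is_imm, ...) in the class shows.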

Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir Tue Apr 30 15:08:23 2019
@@ -18,7 +18,7 @@ body: |
     ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
     ; GCN: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
     ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GCN: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     %0:sgpr(p1) = COPY $sgpr2_sgpr3
     %1:vgpr(p1) = COPY %0
     %2:vgpr(s32) = G_IMPLICIT_DEF

Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir Tue Apr 30 15:08:23 2019
@@ -13,7 +13,7 @@ body: |
     ; GCN-LABEL: name: implicit_def_s32
     ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
     ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GCN: FLAT_STORE_DWORD [[COPY]], [[DEF]], 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: FLAT_STORE_DWORD [[COPY]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     %0:vgpr(p1) = COPY $vgpr3_vgpr4
     %1:vgpr(s32) = G_IMPLICIT_DEF
     G_STORE %1, %0 :: (store 4, addrspace 1)
@@ -30,7 +30,7 @@ body: |
     ; GCN-LABEL: name: implicit_def_s64
     ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
     ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
-    ; GCN: FLAT_STORE_DWORDX2 [[COPY]], [[DEF]], 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: FLAT_STORE_DWORDX2 [[COPY]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     %0:vgpr(p1) = COPY $vgpr3_vgpr4
     %1:vgpr(s64) = G_IMPLICIT_DEF
     G_STORE %1, %0 :: (store 8, addrspace 1)
@@ -60,7 +60,7 @@ body: |
     ; GCN-LABEL: name: implicit_def_p1
     ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
     ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
-    ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     %0:vgpr(p1) = G_IMPLICIT_DEF
     %1:vgpr(s32) = G_CONSTANT i32 4
     G_STORE %1, %0 :: (store 4, addrspace 1)
@@ -76,7 +76,7 @@ body: |
     ; GCN-LABEL: name: implicit_def_p3
     ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
     ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
-    ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     %0:vgpr(p3) = G_IMPLICIT_DEF
     %1:vgpr(s32) = G_CONSTANT i32 4
     G_STORE %1, %0 :: (store 4, addrspace 1)
@@ -92,7 +92,7 @@ body: |
     ; GCN-LABEL: name: implicit_def_p4
     ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
     ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
-    ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     %0:vgpr(p4) = G_IMPLICIT_DEF
     %1:vgpr(s32) = G_CONSTANT i32 4
     G_STORE %1, %0 :: (store 4, addrspace 1)
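
The mechanical churn in these MIR tests is the same single operand everywhere: FLAT_* instructions now carry offset, glc, slc, dlc, so every checked line gains one more 0 before the implicit operands. A hedged sketch of how backend code can read the bit back, assuming the AMDGPU named-operand table gained a dlc entry alongside this patch (llvm::AMDGPU::OpName::dlc and the header path are the assumptions here):

#include "SIInstrInfo.h" // AMDGPU target-private header; path assumed

// Sketch: fetch the dlc immediate off a FLAT/SMEM MachineInstr, falling
// back to 0 for opcodes without the operand (getNamedOperand yields null).
static int64_t getDLCBit(const llvm::MachineInstr &MI,
                         const llvm::SIInstrInfo &TII) {
  const llvm::MachineOperand *DLC =
      TII.getNamedOperand(MI, llvm::AMDGPU::OpName::dlc);
  return DLC ? DLC->getImm() : 0;
}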

Modified: llvm/trunk/test/CodeGen/AMDGPU/break-smem-soft-clauses.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/break-smem-soft-clauses.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/break-smem-soft-clauses.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/break-smem-soft-clauses.mir Tue Apr 30 15:08:23 2019
@@ -8,9 +8,9 @@ name: trivial_smem_clause_load_smrd4_x1
 body: |
   bb.0:
     ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x1
-    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
     ; GCN-NEXT: S_ENDPGM 0
-    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
     S_ENDPGM 0
 ...
 ---
@@ -20,11 +20,11 @@ name: trivial_smem_clause_load_smrd4_x2
 body: |
   bb.0:
     ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2
-    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
     ; GCN-NEXT: S_ENDPGM 0
-    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
     S_ENDPGM 0
 ...
 ---
@@ -34,13 +34,13 @@ name: trivial_smem_clause_load_smrd4_x3
 body: |
   bb.0:
     ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x3
-    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
-    ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0
-    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
-    ; GCN-NEXT: S_ENDPGM 0
-    $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
-    $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
+    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+    ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0, 0
+    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
+    ; GCN-NEXT: S_ENDPGM 0
+    $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+    $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0, 0
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
     S_ENDPGM 0
 ...
 ---
@@ -50,15 +50,15 @@ name: trivial_smem_clause_load_smrd4_x4
 body: |
   bb.0:
     ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x4
-    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
-    ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0
-    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
-    ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0
-    ; GCN-NEXT: S_ENDPGM 0
-    $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
-    $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
-    $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0
+    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+    ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0, 0
+    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
+    ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0, 0
+    ; GCN-NEXT: S_ENDPGM 0
+    $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+    $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0, 0
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
+    $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0, 0
     S_ENDPGM 0
 ...
 ---
@@ -67,11 +67,11 @@ name: trivial_smem_clause_load_smrd4_x2_
 body: |
   bb.0:
     ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2_sameptr
-    ; GCN: $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    ; GCN-NEXT: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+    ; GCN: $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    ; GCN-NEXT: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
     ; GCN-NEXT: S_ENDPGM 0
-    $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+    $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
     S_ENDPGM 0
 ...
 ---
@@ -81,9 +81,9 @@ name: smrd_load4_overwrite_ptr_lo
 body: |
   bb.0:
     ; GCN-LABEL: name: smrd_load4_overwrite_ptr_lo
-    ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+    ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
     ; GCN-NEXT: S_ENDPGM 0
-    $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+    $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
     S_ENDPGM 0
 ...
 ---
@@ -93,9 +93,9 @@ name: smrd_load4_overwrite_ptr_hi
 body: |
   bb.0:
     ; GCN-LABEL: name: smrd_load4_overwrite_ptr_hi
-    ; GCN: $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+    ; GCN: $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
     ; GCN-NEXT: S_ENDPGM 0
-    $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+    $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
     S_ENDPGM 0
 ...
 ---
@@ -105,9 +105,9 @@ name: smrd_load8_overwrite_ptr
 body: |
   bb.0:
     ; GCN-LABEL: name: smrd_load8_overwrite_ptr
-    ; GCN: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
+    ; GCN: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
     ; GCN-NEXT: S_ENDPGM 0
-    $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
+    $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
     S_ENDPGM 0
 ...
 ---
@@ -119,46 +119,46 @@ name: break_smem_clause_at_max_smem_clau
 body: |
   bb.0:
     ; GCN-LABEL: name: break_smem_clause_at_max_smem_clause_size_smrd_load4
-    ; GCN: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    ; GCN-NEXT: $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    ; GCN-NEXT: $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    ; GCN-NEXT: $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    ; GCN-NEXT: $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    ; GCN-NEXT: $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    ; GCN-NEXT: $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    ; GCN-NEXT: $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    ; GCN-NEXT: $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    ; GCN-NEXT: $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    ; GCN-NEXT: $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    ; GCN-NEXT: $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    ; GCN-NEXT: $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    ; GCN-NEXT: $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    ; GCN-NEXT: $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    ; GCN-NEXT: $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    ; GCN-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0
+    ; GCN: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    ; GCN-NEXT: $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    ; GCN-NEXT: $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    ; GCN-NEXT: $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    ; GCN-NEXT: $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    ; GCN-NEXT: $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    ; GCN-NEXT: $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    ; GCN-NEXT: $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    ; GCN-NEXT: $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    ; GCN-NEXT: $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    ; GCN-NEXT: $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    ; GCN-NEXT: $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    ; GCN-NEXT: $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    ; GCN-NEXT: $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    ; GCN-NEXT: $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    ; GCN-NEXT: $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    ; GCN-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0, 0
     ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28
     ; GCN-NEXT: S_ENDPGM 0
-    $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-
-    $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-
-    $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-
-    $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+    $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+
+    $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+
+    $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+
+    $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
 
-    $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0
+    $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0, 0
     $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28
     S_ENDPGM 0
 ...
@@ -169,12 +169,12 @@ name: break_smem_clause_simple_load_smrd
 body: |
   bb.0:
     ; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_lo_ptr
-    ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+    ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
     ; XNACK-NEXT: S_NOP 0
-    ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+    ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
     ; GCN-NEXT: S_ENDPGM 0
-    $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+    $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
     S_ENDPGM 0
 ...
 ---
@@ -184,11 +184,11 @@ name: break_smem_clause_simple_load_smrd
 body: |
   bb.0:
     ; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_hi_ptr
-    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
     ; GCN-NEXT: S_ENDPGM 0
-    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
     S_ENDPGM 0
 ...
 ---
@@ -198,12 +198,12 @@ name: break_smem_clause_simple_load_smrd
 body: |
   bb.0:
     ; GCN-LABEL: name: break_smem_clause_simple_load_smrd8_ptr
-    ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
+    ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
     ; XNACK-NEXT: S_NOP 0
-    ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
+    ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
     ; GCN-NEXT: S_ENDPGM 0
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
-    $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
+    $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
     S_ENDPGM 0
 ...
 ---
@@ -213,11 +213,11 @@ name: break_smem_clause_simple_load_smrd
 body: |
   bb.0:
     ; GCN-LABEL: name: break_smem_clause_simple_load_smrd16_ptr
-    ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
-    ; GCN-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0
+    ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
+    ; GCN-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0, 0
     ; GCN-NEXT: S_ENDPGM 0
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
-    $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
+    $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0, 0
     S_ENDPGM 0
 ...
 ---
@@ -228,16 +228,16 @@ body: |
   ; GCN-LABEL: name: break_smem_clause_block_boundary_load_smrd8_ptr
   ; GCN: bb.0:
   ; GCN:   successors: %bb.1(0x80000000)
-  ; GCN:   $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
+  ; GCN:   $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
   ; GCN: bb.1:
   ; XNACK-NEXT:   S_NOP 0
-  ; GCN-NEXT:   $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
+  ; GCN-NEXT:   $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
   ; GCN-NEXT:   S_ENDPGM 0
   bb.0:
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
 
   bb.1:
-    $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
+    $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
     S_ENDPGM 0
 ...
 ---
@@ -248,11 +248,11 @@ name: break_smem_clause_store_load_into_
 body: |
   bb.0:
     ; GCN-LABEL: name: break_smem_clause_store_load_into_ptr_smrd4
-    ; GCN: S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0
-    ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
+    ; GCN: S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0, 0
+    ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
     ; GCN-NEXT: S_ENDPGM 0
-    S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0
-    $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
+    S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0, 0
+    $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
     S_ENDPGM 0
 ...
 ---
@@ -264,11 +264,11 @@ name: break_smem_clause_store_load_into_
 body: |
   bb.0:
     ; GCN-LABEL: name: break_smem_clause_store_load_into_data_smrd4
-    ; GCN: S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0
-    ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+    ; GCN: S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0, 0
+    ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
     ; GCN-NEXT: S_ENDPGM 0
-    S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0
-    $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+    S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0, 0
+    $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
     S_ENDPGM 0
 ...
 ---
@@ -278,13 +278,13 @@ name: valu_inst_breaks_smem_clause
 body: |
   bb.0:
     ; GCN-LABEL: name: valu_inst_breaks_smem_clause
-    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
     ; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec
-    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
     ; GCN-NEXT: S_ENDPGM 0
-    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
     $vgpr8 = V_MOV_B32_e32 0, implicit $exec
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
     S_ENDPGM 0
 ...
 ---
@@ -294,13 +294,13 @@ name: salu_inst_breaks_smem_clause
 body: |
   bb.0:
     ; GCN-LABEL: name: salu_inst_breaks_smem_clause
-    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
     ; GCN-NEXT: $sgpr8 = S_MOV_B32 0
-    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
     ; GCN-NEXT: S_ENDPGM 0
-    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
     $sgpr8 = S_MOV_B32 0
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
     S_ENDPGM 0
 ...
 ---
@@ -309,13 +309,13 @@ name: ds_inst_breaks_smem_clause
 body: |
   bb.0:
     ; GCN-LABEL: name: ds_inst_breaks_smem_clause
-    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
     ; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
-    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
     ; GCN-NEXT: S_ENDPGM 0
-    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
     $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
     S_ENDPGM 0
 ...
 ---
@@ -325,13 +325,13 @@ name: flat_inst_breaks_smem_clause
 body: |
   bb.0:
     ; GCN-LABEL: name: flat_inst_breaks_smem_clause
-    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
-    ; GCN-NEXT: S_ENDPGM 0
-    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-    $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+    ; GCN-NEXT: S_ENDPGM 0
+    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+    $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
     S_ENDPGM 0
 ...
 ---
@@ -341,11 +341,11 @@ name: implicit_use_breaks_smem_clause
 body: |
   bb.0:
     ; GCN-LABEL: name: implicit_use_breaks_smem_clause
-    ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13
+    ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0, implicit $sgpr12_sgpr13
     ; XNACK-NEXT: S_NOP 0
-    ; GCN-NEXT: $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0
+    ; GCN-NEXT: $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0, 0
     ; GCN-NEXT: S_ENDPGM 0
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13
-    $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0, implicit $sgpr12_sgpr13
+    $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0, 0
     S_ENDPGM 0
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir Tue Apr 30 15:08:23 2019
@@ -7,10 +7,10 @@ name: trivial_clause_load_flat4_x1
 body: |
   bb.0:
     ; GCN-LABEL: name: trivial_clause_load_flat4_x1
-    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: S_ENDPGM 0
 
-    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...
 ---
@@ -20,12 +20,12 @@ name: trivial_clause_load_flat4_x2
 body: |
   bb.0:
     ; GCN-LABEL: name: trivial_clause_load_flat4_x2
-    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: S_ENDPGM 0
 
-    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...
 ---
@@ -35,14 +35,14 @@ name: trivial_clause_load_flat4_x3
 body: |
   bb.0:
     ; GCN-LABEL: name: trivial_clause_load_flat4_x3
-    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: S_ENDPGM 0
 
-    $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...
 ---
@@ -52,16 +52,16 @@ name: trivial_clause_load_flat4_x4
 body: |
   bb.0:
     ; GCN-LABEL: name: trivial_clause_load_flat4_x4
-    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: S_ENDPGM 0
 
-    $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...
 ---
@@ -71,12 +71,12 @@ name: trivial_clause_load_flat4_x2_samep
 body: |
   bb.0:
     ; GCN-LABEL: name: trivial_clause_load_flat4_x2_sameptr
-    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: S_ENDPGM 0
 
-    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...
 ---
@@ -86,10 +86,10 @@ name: flat_load4_overwrite_ptr_lo
 body: |
   bb.0:
     ; GCN-LABEL: name: flat_load4_overwrite_ptr_lo
-    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: S_ENDPGM 0
 
-    $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...
 ---
@@ -99,10 +99,10 @@ name: flat_load4_overwrite_ptr_hi
 body: |
   bb.0:
     ; GCN-LABEL: name: flat_load4_overwrite_ptr_hi
-    ; GCN: $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: S_ENDPGM 0
 
-    $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...
 ---
@@ -112,10 +112,10 @@ name: flat_load8_overwrite_ptr
 body: |
   bb.0:
     ; GCN-LABEL: name: flat_load8_overwrite_ptr
-    ; GCN: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: S_ENDPGM 0
 
-    $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...
 ---
@@ -128,48 +128,48 @@ name: break_clause_at_max_clause_size_fl
 body: |
   bb.0:
     ; GCN-LABEL: name: break_clause_at_max_clause_size_flat_load4
-    ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; XNACK-NEXT: S_NOP 0
-    ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18
     ; GCN-NEXT: S_ENDPGM 0
 
-    $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-
-    $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-
-    $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-
-    $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+
+    $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+
+    $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+
+    $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
-    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     $sgpr0 = S_MOV_B32 $sgpr0, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18
     S_ENDPGM 0
 ...
@@ -180,13 +180,13 @@ name: break_clause_simple_load_flat4_lo_
 body: |
   bb.0:
     ; GCN-LABEL: name: break_clause_simple_load_flat4_lo_ptr
-    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; XNACK-NEXT: S_NOP 0
-    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: S_ENDPGM 0
 
-    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...
 ---
@@ -196,13 +196,13 @@ name: break_clause_simple_load_flat4_hi_
 body: |
   bb.0:
     ; GCN-LABEL: name: break_clause_simple_load_flat4_hi_ptr
-    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; XNACK-NEXT: S_NOP 0
-    ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: S_ENDPGM 0
 
-    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...
 ---
@@ -212,13 +212,13 @@ name: break_clause_simple_load_flat8_ptr
 body: |
   bb.0:
     ; GCN-LABEL: name: break_clause_simple_load_flat8_ptr
-    ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; XNACK-NEXT: S_NOP 0
-    ; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: S_ENDPGM 0
 
-    $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...
 ---
@@ -229,12 +229,12 @@ name: break_clause_simple_load_flat16_pt
 body: |
   bb.0:
     ; GCN-LABEL: name: break_clause_simple_load_flat16_ptr
-    ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; XNACK-NEXT: S_NOP 0
-    ; GCN-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: S_ENDPGM 0
-    $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...
 ---
@@ -249,17 +249,17 @@ body: |
   ; GCN-LABEL: name: break_clause_block_boundary_load_flat8_ptr
   ; GCN: bb.0:
   ; GCN-NEXT:   successors: %bb.1(0x80000000)
-  ; GCN:   $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+  ; GCN:   $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
   ; GCN: bb.1:
   ; XNACK-NEXT:  S_NOP 0
-  ; GCN-NEXT:   $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+  ; GCN-NEXT:   $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
   ; GCN-NEXT:   S_ENDPGM 0
 
   bb.0:
-    $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
   bb.1:
-    $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...
 ---
@@ -270,12 +270,12 @@ name: break_clause_store_load_into_ptr_f
 body: |
   bb.0:
     ; GCN-LABEL: name: break_clause_store_load_into_ptr_flat4
-    ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: S_ENDPGM 0
 
-    FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...
 ---
@@ -287,12 +287,12 @@ name: break_clause_store_load_into_data_
 body: |
   bb.0:
     ; GCN-LABEL: name: break_clause_store_load_into_data_flat4
-    ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: S_ENDPGM 0
 
-    FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...
 ---
@@ -303,15 +303,15 @@ name: valu_inst_breaks_clause
 body: |
   bb.0:
     ; GCN-LABEL: name: valu_inst_breaks_clause
-    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec
     ; XNACK-NEXT: S_NOP 0
-    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: S_ENDPGM 0
 
-    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     $vgpr8 = V_MOV_B32_e32 0, implicit $exec
-    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...
 ---
@@ -322,15 +322,15 @@ name: salu_inst_breaks_clause
 body: |
   bb.0:
     ; GCN-LABEL: name: salu_inst_breaks_clause
-    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: $sgpr8 = S_MOV_B32 0
     ; XNACK-NEXT: S_NOP 0
-    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: S_ENDPGM 0
 
-    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     $sgpr8 = S_MOV_B32 0
-    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...
 ---
@@ -340,15 +340,15 @@ name: ds_inst_breaks_clause
 body: |
   bb.0:
     ; GCN-LABEL: name: ds_inst_breaks_clause
-    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
     ; XNACK-NEXT: S_NOP 0
-    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: S_ENDPGM 0
 
-    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
-    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...
 ---
@@ -358,14 +358,14 @@ name: smrd_inst_breaks_clause
 body: |
   bb.0:
     ; GCN-LABEL: name: smrd_inst_breaks_clause
-    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
-    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: S_ENDPGM 0
 
-    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
-    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...
 ---
@@ -375,13 +375,13 @@ name: implicit_use_breaks_clause
 body: |
   bb.0:
     ; GCN-LABEL: name: implicit_use_breaks_clause
-    ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
+    ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
     ; XNACK-NEXT: S_NOP 0
-    ; GCN-NEXT: $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: S_ENDPGM 0
 
-    $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
-    $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
+    $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...
 ---
@@ -390,12 +390,12 @@ name: trivial_clause_load_mubuf4_x2
 body: |
   bb.0:
     ; GCN-LABEL: name: trivial_clause_load_mubuf4_x2
-    ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+    ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0
 
-    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
-    $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 ...
 ---
@@ -404,13 +404,13 @@ name: break_clause_simple_load_mubuf_off
 body: |
   bb.0:
     ; GCN-LABEL: name: break_clause_simple_load_mubuf_offen_ptr
-    ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+    ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
     ; XNACK-NEXT: S_NOP 0
-    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0
 
-    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
-    $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 ...
 ---
@@ -421,11 +421,11 @@ name: mubuf_load4_overwrite_ptr
 body: |
   bb.0:
     ; GCN-LABEL: name: mubuf_load4_overwrite_ptr
-    ; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+    ; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
     ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
     ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0
-    $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+    $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
     $vgpr1 = V_MOV_B32_e32 0, implicit $exec
     $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
     S_ENDPGM 0
@@ -438,13 +438,13 @@ name: break_clause_flat_load_mubuf_load
 body: |
   bb.0:
     ; GCN-LABEL: name: break_clause_flat_load_mubuf_load
-    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; XNACK-NEXT: S_NOP 0
-    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0
 
-    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 ...
 # Break a clause from interference between mubuf and flat instructions
@@ -459,8 +459,8 @@ name: break_clause_mubuf_load_flat_load
 
 body: |
   bb.0:
-    $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
-    $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     S_ENDPGM 0
 ...
@@ -471,12 +471,12 @@ name: break_clause_atomic_rtn_into_ptr_f
 body: |
   bb.0:
     ; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_flat4
-    ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; XNACK-NEXT: S_NOP 0
     ; GCN-NEXT: $vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: S_ENDPGM 0
 
-    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     $vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...
@@ -487,11 +487,11 @@ body: |
   bb.0:
     ; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_flat4
     ; GCN: FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GCN-NEXT: S_ENDPGM 0
 
     FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...
 ---
@@ -501,12 +501,12 @@ name: break_clause_atomic_rtn_into_ptr_m
 body: |
   bb.0:
     ; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_mubuf4
-    ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+    ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
     ; XNACK-NEXT: S_NOP 0
     ; GCN-NEXT: $vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0
 
-    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
     $vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
     S_ENDPGM 0
 ...
@@ -518,11 +518,11 @@ body: |
   bb.0:
     ; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_mubuf4
     ; GCN: BUFFER_ATOMIC_ADD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
-    ; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0
 
     BUFFER_ATOMIC_ADD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
-    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 ...
 ---
@@ -533,11 +533,11 @@ name: no_break_clause_mubuf_load_novaddr
 body: |
   bb.0:
     ; GCN-LABEL: name: no_break_clause_mubuf_load_novaddr
-    ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+    ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0
-    $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
-    $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 ...
 ---
@@ -547,16 +547,16 @@ name: mix_load_store_clause
 body: |
   bb.0:
     ; GCN-LABEL: name: mix_load_store_clause
-    ; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; XNACK-NEXT: S_NOP 0
-    ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
-    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
-    FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...
 ---
@@ -566,15 +566,15 @@ name: mix_load_store_clause_same_address
 body: |
   bb.0:
     ; GCN-LABEL: name: mix_load_store_clause_same_address
-    ; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; XNACK-NEXT: S_NOP 0
-    ; GCN-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
-    ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
-    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
-    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/clamp-omod-special-case.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/clamp-omod-special-case.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/clamp-omod-special-case.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/clamp-omod-special-case.mir Tue Apr 30 15:08:23 2019
@@ -43,8 +43,8 @@ body:             |
 
     %3 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     %25 = REG_SEQUENCE %3, 1, %24, 2
     %10 = S_MOV_B32 61440
@@ -55,10 +55,10 @@ body:             |
     %26 = V_LSHL_B64 killed %25, 2, implicit $exec
     %16 = REG_SEQUENCE killed %4, 17, %12, 18
     %18 = COPY %26
-    %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
+    %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec
     %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
     %21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 0, 0, implicit $exec
-    BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -105,8 +105,8 @@ body:             |
 
     %3 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     %25 = REG_SEQUENCE %3, 1, %24, 2
     %10 = S_MOV_B32 61440
@@ -117,10 +117,10 @@ body:             |
     %26 = V_LSHL_B64 killed %25, 2, implicit $exec
     %16 = REG_SEQUENCE killed %4, 17, %12, 18
     %18 = COPY %26
-    %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
+    %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec
     %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
     %21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 1, 3, implicit $exec
-    BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 ...
 ---
@@ -168,8 +168,8 @@ body:             |
 
     %3 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     %25 = REG_SEQUENCE %3, 1, %24, 2
     %10 = S_MOV_B32 61440
@@ -180,10 +180,10 @@ body:             |
     %26 = V_LSHL_B64 killed %25, 2, implicit $exec
     %16 = REG_SEQUENCE killed %4, 17, %12, 18
     %18 = COPY %26
-    %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
+    %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec
     %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
     %21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 0, 3, implicit $exec
-    BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -233,8 +233,8 @@ body:             |
 
     %3 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     %25 = REG_SEQUENCE %3, 1, %24, 2
     %10 = S_MOV_B32 61440
@@ -245,10 +245,10 @@ body:             |
     %26 = V_LSHL_B64 killed %25, 2, implicit $exec
     %16 = REG_SEQUENCE killed %4, 17, %12, 18
     %18 = COPY %26
-    %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
+    %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec
     %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
     %21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 1, 0, implicit $exec
-    BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -310,8 +310,8 @@ body:             |
 
     %3 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     %25 = REG_SEQUENCE %3, 1, %24, 2
     %10 = S_MOV_B32 61440
@@ -322,10 +322,10 @@ body:             |
     %26 = V_LSHL_B64 killed %25, 2, implicit $exec
     %16 = REG_SEQUENCE killed %4, 17, %12, 18
     %18 = COPY %26
-    %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
+    %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec
     %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
     %21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 0, 3, implicit $exec
-    BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -375,8 +375,8 @@ body:             |
 
     %3 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     %25 = REG_SEQUENCE %3, 1, %24, 2
     %10 = S_MOV_B32 61440
@@ -387,10 +387,10 @@ body:             |
     %26 = V_LSHL_B64 killed %25, 2, implicit $exec
     %16 = REG_SEQUENCE killed %4, 17, %12, 18
     %18 = COPY %26
-    %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
+    %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec
     %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
     %21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 1, 0, implicit $exec
-    BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir Tue Apr 30 15:08:23 2019
@@ -17,15 +17,15 @@ body:             |
 
     $vgpr0_vgpr1 = IMPLICIT_DEF
     $vgpr4_vgpr5 = IMPLICIT_DEF
-    $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
-    $vgpr4 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+    $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+    $vgpr4 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
     $vgpr2 = IMPLICIT_DEF
     $vgpr3 = IMPLICIT_DEF
     $vgpr6 = IMPLICIT_DEF
     $vgpr0 = V_ADD_I32_e32 16, $vgpr2, implicit-def $vcc, implicit $exec
     $vgpr1 = V_ADDC_U32_e32 $vgpr3, killed $vgpr6, implicit-def dead $vcc, implicit $vcc, implicit $exec
-    FLAT_STORE_DWORD $vgpr2_vgpr3, killed $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
-    FLAT_STORE_DWORD $vgpr0_vgpr1, killed $vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+    FLAT_STORE_DWORD $vgpr2_vgpr3, killed $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+    FLAT_STORE_DWORD $vgpr0_vgpr1, killed $vgpr4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
     S_ENDPGM 0
 
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads.mir Tue Apr 30 15:08:23 2019
@@ -14,7 +14,7 @@ registers:
 body:             |
   bb.0:
     %0 = IMPLICIT_DEF
-    %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+    %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
     %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $exec
-    %3 = FLAT_LOAD_DWORD %0, 4, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+    %3 = FLAT_LOAD_DWORD %0, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir Tue Apr 30 15:08:23 2019
@@ -30,7 +30,7 @@ body: |
     %14:vgpr_32 = V_AND_B32_e32 1, %13, implicit $exec
     %15:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %14, implicit $exec
     %16:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %15, implicit $exec
-    BUFFER_STORE_DWORD_OFFEN_exact %16, undef %17:vgpr_32, undef %18:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into constant-pool, align 1, addrspace 4)
+    BUFFER_STORE_DWORD_OFFEN_exact %16, undef %17:vgpr_32, undef %18:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into constant-pool, align 1, addrspace 4)
     S_ENDPGM 0
 
   bb.2:
@@ -78,7 +78,7 @@ body: |
 
   bb.8:
     successors: %bb.10
-    %31:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %32:vgpr_32, undef %33:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
+    %31:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %32:vgpr_32, undef %33:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
     %34:sreg_64_xexec = V_CMP_NE_U32_e64 0, %31, implicit $exec
     %35:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, %34, implicit $exec
     %28:vreg_1 = COPY %35

Modified: llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir Tue Apr 30 15:08:23 2019
@@ -83,7 +83,7 @@ body:             |
 
   bb.9:
     successors: %bb.10(0x80000000)
-    %19:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %18, undef %20:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
+    %19:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %18, undef %20:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
     %21:sreg_64 = V_CMP_NE_U32_e64 target-flags(amdgpu-gotprel) 0, killed %19.sub0, implicit $exec
     %22:sreg_64 = COPY $exec, implicit-def $exec
     %23:sreg_64 = S_AND_B64 %22, %21, implicit-def dead $scc

Modified: llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir Tue Apr 30 15:08:23 2019
@@ -68,7 +68,7 @@ body:             |
     %23:vreg_128 = COPY killed %17
     %24:sreg_64 = COPY killed %16
     %25:vgpr_32 = V_OR_B32_e32 %22, %11, implicit $exec
-    %26:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %25, undef %27:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
+    %26:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %25, undef %27:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
     %28:vgpr_32 = V_LSHRREV_B32_e32 30, killed %26.sub0, implicit $exec
     %29:vreg_128 = COPY killed %21
     %29.sub0:vreg_128 = COPY %1

Modified: llvm/trunk/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/coalescer-subreg-join.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/coalescer-subreg-join.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-subreg-join.mir Tue Apr 30 15:08:23 2019
@@ -46,10 +46,10 @@ body:             |
     %0 = COPY $sgpr2_sgpr3
     %1 = COPY $vgpr2
     %2 = COPY $vgpr3
-    %3 = S_LOAD_DWORDX8_IMM %0, 0, 0
-    %4 = S_LOAD_DWORDX4_IMM %0, 12, 0
-    %5 = S_LOAD_DWORDX8_IMM %0, 16, 0
-    %6 = S_LOAD_DWORDX4_IMM %0, 28, 0
+    %3 = S_LOAD_DWORDX8_IMM %0, 0, 0, 0
+    %4 = S_LOAD_DWORDX4_IMM %0, 12, 0, 0
+    %5 = S_LOAD_DWORDX8_IMM %0, 16, 0, 0
+    %6 = S_LOAD_DWORDX4_IMM %0, 28, 0, 0
     undef %7.sub0 = S_MOV_B32 212739
     %20 = COPY %7
     %11 = COPY %20

Modified: llvm/trunk/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir Tue Apr 30 15:08:23 2019
@@ -11,7 +11,7 @@
 #
 # GCN-LABEL: bb.6:
 # GCN: successors: %bb.7(0x{{[0-9]+}}), %bb.18(0x{{[0-9]+}})
-# GCN: %{{[0-9]+}}:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %{{[0-9]+}}, 0, 0, 0, 0, 0, implicit $exec
+# GCN: %{{[0-9]+}}:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %{{[0-9]+}}, 0, 0, 0, 0, 0, 0, implicit $exec
 #
 
 --- |
@@ -69,7 +69,7 @@ body: |
     %10:sreg_64 = COPY killed %5
     undef %11.sub2:sreg_128 = COPY %4
     %11.sub3:sreg_128 = COPY %3
-    %12:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET killed %11, 0, 0, 0, 0, 0, implicit $exec
+    %12:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET killed %11, 0, 0, 0, 0, 0, 0, implicit $exec
     undef %13.sub1:vreg_128 = COPY %9.sub1
     %13.sub2:vreg_128 = COPY %9.sub2
     %14:sreg_64 = V_CMP_GT_F32_e64 0, target-flags(amdgpu-rel32-lo) 0, 0, killed %12.sub3, 0, implicit $exec
@@ -161,7 +161,7 @@ body: |
   bb.18:
     successors: %bb.7(0x80000000)
     dead %59:vgpr_32 = V_FMA_F32 0, killed %9.sub2, 0, undef %60:vgpr_32, 0, undef %61:vgpr_32, 0, 0, implicit $exec
-    dead %62:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %63:vgpr_32, undef %64:sreg_128, undef %65:sreg_32, 0, 0, 0, 0, implicit $exec
+    dead %62:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %63:vgpr_32, undef %64:sreg_128, undef %65:sreg_32, 0, 0, 0, 0, 0, implicit $exec
     undef %66.sub1:vreg_128 = COPY %13.sub1
     %66.sub2:vreg_128 = COPY %13.sub2
     %67:sreg_64 = V_CMP_NGT_F32_e64 0, 0, 0, undef %68:vgpr_32, 0, implicit $exec

Modified: llvm/trunk/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir Tue Apr 30 15:08:23 2019
@@ -145,10 +145,10 @@ body: |
     %40:vgpr_32 = V_MAD_F32 0, killed %39, 0, -1090519040, 0, 1056964608, 0, 0, implicit $exec
     %41:vgpr_32 = V_MAD_F32 0, killed %40, 0, 0, 0, -1090519040, 0, 0, implicit $exec
     %42:vgpr_32 = V_CVT_I32_F32_e32 killed %41, implicit $exec
-    %43:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %44:sreg_128, 12, 0 :: (dereferenceable invariant load 4)
+    %43:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %44:sreg_128, 12, 0, 0 :: (dereferenceable invariant load 4)
     %45:vgpr_32 = V_MUL_LO_I32 killed %42, killed %43, implicit $exec
     %46:vgpr_32 = V_LSHLREV_B32_e32 2, killed %45, implicit $exec
-    %47:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN killed %46, undef %48:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
+    %47:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN killed %46, undef %48:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
     %49:sreg_64 = V_CMP_NE_U32_e64 0, killed %47, implicit $exec
     %50:sreg_64 = COPY $exec, implicit-def $exec
     %51:sreg_64 = S_AND_B64 %50, %49, implicit-def dead $scc

Modified: llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.mir Tue Apr 30 15:08:23 2019
@@ -25,7 +25,7 @@ body:             |
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
   ; GCN:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
-  ; GCN:   undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+  ; GCN:   undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
   ; GCN:   undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec
   ; GCN:   %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
   ; GCN:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1
@@ -33,7 +33,7 @@ body:             |
   ; GCN:   %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
   ; GCN:   %5.sub3:sgpr_128 = S_MOV_B32 61440
   ; GCN:   %5.sub2:sgpr_128 = S_MOV_B32 0
-  ; GCN:   BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+  ; GCN:   BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
   ; GCN:   [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
   ; GCN:   [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
   ; GCN:   $exec = S_MOV_B64_term [[S_AND_B64_1]]
@@ -44,7 +44,7 @@ body:             |
   ; GCN:   %5.sub0:sgpr_128 = COPY %5.sub2
   ; GCN:   %5.sub1:sgpr_128 = COPY %5.sub2
   ; GCN:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-  ; GCN:   BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+  ; GCN:   BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
   ; GCN: bb.3:
   ; GCN:   successors: %bb.4(0x80000000)
   ; GCN:   DBG_VALUE
@@ -72,7 +72,7 @@ body:             |
   bb.1:
     successors: %bb.2, %bb.3
 
-    undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+    undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
     undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec
     %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
     %7:vgpr_32 = COPY %5.sub1
@@ -80,7 +80,7 @@ body:             |
     %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
     %5.sub3:sgpr_128 = S_MOV_B32 61440
     %5.sub2:sgpr_128 = S_MOV_B32 0
-    BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+    BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
     %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
     %12:sreg_64 = COPY $exec, implicit-def $exec
     %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
@@ -92,7 +92,7 @@ body:             |
     %5.sub0:sgpr_128 = COPY %5.sub2
     %5.sub1:sgpr_128 = COPY %5.sub2
     %14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+    BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
 
   bb.3:
     $exec = S_OR_B64 $exec, %12, implicit-def $scc
@@ -133,7 +133,7 @@ body:             |
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
   ; GCN:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
-  ; GCN:   undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+  ; GCN:   undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
   ; GCN:   undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec
   ; GCN:   %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
   ; GCN:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1
@@ -141,7 +141,7 @@ body:             |
   ; GCN:   %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
   ; GCN:   %5.sub3:sgpr_128 = S_MOV_B32 61440
   ; GCN:   %5.sub2:sgpr_128 = S_MOV_B32 0
-  ; GCN:   BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+  ; GCN:   BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
   ; GCN:   [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
   ; GCN:   [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
   ; GCN:   $exec = S_MOV_B64_term [[S_AND_B64_1]]
@@ -152,7 +152,7 @@ body:             |
   ; GCN:   %5.sub0:sgpr_128 = COPY %5.sub2
   ; GCN:   %5.sub1:sgpr_128 = COPY %5.sub2
   ; GCN:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-  ; GCN:   BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+  ; GCN:   BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
   ; GCN: bb.3:
   ; GCN:   successors: %bb.4(0x80000000)
   ; GCN: bb.4:
@@ -180,7 +180,7 @@ body:             |
   bb.1:
     successors: %bb.2, %bb.3
 
-    undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+    undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
     undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec
     %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
     %7:vgpr_32 = COPY %5.sub1
@@ -188,7 +188,7 @@ body:             |
     %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
     %5.sub3:sgpr_128 = S_MOV_B32 61440
     %5.sub2:sgpr_128 = S_MOV_B32 0
-    BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+    BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
     %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
     %12:sreg_64 = COPY $exec, implicit-def $exec
     %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
@@ -200,7 +200,7 @@ body:             |
     %5.sub0:sgpr_128 = COPY %5.sub2
     %5.sub1:sgpr_128 = COPY %5.sub2
     %14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+    BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
 
   bb.3:
     $exec = S_OR_B64 $exec, %12, implicit-def $scc
@@ -241,7 +241,7 @@ body:             |
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
   ; GCN:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
-  ; GCN:   undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+  ; GCN:   undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
   ; GCN:   undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec
   ; GCN:   %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
   ; GCN:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1
@@ -249,7 +249,7 @@ body:             |
   ; GCN:   %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
   ; GCN:   %5.sub3:sgpr_128 = S_MOV_B32 61440
   ; GCN:   %5.sub2:sgpr_128 = S_MOV_B32 0
-  ; GCN:   BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+  ; GCN:   BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
   ; GCN:   [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
   ; GCN:   [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
   ; GCN:   $exec = S_MOV_B64_term [[S_AND_B64_1]]
@@ -260,7 +260,7 @@ body:             |
   ; GCN:   %5.sub0:sgpr_128 = COPY %5.sub2
   ; GCN:   %5.sub1:sgpr_128 = COPY %5.sub2
   ; GCN:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-  ; GCN:   BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+  ; GCN:   BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
   ; GCN: bb.3:
   ; GCN:   successors: %bb.4(0x80000000)
   ; GCN: bb.4:
@@ -289,7 +289,7 @@ body:             |
   bb.1:
     successors: %bb.2, %bb.3
 
-    undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+    undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
     undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec
     %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
     %7:vgpr_32 = COPY %5.sub1
@@ -297,7 +297,7 @@ body:             |
     %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
     %5.sub3:sgpr_128 = S_MOV_B32 61440
     %5.sub2:sgpr_128 = S_MOV_B32 0
-    BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+    BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
     %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
     %12:sreg_64 = COPY $exec, implicit-def $exec
     %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
@@ -309,7 +309,7 @@ body:             |
     %5.sub0:sgpr_128 = COPY %5.sub2
     %5.sub1:sgpr_128 = COPY %5.sub2
     %14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+    BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
 
   bb.3:
     $exec = S_OR_B64 $exec, %12, implicit-def $scc
@@ -350,7 +350,7 @@ body:             |
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
   ; GCN:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
-  ; GCN:   undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+  ; GCN:   undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
   ; GCN:   undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec
   ; GCN:   %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
   ; GCN:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1
@@ -358,7 +358,7 @@ body:             |
   ; GCN:   %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
   ; GCN:   %5.sub3:sgpr_128 = S_MOV_B32 61440
   ; GCN:   %5.sub2:sgpr_128 = S_MOV_B32 0
-  ; GCN:   BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+  ; GCN:   BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
   ; GCN:   [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
   ; GCN:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
   ; GCN:   [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
@@ -370,7 +370,7 @@ body:             |
   ; GCN:   %5.sub0:sgpr_128 = COPY %5.sub2
   ; GCN:   %5.sub1:sgpr_128 = COPY %5.sub2
   ; GCN:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-  ; GCN:   BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+  ; GCN:   BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
   ; GCN: bb.3:
   ; GCN:   successors: %bb.4(0x80000000)
   ; GCN:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
@@ -400,7 +400,7 @@ body:             |
   bb.1:
     successors: %bb.2, %bb.3
 
-    undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+    undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
     undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec
     %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
     %7:vgpr_32 = COPY %5.sub1
@@ -408,7 +408,7 @@ body:             |
     %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
     %5.sub3:sgpr_128 = S_MOV_B32 61440
     %5.sub2:sgpr_128 = S_MOV_B32 0
-    BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+    BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
     %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
     %12:sreg_64 = COPY $exec, implicit-def $exec
     %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
@@ -420,7 +420,7 @@ body:             |
     %5.sub0:sgpr_128 = COPY %5.sub2
     %5.sub1:sgpr_128 = COPY %5.sub2
     %14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+    BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
 
   bb.3:
     %15:sgpr_32 = IMPLICIT_DEF
@@ -463,7 +463,7 @@ body:             |
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
   ; GCN:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
-  ; GCN:   undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+  ; GCN:   undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
   ; GCN:   undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec
   ; GCN:   %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
   ; GCN:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1
@@ -471,7 +471,7 @@ body:             |
   ; GCN:   %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
   ; GCN:   %5.sub3:sgpr_128 = S_MOV_B32 61440
   ; GCN:   %5.sub2:sgpr_128 = S_MOV_B32 0
-  ; GCN:   BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+  ; GCN:   BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
   ; GCN:   [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
   ; GCN:   [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
   ; GCN:   $exec = S_MOV_B64_term [[S_AND_B64_1]]
@@ -482,7 +482,7 @@ body:             |
   ; GCN:   %5.sub0:sgpr_128 = COPY %5.sub2
   ; GCN:   %5.sub1:sgpr_128 = COPY %5.sub2
   ; GCN:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-  ; GCN:   BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+  ; GCN:   BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
   ; GCN: bb.3:
   ; GCN:   successors: %bb.4(0x80000000)
   ; GCN:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
@@ -512,7 +512,7 @@ body:             |
   bb.1:
     successors: %bb.2, %bb.3
 
-    undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+    undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
     undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec
     %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
     %7:vgpr_32 = COPY %5.sub1
@@ -520,7 +520,7 @@ body:             |
     %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
     %5.sub3:sgpr_128 = S_MOV_B32 61440
     %5.sub2:sgpr_128 = S_MOV_B32 0
-    BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+    BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
     %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
     %12:sreg_64 = COPY $exec, implicit-def $exec
     %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
@@ -532,7 +532,7 @@ body:             |
     %5.sub0:sgpr_128 = COPY %5.sub2
     %5.sub1:sgpr_128 = COPY %5.sub2
     %14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+    BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
 
   bb.3:
     $exec = S_OR_B64 $exec, %12, implicit-def $scc
@@ -575,7 +575,7 @@ body:             |
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
   ; GCN:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
-  ; GCN:   undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+  ; GCN:   undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
   ; GCN:   undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec
   ; GCN:   %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
   ; GCN:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1
@@ -583,7 +583,7 @@ body:             |
   ; GCN:   %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
   ; GCN:   %5.sub3:sgpr_128 = S_MOV_B32 61440
   ; GCN:   %5.sub2:sgpr_128 = S_MOV_B32 0
-  ; GCN:   BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+  ; GCN:   BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
   ; GCN:   [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
   ; GCN:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
   ; GCN:   [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
@@ -595,7 +595,7 @@ body:             |
   ; GCN:   %5.sub0:sgpr_128 = COPY %5.sub2
   ; GCN:   %5.sub1:sgpr_128 = COPY %5.sub2
   ; GCN:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-  ; GCN:   BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+  ; GCN:   BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
   ; GCN: bb.3:
   ; GCN:   successors: %bb.4(0x80000000)
   ; GCN:   $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc
@@ -623,7 +623,7 @@ body:             |
   bb.1:
     successors: %bb.2, %bb.3
 
-    undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+    undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
     undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec
     %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
     %7:vgpr_32 = COPY %5.sub1
@@ -631,7 +631,7 @@ body:             |
     %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
     %5.sub3:sgpr_128 = S_MOV_B32 61440
     %5.sub2:sgpr_128 = S_MOV_B32 0
-    BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+    BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
     %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
     %12:sreg_64 = COPY $exec, implicit-def $exec
     %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
@@ -643,7 +643,7 @@ body:             |
     %5.sub0:sgpr_128 = COPY %5.sub2
     %5.sub1:sgpr_128 = COPY %5.sub2
     %14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+    BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
 
   bb.3:
     $exec = S_OR_B64 $exec, %12,  implicit-def $scc
@@ -683,7 +683,7 @@ body:             |
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
   ; GCN:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
-  ; GCN:   undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+  ; GCN:   undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
   ; GCN:   undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec
   ; GCN:   %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
   ; GCN:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1
@@ -691,7 +691,7 @@ body:             |
   ; GCN:   %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
   ; GCN:   %5.sub3:sgpr_128 = S_MOV_B32 61440
   ; GCN:   %5.sub2:sgpr_128 = S_MOV_B32 0
-  ; GCN:   BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+  ; GCN:   BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
   ; GCN:   [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
   ; GCN:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
   ; GCN:   [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
@@ -703,7 +703,7 @@ body:             |
   ; GCN:   %5.sub0:sgpr_128 = COPY %5.sub2
   ; GCN:   %5.sub1:sgpr_128 = COPY %5.sub2
   ; GCN:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-  ; GCN:   BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+  ; GCN:   BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
   ; GCN: bb.3:
   ; GCN:   successors: %bb.4(0x80000000)
   ; GCN:   $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc
@@ -731,7 +731,7 @@ body:             |
   bb.1:
     successors: %bb.2, %bb.3
 
-    undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+    undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
     undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec
     %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
     %7:vgpr_32 = COPY %5.sub1
@@ -739,7 +739,7 @@ body:             |
     %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
     %5.sub3:sgpr_128 = S_MOV_B32 61440
     %5.sub2:sgpr_128 = S_MOV_B32 0
-    BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+    BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
     %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
     %12:sreg_64 = COPY $exec, implicit-def $exec
     %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
@@ -751,7 +751,7 @@ body:             |
     %5.sub0:sgpr_128 = COPY %5.sub2
     %5.sub1:sgpr_128 = COPY %5.sub2
     %14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+    BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
 
   bb.3:
     $exec = S_OR_B64 $exec, %12,  implicit-def $scc
@@ -791,7 +791,7 @@ body:             |
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
   ; GCN:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
-  ; GCN:   undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+  ; GCN:   undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
   ; GCN:   undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec
   ; GCN:   %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
   ; GCN:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1
@@ -799,7 +799,7 @@ body:             |
   ; GCN:   %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
   ; GCN:   %5.sub3:sgpr_128 = S_MOV_B32 61440
   ; GCN:   %5.sub2:sgpr_128 = S_MOV_B32 0
-  ; GCN:   BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+  ; GCN:   BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
   ; GCN:   [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
   ; GCN:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
   ; GCN:   [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
@@ -811,7 +811,7 @@ body:             |
   ; GCN:   %5.sub0:sgpr_128 = COPY %5.sub2
   ; GCN:   %5.sub1:sgpr_128 = COPY %5.sub2
   ; GCN:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-  ; GCN:   BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+  ; GCN:   BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
   ; GCN: bb.3:
   ; GCN:   successors: %bb.5(0x80000000)
   ; GCN:   $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc
@@ -842,7 +842,7 @@ body:             |
   bb.1:
     successors: %bb.2, %bb.3
 
-    undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+    undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
     undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec
     %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
     %7:vgpr_32 = COPY %5.sub1
@@ -850,7 +850,7 @@ body:             |
     %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
     %5.sub3:sgpr_128 = S_MOV_B32 61440
     %5.sub2:sgpr_128 = S_MOV_B32 0
-    BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+    BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
     %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
     %12:sreg_64 = COPY $exec, implicit-def $exec
     %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
@@ -862,7 +862,7 @@ body:             |
     %5.sub0:sgpr_128 = COPY %5.sub2
     %5.sub1:sgpr_128 = COPY %5.sub2
     %14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+    BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
 
   bb.3:
     $exec = S_OR_B64 $exec, %12, implicit-def $scc

Modified: llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir Tue Apr 30 15:08:23 2019
@@ -44,7 +44,7 @@ body:             |
     liveins: $sgpr0_sgpr1
 
     %0 = COPY $sgpr0_sgpr1
-    %1 = S_LOAD_DWORDX2_IMM %0, 36, 0
+    %1 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
     %2 = COPY %1.sub1
     %3 = COPY %1.sub0
     %4 = S_MOV_B32 61440
@@ -54,7 +54,7 @@ body:             |
     %8 = S_MOV_B32 9999
     %9 = S_AND_B32 killed %7, killed %8, implicit-def dead $scc
     %10 = COPY %9
-    BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -130,7 +130,7 @@ body:             |
 
     %3 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 36, 0
+    %4 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
     %31 = V_ASHRREV_I32_e64 31, %3, implicit $exec
     %32 = REG_SEQUENCE %3, 1, %31, 2
     %33 = V_LSHLREV_B64 2, killed %32, implicit $exec
@@ -144,19 +144,19 @@ body:             |
     %34 = V_MOV_B32_e32 63, implicit $exec
 
     %27 = V_AND_B32_e64 %26, %24, implicit $exec
-    FLAT_STORE_DWORD %37, %27, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %37, %27, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %28 = V_AND_B32_e64 %24, %26, implicit $exec
-    FLAT_STORE_DWORD %37, %28, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %37, %28, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %29 = V_AND_B32_e32 %26, %24, implicit $exec
-    FLAT_STORE_DWORD %37, %29, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %37, %29, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %30 = V_AND_B32_e64 %26, %26, implicit $exec
-    FLAT_STORE_DWORD %37, %30, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %37, %30, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %31 = V_AND_B32_e64 %34, %34, implicit $exec
-    FLAT_STORE_DWORD %37, %31, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %37, %31, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     S_ENDPGM 0
 
@@ -210,7 +210,7 @@ body:             |
     liveins: $sgpr0_sgpr1
 
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 36, 0
+    %4 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
     %5 = S_MOV_B32 1
     %6 = COPY %4.sub1
     %7 = COPY %4.sub0
@@ -219,7 +219,7 @@ body:             |
     %10 = REG_SEQUENCE killed %7, 1, killed %6, 2, killed %9, 3, killed %8, 4
     %12 = S_LSHL_B32 killed %5, 12, implicit-def dead $scc
     %13 = COPY %12
-    BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -316,7 +316,7 @@ body:             |
 
     %2 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %3 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %3 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     %15 = V_ASHRREV_I32_e64 31, %2, implicit $exec
     %16 = REG_SEQUENCE %2, 1, %15, 2
     %17 = V_LSHLREV_B64 2, killed %16, implicit $exec
@@ -332,34 +332,34 @@ body:             |
     %27 = S_MOV_B32 -4
 
     %11 = V_LSHLREV_B32_e64 12, %10, implicit $exec
-    FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %12 = V_LSHLREV_B32_e64 %7, 12, implicit $exec
-    FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %13 = V_LSHL_B32_e64 %7, 12, implicit $exec
-    FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %13, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %14 = V_LSHL_B32_e64 12, %7, implicit $exec
-    FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %14, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %15 = V_LSHL_B32_e64 12, %24, implicit $exec
-    FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %15, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %22 = V_LSHL_B32_e64 %6, 12, implicit $exec
-    FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %22, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %23 = V_LSHL_B32_e64 %6, 32, implicit $exec
-    FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %23, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %25 = V_LSHL_B32_e32 %6, %6, implicit $exec
-    FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %25, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %26 = V_LSHLREV_B32_e32 11, %24, implicit $exec
-    FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %26, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %28 = V_LSHL_B32_e32 %27, %6, implicit $exec
-    FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %28, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     S_ENDPGM 0
 
@@ -410,7 +410,7 @@ body:             |
     liveins: $sgpr0_sgpr1
 
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 36, 0
+    %4 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
     %5 = S_MOV_B32 999123
     %6 = COPY %4.sub1
     %7 = COPY %4.sub0
@@ -419,7 +419,7 @@ body:             |
     %10 = REG_SEQUENCE killed %7, 1, killed %6, 2, killed %9, 3, killed %8, 4
     %12 = S_ASHR_I32 killed %5, 12, implicit-def dead $scc
     %13 = COPY %12
-    BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -519,7 +519,7 @@ body:             |
 
     %2 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %3 = S_LOAD_DWORDX2_IMM %0, 36, 0
+    %3 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
     %15 = V_ASHRREV_I32_e64 31, %2, implicit $exec
     %16 = REG_SEQUENCE %2, 1, %15, 2
     %17 = V_LSHLREV_B64 2, killed %16, implicit $exec
@@ -540,34 +540,34 @@ body:             |
     %35 = V_MOV_B32_e32 2, implicit $exec
 
     %11 = V_ASHRREV_I32_e64 8, %10, implicit $exec
-    FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %12 = V_ASHRREV_I32_e64 %8, %10, implicit $exec
-    FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %13 = V_ASHR_I32_e64 %7, 3, implicit $exec
-    FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %13, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %14 = V_ASHR_I32_e64 7, %32, implicit $exec
-    FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %14, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %15 = V_ASHR_I32_e64 %27, %24, implicit $exec
-    FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %15, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %22 = V_ASHR_I32_e64 %6, 4, implicit $exec
-    FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %22, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %23 = V_ASHR_I32_e64 %6, %33, implicit $exec
-    FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %23, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %25 = V_ASHR_I32_e32 %34, %34, implicit $exec
-    FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %25, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %26 = V_ASHRREV_I32_e32 11, %10, implicit $exec
-    FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %26, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %28 = V_ASHR_I32_e32 %27, %35, implicit $exec
-    FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %28, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     S_ENDPGM 0
 
@@ -618,7 +618,7 @@ body:             |
     liveins: $sgpr0_sgpr1
 
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 36, 0
+    %4 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
     %5 = S_MOV_B32 -999123
     %6 = COPY %4.sub1
     %7 = COPY %4.sub0
@@ -627,7 +627,7 @@ body:             |
     %10 = REG_SEQUENCE killed %7, 1, killed %6, 2, killed %9, 3, killed %8, 4
     %12 = S_LSHR_B32 killed %5, 12, implicit-def dead $scc
     %13 = COPY %12
-    BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -728,7 +728,7 @@ body:             |
 
     %2 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %3 = S_LOAD_DWORDX2_IMM %0, 36, 0
+    %3 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
     %15 = V_ASHRREV_I32_e64 31, %2, implicit $exec
     %16 = REG_SEQUENCE %2, 1, %15, 2
     %17 = V_LSHLREV_B64 2, killed %16, implicit $exec
@@ -749,34 +749,34 @@ body:             |
     %35 = V_MOV_B32_e32 2, implicit $exec
 
     %11 = V_LSHRREV_B32_e64 8, %10, implicit $exec
-    FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %12 = V_LSHRREV_B32_e64 %8, %10, implicit $exec
-    FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %13 = V_LSHR_B32_e64 %7, 3, implicit $exec
-    FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %13, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %14 = V_LSHR_B32_e64 7, %32, implicit $exec
-    FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %14, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %15 = V_LSHR_B32_e64 %27, %24, implicit $exec
-    FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %15, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %22 = V_LSHR_B32_e64 %6, 4, implicit $exec
-    FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %22, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %23 = V_LSHR_B32_e64 %6, %33, implicit $exec
-    FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %23, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %25 = V_LSHR_B32_e32 %34, %34, implicit $exec
-    FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %25, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %26 = V_LSHRREV_B32_e32 11, %10, implicit $exec
-    FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %26, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %28 = V_LSHR_B32_e32 %27, %35, implicit $exec
-    FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD %20, %28, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     S_ENDPGM 0
 
@@ -800,7 +800,7 @@ body:             |
   bb.0:
     %0 = V_MOV_B32_e32 0, implicit $exec
     %2 = V_XOR_B32_e64 killed %0, undef %1, implicit $exec
-    FLAT_STORE_DWORD undef %3, %2, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD undef %3, %2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir Tue Apr 30 15:08:23 2019
@@ -291,7 +291,7 @@ body:             |
   bb.3..lr.ph3410.preheader:
     successors: %bb.4(0x80000000)
 
-    dead %22:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %53.sub3, undef %24:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
+    dead %22:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %53.sub3, undef %24:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
     dead %60:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
     %36:sreg_64 = S_AND_B64 $exec, -1, implicit-def dead $scc
     dead %67:vgpr_32 = V_MOV_B32_e32 0, implicit $exec

Modified: llvm/trunk/test/CodeGen/AMDGPU/dce-disjoint-intervals.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/dce-disjoint-intervals.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/dce-disjoint-intervals.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/dce-disjoint-intervals.mir Tue Apr 30 15:08:23 2019
@@ -11,7 +11,7 @@ body:             |
   bb.0:
     liveins: $sgpr0_sgpr1
 
-    %10:sreg_128 = S_LOAD_DWORDX4_IMM killed $noreg, 9, 0
+    %10:sreg_128 = S_LOAD_DWORDX4_IMM killed $noreg, 9, 0, 0
     S_NOP 0, implicit-def %4:sreg_128, implicit %10.sub1:sreg_128
     S_CBRANCH_SCC0 %bb.3, implicit undef $scc
     S_BRANCH %bb.1
@@ -26,7 +26,7 @@ body:             |
     S_BRANCH %bb.4
 
   bb.3:
-    %10:sreg_128 = S_LOAD_DWORDX4_IMM killed $noreg, 10, 0
+    %10:sreg_128 = S_LOAD_DWORDX4_IMM killed $noreg, 10, 0, 0
     %7:sreg_32_xm0 = COPY %10.sub1:sreg_128
     %8:sreg_32_xm0 = COPY %10.sub2:sreg_128
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/dead-lane.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/dead-lane.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/dead-lane.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/dead-lane.mir Tue Apr 30 15:08:23 2019
@@ -12,7 +12,7 @@ body:             |
     %1:vgpr_32 = V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %0:vgpr_32, implicit $exec
     %3:vreg_64 = REG_SEQUENCE %1:vgpr_32, %subreg.sub0, %2:vgpr_32, %subreg.sub1
-    FLAT_STORE_DWORD undef %4:vreg_64, %3.sub0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD undef %4:vreg_64, %3.sub0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/dead-mi-use-same-intr.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/dead-mi-use-same-intr.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/dead-mi-use-same-intr.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/dead-mi-use-same-intr.mir Tue Apr 30 15:08:23 2019
@@ -50,6 +50,6 @@ body: |
   %0 = IMPLICIT_DEF
   %1 = IMPLICIT_DEF
   %2 = IMPLICIT_DEF
-  GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %2, 0, 0, 0, implicit $exec
+  GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %2, 0, 0, 0, 0, implicit $exec
   dead %2:vgpr_32 = V_MAC_F32_e32 %0:vgpr_32, %1:vgpr_32, %2:vgpr_32, implicit $exec
   S_ENDPGM 0

Modified: llvm/trunk/test/CodeGen/AMDGPU/dead_copy.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/dead_copy.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/dead_copy.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/dead_copy.mir Tue Apr 30 15:08:23 2019
@@ -23,5 +23,5 @@ body:    |
     $vgpr10 = COPY killed $sgpr14, implicit $exec
     $vgpr11 = COPY killed $sgpr15, implicit $exec
 
-    FLAT_STORE_DWORDX4 $vgpr10_vgpr11, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORDX4 $vgpr10_vgpr11, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir Tue Apr 30 15:08:23 2019
@@ -71,7 +71,7 @@ body:             |
   ; CHECK:   dead %26:vgpr_32 = V_MAD_F32 0, [[V_MAC_F32_e32_]], 0, [[DEF4]], 0, [[DEF1]], 0, 0, implicit $exec
   ; CHECK:   dead %27:vgpr_32 = V_MAD_F32 0, [[V_MAC_F32_e32_]], 0, [[DEF5]], 0, [[DEF2]], 0, 0, implicit $exec
   ; CHECK:   dead %28:vgpr_32 = V_MAD_F32 0, [[V_MAC_F32_e32_]], 0, [[DEF6]], 0, [[DEF3]], 0, 0, implicit $exec
-  ; CHECK:   GLOBAL_STORE_DWORD [[DEF]], [[DEF10]], 0, 0, 0, implicit $exec
+  ; CHECK:   GLOBAL_STORE_DWORD [[DEF]], [[DEF10]], 0, 0, 0, 0, implicit $exec
   ; CHECK:   S_ENDPGM 0
   bb.0:
     successors: %bb.1
@@ -129,7 +129,7 @@ body:             |
     %26:vgpr_32 = V_MAD_F32 0, %25, 0, %4, 0, %1, 0, 0, implicit $exec
     %27:vgpr_32 = V_MAD_F32 0, %25, 0, %5, 0, %2, 0, 0, implicit $exec
     %28:vgpr_32 = V_MAD_F32 0, %25, 0, %6, 0, %3, 0, 0, implicit $exec
-    GLOBAL_STORE_DWORD %0, %11, 0, 0, 0, implicit $exec
+    GLOBAL_STORE_DWORD %0, %11, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/endpgm-dce.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/endpgm-dce.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/endpgm-dce.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/endpgm-dce.mir Tue Apr 30 15:08:23 2019
@@ -17,7 +17,7 @@ body:             |
     %0 = IMPLICIT_DEF
     %3 = IMPLICIT_DEF
     $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
-    %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+    %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
     %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $exec
     %4 = S_ADD_U32 %3, 1, implicit-def $scc
     S_ENDPGM 0
@@ -25,7 +25,7 @@ body:             |
 ---
 # GCN-LABEL: name: load_without_memoperand
 # GCN:      $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
-# GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr
+# GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 # GCN-NEXT: S_ENDPGM 0
 name: load_without_memoperand
 tracksRegLiveness: true
@@ -41,7 +41,7 @@ body:             |
     %0 = IMPLICIT_DEF
     %3 = IMPLICIT_DEF
     $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
-    %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $exec
     %4 = S_ADD_U32 %3, 1, implicit-def $scc
     S_ENDPGM 0
@@ -49,7 +49,7 @@ body:             |
 ---
 # GCN-LABEL: name: load_volatile
 # GCN:      $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
-# GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4)
+# GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4)
 # GCN-NEXT: S_ENDPGM 0
 name: load_volatile
 tracksRegLiveness: true
@@ -65,7 +65,7 @@ body:             |
     %0 = IMPLICIT_DEF
     %3 = IMPLICIT_DEF
     $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
-    %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4)
+    %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4)
     %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $exec
     %4 = S_ADD_U32 %3, 1, implicit-def $scc
     S_ENDPGM 0
@@ -73,7 +73,7 @@ body:             |
 ---
 # GCN-LABEL: name: store
 # GCN:      $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
-# GCN-NEXT: FLAT_STORE_DWORD %0, %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+# GCN-NEXT: FLAT_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
 # GCN-NEXT: S_ENDPGM 0
 name: store
 tracksRegLiveness: true
@@ -86,7 +86,7 @@ body:             |
     %0 = IMPLICIT_DEF
     %1 = IMPLICIT_DEF
     $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
-    FLAT_STORE_DWORD %0, %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+    FLAT_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
     S_ENDPGM 0
 ...
 ---
@@ -297,12 +297,12 @@ body:             |
     S_ENDPGM 0
 ...
 
 # GCN-LABEL: name: implicit_use_on_s_endpgm
 # GCN: V_ADD_I32
 # GCN: COPY
 # GCN: V_ADDC_U32
 # GCN: S_ENDPGM 0, implicit %3
 name: implicit_use_on_s_endpgm
 tracksRegLiveness: true
 
 body:             |

Modified: llvm/trunk/test/CodeGen/AMDGPU/flat-load-clustering.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/flat-load-clustering.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/flat-load-clustering.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/flat-load-clustering.mir Tue Apr 30 15:08:23 2019
@@ -54,24 +54,24 @@ body:             |
 
     %1 = COPY $sgpr4_sgpr5
     %0 = COPY $vgpr0
-    %3 = S_LOAD_DWORDX2_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
-    %4 = S_LOAD_DWORDX2_IMM %1, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %3 = S_LOAD_DWORDX2_IMM %1, 0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %1, 8, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     %7 = V_LSHLREV_B32_e32 2, %0, implicit $exec
     %2 = V_MOV_B32_e32 0, implicit $exec
     undef %12.sub0 = V_ADD_I32_e32 %4.sub0, %7, implicit-def $vcc, implicit $exec
     %11 = COPY %4.sub1
     %12.sub1 = V_ADDC_U32_e32 %11, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec
-    %5 = FLAT_LOAD_DWORD %12, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.gep1)
+    %5 = FLAT_LOAD_DWORD %12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.gep1)
     undef %9.sub0 = V_ADD_I32_e32 %3.sub0, %7, implicit-def $vcc, implicit $exec
     %8 = COPY %3.sub1
     %9.sub1 = V_ADDC_U32_e32 %8, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec
     undef %13.sub0 = V_ADD_I32_e32 16, %12.sub0, implicit-def $vcc, implicit $exec
     %13.sub1 = V_ADDC_U32_e32 %12.sub1, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec
-    %6 = FLAT_LOAD_DWORD %13, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.gep34)
+    %6 = FLAT_LOAD_DWORD %13, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.gep34)
     undef %10.sub0 = V_ADD_I32_e32 16, %9.sub0, implicit-def $vcc, implicit $exec
     %10.sub1 = V_ADDC_U32_e32 %9.sub1, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec
-    FLAT_STORE_DWORD %9, %5, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep2)
-    FLAT_STORE_DWORD %10, %6, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep4)
+    FLAT_STORE_DWORD %9, %5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep2)
+    FLAT_STORE_DWORD %10, %6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep4)
     S_ENDPGM 0
 
 ...

Added: llvm/trunk/test/CodeGen/AMDGPU/flat-offset-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/flat-offset-bug.ll?rev=359621&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/flat-offset-bug.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/flat-offset-bug.ll Tue Apr 30 15:08:23 2019
@@ -0,0 +1,85 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
+
+; GCN-LABEL: flat_inst_offset:
+; GFX9:  flat_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}] offset:4
+; GFX9:  flat_store_dword v[{{[0-9:]+}}], v{{[0-9]+}} offset:4
+; GFX10: flat_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
+; GFX10: flat_store_dword v[{{[0-9:]+}}], v{{[0-9]+}}{{$}}
+define void @flat_inst_offset(i32* nocapture %p) {
+  %gep = getelementptr inbounds i32, i32* %p, i64 1
+  %load = load i32, i32* %gep, align 4
+  %inc = add nsw i32 %load, 1
+  store i32 %inc, i32* %gep, align 4
+  ret void
+}
+
+; GCN-LABEL: global_inst_offset:
+; GCN: global_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}], off offset:4
+; GCN: global_store_dword v[{{[0-9:]+}}], v{{[0-9]+}}, off offset:4
+define void @global_inst_offset(i32 addrspace(1)* nocapture %p) {
+  %gep = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 1
+  %load = load i32, i32 addrspace(1)* %gep, align 4
+  %inc = add nsw i32 %load, 1
+  store i32 %inc, i32 addrspace(1)* %gep, align 4
+  ret void
+}
+
+; GCN-LABEL: load_i16_lo:
+; GFX9:  flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}}
+; GFX10: flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
+define amdgpu_kernel void @load_i16_lo(i16* %arg, <2 x i16>* %out) {
+  %gep = getelementptr inbounds i16, i16* %arg, i32 4
+  %ld = load i16, i16* %gep, align 2
+  %vec = insertelement <2 x i16> <i16 undef, i16 0>, i16 %ld, i32 0
+  %v = add <2 x i16> %vec, %vec
+  store <2 x i16> %v, <2 x i16>* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: load_i16_hi:
+; GFX9:  flat_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}}
+; GFX10: flat_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
+define amdgpu_kernel void @load_i16_hi(i16* %arg, <2 x i16>* %out) {
+  %gep = getelementptr inbounds i16, i16* %arg, i32 4
+  %ld = load i16, i16* %gep, align 2
+  %vec = insertelement <2 x i16> <i16 0, i16 undef>, i16 %ld, i32 1
+  %v = add <2 x i16> %vec, %vec
+  store <2 x i16> %v, <2 x i16>* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: load_half_lo:
+; GFX9:  flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}}
+; GFX10: flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
+define amdgpu_kernel void @load_half_lo(half* %arg, <2 x half>* %out) {
+  %gep = getelementptr inbounds half, half* %arg, i32 4
+  %ld = load half, half* %gep, align 2
+  %vec = insertelement <2 x half> <half undef, half 0xH0000>, half %ld, i32 0
+  %v = fadd <2 x half> %vec, %vec
+  store <2 x half> %v, <2 x half>* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: load_half_hi:
+; GFX9:  flat_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}}
+; GFX10: flat_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
+define amdgpu_kernel void @load_half_hi(half* %arg, <2 x half>* %out) {
+  %gep = getelementptr inbounds half, half* %arg, i32 4
+  %ld = load half, half* %gep, align 2
+  %vec = insertelement <2 x half> <half 0xH0000, half undef>, half %ld, i32 1
+  %v = fadd <2 x half> %vec, %vec
+  store <2 x half> %v, <2 x half>* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: load_float_lo:
+; GFX9:  flat_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}] offset:16{{$}}
+; GFX10: flat_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
+define amdgpu_kernel void @load_float_lo(float* %arg, float* %out) {
+  %gep = getelementptr inbounds float, float* %arg, i32 4
+  %ld = load float, float* %gep, align 4
+  %v = fadd float %ld, %ld
+  store float %v, float* %out, align 4
+  ret void
+}

Modified: llvm/trunk/test/CodeGen/AMDGPU/fold-imm-copy.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fold-imm-copy.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fold-imm-copy.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fold-imm-copy.mir Tue Apr 30 15:08:23 2019
@@ -10,12 +10,12 @@ body:             |
     liveins: $vgpr0, $sgpr0_sgpr1
     %0:vgpr_32 = COPY $vgpr0
     %1:sgpr_64 = COPY $sgpr0_sgpr1
-    %2:sreg_128 = S_LOAD_DWORDX4_IMM %1, 9, 0
+    %2:sreg_128 = S_LOAD_DWORDX4_IMM %1, 9, 0, 0
     %3:sreg_32_xm0 = S_MOV_B32 2
     %4:vgpr_32 = V_LSHLREV_B32_e64 killed %3, %0, implicit $exec
     %5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     %6:vreg_64 = REG_SEQUENCE killed %4, %subreg.sub0, killed %5, %subreg.sub1
-    %7:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %6, %2, 0, 4, 0, 0, 0, implicit $exec
+    %7:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %6, %2, 0, 4, 0, 0, 0, 0, implicit $exec
     %8:sreg_32_xm0 = S_MOV_B32 65535
     %9:vgpr_32 = COPY %8
     %10:vgpr_32 = V_AND_B32_e32 %7, %9, implicit $exec

Modified: llvm/trunk/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir Tue Apr 30 15:08:23 2019
@@ -158,10 +158,10 @@ body:             |
     %8 = S_MOV_B32 61440
     %9 = S_MOV_B32 -1
     %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
-    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
     %12 = V_MOV_B32_e32 1065353216, implicit $exec
     %13 = V_ADD_F16_e64 0, killed %11, 0, %12, 0, 0, implicit $exec
-    BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+    BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
     S_ENDPGM 0
 
 ...
@@ -222,13 +222,13 @@ body:             |
     %8 = S_MOV_B32 61440
     %9 = S_MOV_B32 -1
     %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
-    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
-    %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+    %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
     %13 = V_MOV_B32_e32 1065353216, implicit $exec
     %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $exec
     %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $exec
-    BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
-    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+    BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
     S_ENDPGM 0
 
 ...
@@ -289,14 +289,14 @@ body:             |
     %8 = S_MOV_B32 61440
     %9 = S_MOV_B32 -1
     %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
-    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
-    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
-    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
     %14 = V_MOV_B32_e32 1065353216, implicit $exec
     %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec
     %16 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec
-    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
-    BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
+    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+    BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
     S_ENDPGM 0
 
 ...
@@ -360,16 +360,16 @@ body:             |
     %8 = S_MOV_B32 61440
     %9 = S_MOV_B32 -1
     %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
-    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
-    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
-    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
     %14 = V_MOV_B32_e32 1065353216, implicit $exec
     %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec
     %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $exec
     %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec
-    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
-    BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
-    BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
+    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+    BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+    BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
     S_ENDPGM 0
 
 ...
@@ -427,13 +427,13 @@ body:             |
     %8 = S_MOV_B32 61440
     %9 = S_MOV_B32 -1
     %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
-    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
-    %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+    %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
     %13 = V_MOV_B32_e32 1, implicit $exec
     %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $exec
     %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $exec
-    BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
-    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+    BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
     S_ENDPGM 0
 
 ...
@@ -494,16 +494,16 @@ body:             |
     %8 = S_MOV_B32 61440
     %9 = S_MOV_B32 -1
     %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
-    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
-    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
-    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
     %14 = V_MOV_B32_e32 -2, implicit $exec
     %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec
     %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $exec
     %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec
-    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
-    BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
-    BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
+    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+    BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+    BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
     S_ENDPGM 0
 
 ...
@@ -564,13 +564,13 @@ body:             |
     %8 = S_MOV_B32 61440
     %9 = S_MOV_B32 -1
     %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
-    %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
-    %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+    %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+    %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
     %13 = V_MOV_B32_e32 15360, implicit $exec
     %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $exec
     %15 = V_ADD_F32_e64 0, %12, 0, %13, 0, 0, implicit $exec
-    BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
-    BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
+    BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
+    BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
     S_ENDPGM 0
 
 ...
@@ -631,13 +631,13 @@ body:             |
     %8 = S_MOV_B32 61440
     %9 = S_MOV_B32 -1
     %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
-    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
-    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
     %13 = V_MOV_B32_e32 80886784, implicit $exec
     %14 = V_ADD_F16_e64 0, %11, 0, %13, 0, 0, implicit $exec
     %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $exec
-    BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
-    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+    BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
     S_ENDPGM 0
 
 ...
@@ -697,13 +697,13 @@ body:             |
     %8 = S_MOV_B32 61440
     %9 = S_MOV_B32 -1
     %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
-    %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
-    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+    %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
     %13 = V_MOV_B32_e32 305413120, implicit $exec
     %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $exec
     %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $exec
-    BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
-    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+    BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
+    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
     S_ENDPGM 0
 
 ...
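
Every BUFFER_* hunk in this file follows one mechanical pattern: the MUBUF pseudos grow one extra trailing immediate operand, so each pre-gfx10 test line picks up an appended 0. A minimal sketch of the pattern, assuming (per this patch's subject) that the appended immediate is the new gfx1010 cache-control (DLC) bit, which stays 0 on older subtargets:

-    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec
+    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec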

Modified: llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir Tue Apr 30 15:08:23 2019
@@ -46,9 +46,9 @@ body:             |
 
     %3 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
-    %6 = S_LOAD_DWORDX2_IMM %0, 13, 0
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
+    %6 = S_LOAD_DWORDX2_IMM %0, 13, 0, 0
     %27 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     %28 = REG_SEQUENCE %3, 1, %27, 2
     %11 = S_MOV_B32 61440
@@ -60,13 +60,13 @@ body:             |
     %17 = REG_SEQUENCE killed %6, 17, %13, 18
     %18 = REG_SEQUENCE killed %4, 17, %13, 18
     %20 = COPY %29
-    %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit $exec
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, implicit $exec
     %22 = COPY %29
-    %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit $exec
+    %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, implicit $exec
     %23 = V_MOV_B32_e32 1090519040, implicit $exec
     %24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit $exec
     %26 = COPY %29
-    BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -117,9 +117,9 @@ body:             |
 
     %3 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
-    %6 = S_LOAD_DWORDX2_IMM %0, 13, 0
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
+    %6 = S_LOAD_DWORDX2_IMM %0, 13, 0, 0
     %27 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     %28 = REG_SEQUENCE %3, 1, %27, 2
     %11 = S_MOV_B32 61440
@@ -131,13 +131,13 @@ body:             |
     %17 = REG_SEQUENCE killed %6, 17, %13, 18
     %18 = REG_SEQUENCE killed %4, 17, %13, 18
     %20 = COPY %29
-    %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit $exec
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, implicit $exec
     %22 = COPY %29
-    %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit $exec
+    %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, implicit $exec
     %23 = V_MOV_B32_e32 1090519040, implicit $exec
     %24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 0, 2, implicit $exec
     %26 = COPY %29
-    BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -188,9 +188,9 @@ body:             |
 
     %3 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
-    %6 = S_LOAD_DWORDX2_IMM %0, 13, 0
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
+    %6 = S_LOAD_DWORDX2_IMM %0, 13, 0, 0
     %27 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     %28 = REG_SEQUENCE %3, 1, %27, 2
     %11 = S_MOV_B32 61440
@@ -202,13 +202,13 @@ body:             |
     %17 = REG_SEQUENCE killed %6, 17, %13, 18
     %18 = REG_SEQUENCE killed %4, 17, %13, 18
     %20 = COPY %29
-    %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit $exec
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, implicit $exec
     %22 = COPY %29
-    %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit $exec
+    %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, implicit $exec
     %23 = V_MOV_B32_e32 1090519040, implicit $exec
     %24 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit $exec
     %26 = COPY %29
-    BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -259,9 +259,9 @@ body:             |
 
     %3 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
-    %6 = S_LOAD_DWORDX2_IMM %0, 13, 0
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
+    %6 = S_LOAD_DWORDX2_IMM %0, 13, 0, 0
     %27 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     %28 = REG_SEQUENCE %3, 1, %27, 2
     %11 = S_MOV_B32 61440
@@ -273,13 +273,13 @@ body:             |
     %17 = REG_SEQUENCE killed %6, 17, %13, 18
     %18 = REG_SEQUENCE killed %4, 17, %13, 18
     %20 = COPY %29
-    %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit $exec
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, implicit $exec
     %22 = COPY %29
-    %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit $exec
+    %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, implicit $exec
     %23 = V_MOV_B32_e32 1090519040, implicit $exec
     %24 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 0, 1, implicit $exec
     %26 = COPY %29
-    BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
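
The S_LOAD_*_IMM updates in this file are the scalar-memory counterpart of the same change: each SMEM load gains one more trailing immediate after its offset and glc operands. A sketch, under the same assumption that the new operand is the gfx10 cache bit:

-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0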

Modified: llvm/trunk/test/CodeGen/AMDGPU/fold-multiple.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fold-multiple.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fold-multiple.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fold-multiple.mir Tue Apr 30 15:08:23 2019
@@ -34,7 +34,7 @@ body:             |
     %3 = S_LSHL_B32 %1, killed %1, implicit-def dead $scc
     %4 = V_AND_B32_e64 killed %2, killed %3, implicit $exec
     %5 = IMPLICIT_DEF
-    BUFFER_STORE_DWORD_OFFSET killed %4, killed %5, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET killed %4, killed %5, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/global-load-store-atomics.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/global-load-store-atomics.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/global-load-store-atomics.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/global-load-store-atomics.mir Tue Apr 30 15:08:23 2019
@@ -93,7 +93,7 @@ body:             |
 
     %1:sgpr_64 = COPY $sgpr0_sgpr1
     %0:vgpr_32 = COPY $vgpr0
-    %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load 8 )
+    %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1, 36, 0, 0 :: (dereferenceable invariant load 8 )
     %5:sreg_32_xm0 = S_MOV_B32 2
     %6:vgpr_32 = V_LSHLREV_B32_e64 killed %5, %0, implicit $exec
     %7:sreg_32_xm0 = S_MOV_B32 0
@@ -109,140 +109,140 @@ body:             |
     %16:vreg_64 = REG_SEQUENCE %17, %subreg.sub0, %18, %subreg.sub1
     %11:vreg_64 = COPY %16
 
-    %10:vgpr_32 = GLOBAL_LOAD_DWORD %11, 16, 0, 0, implicit $exec :: (load 4)
-    GLOBAL_STORE_DWORD %11, %10, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
-    %40:vreg_64 = GLOBAL_LOAD_DWORDX2 %11, 16, 0, 0, implicit $exec :: (load 4)
-    GLOBAL_STORE_DWORDX2 %11, %40, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
-    %41:vreg_96 = GLOBAL_LOAD_DWORDX3 %11, 16, 0, 0, implicit $exec :: (load 4)
-    GLOBAL_STORE_DWORDX3 %11, %41, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
-    %42:vreg_128 = GLOBAL_LOAD_DWORDX4 %11, 16, 0, 0, implicit $exec :: (load 4)
-    GLOBAL_STORE_DWORDX4 %11, %42, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
-    %43:vgpr_32 = GLOBAL_LOAD_SSHORT %11, 16, 0, 0, implicit $exec :: (load 4)
-    GLOBAL_STORE_SHORT %11, %43, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
-    %44:vgpr_32 = GLOBAL_LOAD_USHORT %11, 16, 0, 0, implicit $exec :: (load 4)
-    GLOBAL_STORE_SHORT %11, %44, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
-    %45:vgpr_32 = GLOBAL_LOAD_UBYTE %11, 16, 0, 0, implicit $exec :: (load 4)
-    GLOBAL_STORE_BYTE %11, %45, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
-    %46:vgpr_32 = GLOBAL_LOAD_SBYTE %11, 16, 0, 0, implicit $exec :: (load 4)
-    GLOBAL_STORE_BYTE %11, %46, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
-    %47:vgpr_32 = GLOBAL_LOAD_SBYTE_D16 %11, 16, 0, 0, %46, implicit $exec :: (load 4)
-    GLOBAL_STORE_BYTE_D16_HI %11, %47, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
-    %48:vgpr_32 = GLOBAL_LOAD_UBYTE_D16 %11, 16, 0, 0, %46, implicit $exec :: (load 4)
-    GLOBAL_STORE_BYTE_D16_HI %11, %48, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
-    %49:vgpr_32 = GLOBAL_LOAD_SBYTE_D16_HI %11, 16, 0, 0, %46, implicit $exec :: (load 4)
-    GLOBAL_STORE_BYTE_D16_HI %11, %49, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
-    %50:vgpr_32 = GLOBAL_LOAD_UBYTE_D16_HI %11, 16, 0, 0, %46, implicit $exec :: (load 4)
-    GLOBAL_STORE_BYTE_D16_HI %11, %50, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
-    %51:vgpr_32 = GLOBAL_LOAD_SHORT_D16_HI %11, 16, 0, 0, %46, implicit $exec :: (load 4)
-    GLOBAL_STORE_SHORT_D16_HI %11, %51, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
-    %52:vgpr_32 = GLOBAL_LOAD_SHORT_D16 %11, 16, 0, 0, %46, implicit $exec :: (load 4)
-    GLOBAL_STORE_SHORT_D16_HI %11, %52, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    %10:vgpr_32 = GLOBAL_LOAD_DWORD %11, 16, 0, 0, 0, implicit $exec :: (load 4)
+    GLOBAL_STORE_DWORD %11, %10, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    %40:vreg_64 = GLOBAL_LOAD_DWORDX2 %11, 16, 0, 0, 0, implicit $exec :: (load 4)
+    GLOBAL_STORE_DWORDX2 %11, %40, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    %41:vreg_96 = GLOBAL_LOAD_DWORDX3 %11, 16, 0, 0, 0, implicit $exec :: (load 4)
+    GLOBAL_STORE_DWORDX3 %11, %41, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    %42:vreg_128 = GLOBAL_LOAD_DWORDX4 %11, 16, 0, 0, 0, implicit $exec :: (load 4)
+    GLOBAL_STORE_DWORDX4 %11, %42, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    %43:vgpr_32 = GLOBAL_LOAD_SSHORT %11, 16, 0, 0, 0, implicit $exec :: (load 4)
+    GLOBAL_STORE_SHORT %11, %43, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    %44:vgpr_32 = GLOBAL_LOAD_USHORT %11, 16, 0, 0, 0, implicit $exec :: (load 4)
+    GLOBAL_STORE_SHORT %11, %44, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    %45:vgpr_32 = GLOBAL_LOAD_UBYTE %11, 16, 0, 0, 0, implicit $exec :: (load 4)
+    GLOBAL_STORE_BYTE %11, %45, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    %46:vgpr_32 = GLOBAL_LOAD_SBYTE %11, 16, 0, 0, 0, implicit $exec :: (load 4)
+    GLOBAL_STORE_BYTE %11, %46, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    %47:vgpr_32 = GLOBAL_LOAD_SBYTE_D16 %11, 16, 0, 0, 0, %46, implicit $exec :: (load 4)
+    GLOBAL_STORE_BYTE_D16_HI %11, %47, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    %48:vgpr_32 = GLOBAL_LOAD_UBYTE_D16 %11, 16, 0, 0, 0, %46, implicit $exec :: (load 4)
+    GLOBAL_STORE_BYTE_D16_HI %11, %48, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    %49:vgpr_32 = GLOBAL_LOAD_SBYTE_D16_HI %11, 16, 0, 0, 0, %46, implicit $exec :: (load 4)
+    GLOBAL_STORE_BYTE_D16_HI %11, %49, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    %50:vgpr_32 = GLOBAL_LOAD_UBYTE_D16_HI %11, 16, 0, 0, 0, %46, implicit $exec :: (load 4)
+    GLOBAL_STORE_BYTE_D16_HI %11, %50, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    %51:vgpr_32 = GLOBAL_LOAD_SHORT_D16_HI %11, 16, 0, 0, 0, %46, implicit $exec :: (load 4)
+    GLOBAL_STORE_SHORT_D16_HI %11, %51, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    %52:vgpr_32 = GLOBAL_LOAD_SHORT_D16 %11, 16, 0, 0, 0, %46, implicit $exec :: (load 4)
+    GLOBAL_STORE_SHORT_D16_HI %11, %52, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
 
     %53:vgpr_32 = GLOBAL_ATOMIC_XOR_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORD %11, %53, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORD %11, %53, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_XOR %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %54:vgpr_32 = GLOBAL_ATOMIC_SMIN_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORD %11, %54, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORD %11, %54, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_SMIN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %55:vgpr_32 = GLOBAL_ATOMIC_AND_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORD %11, %55, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORD %11, %55, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_AND %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %56:vgpr_32 = GLOBAL_ATOMIC_SWAP_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORD %11, %56, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORD %11, %56, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_SWAP %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %57:vgpr_32 = GLOBAL_ATOMIC_SMAX_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORD %11, %57, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORD %11, %57, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_SMAX %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %58:vgpr_32 = GLOBAL_ATOMIC_UMIN_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORD %11, %58, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORD %11, %58, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_UMIN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %59:vgpr_32 = GLOBAL_ATOMIC_UMAX_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORD %11, %59, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORD %11, %59, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_UMAX %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %60:vgpr_32 = GLOBAL_ATOMIC_OR_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORD %11, %60, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORD %11, %60, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_OR %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %61:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORD %11, %61, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORD %11, %61, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_ADD %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %62:vgpr_32 = GLOBAL_ATOMIC_SUB_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORD %11, %62, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORD %11, %62, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_SUB %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %63:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORD %11, %63, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORD %11, %63, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_CMPSWAP %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %64:vgpr_32 = GLOBAL_ATOMIC_INC_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORD %11, %64, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORD %11, %64, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_INC %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %65:vgpr_32 = GLOBAL_ATOMIC_DEC_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORD %11, %65, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORD %11, %65, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_DEC %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %66:vreg_64 = GLOBAL_ATOMIC_OR_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORDX2 %11, %66, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORDX2 %11, %66, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_OR_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %67:vreg_64 = GLOBAL_ATOMIC_XOR_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORDX2 %11, %67, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORDX2 %11, %67, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_XOR_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %68:vreg_64 = GLOBAL_ATOMIC_AND_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORDX2 %11, %68, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORDX2 %11, %68, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_AND_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %69:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORDX2 %11, %69, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORDX2 %11, %69, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_ADD_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %70:vreg_64 = GLOBAL_ATOMIC_SUB_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORDX2 %11, %70, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORDX2 %11, %70, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_SUB_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %71:vreg_64 = GLOBAL_ATOMIC_DEC_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORDX2 %11, %71, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORDX2 %11, %71, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_DEC_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %72:vreg_64 = GLOBAL_ATOMIC_INC_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORDX2 %11, %72, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORDX2 %11, %72, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_INC_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %73:vreg_64 = GLOBAL_ATOMIC_SMIN_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORDX2 %11, %73, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORDX2 %11, %73, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_SMIN_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %74:vreg_64 = GLOBAL_ATOMIC_SWAP_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORDX2 %11, %74, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORDX2 %11, %74, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_SWAP_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %75:vreg_64 = GLOBAL_ATOMIC_SMAX_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORDX2 %11, %75, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORDX2 %11, %75, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_SMAX_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %76:vreg_64 = GLOBAL_ATOMIC_UMIN_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORDX2 %11, %76, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORDX2 %11, %76, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_UMIN_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %77:vreg_64 = GLOBAL_ATOMIC_UMAX_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORDX2 %11, %77, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORDX2 %11, %77, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_UMAX_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     %79:sreg_128 = REG_SEQUENCE %4, %subreg.sub0, %4, %subreg.sub1, %4, %subreg.sub2, %4, %subreg.sub3
     %80:vreg_128 = COPY %79
 
     %78:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN %11, %80, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
-    GLOBAL_STORE_DWORDX2 %11, %78, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    GLOBAL_STORE_DWORDX2 %11, %78, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
     GLOBAL_ATOMIC_CMPSWAP_X2 %11, %80, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
 
     S_ENDPGM 0
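
Worth noting in the hunk above: only the plain GLOBAL_LOAD_*/GLOBAL_STORE_* forms grow the extra immediate here, while the GLOBAL_ATOMIC_* pseudos keep their previous operand counts, and the D16 load variants get the new 0 inserted ahead of their tied input register (e.g. %46). A condensed sketch of the load change:

-    %10:vgpr_32 = GLOBAL_LOAD_DWORD %11, 16, 0, 0, implicit $exec :: (load 4)
+    %10:vgpr_32 = GLOBAL_LOAD_DWORD %11, 16, 0, 0, 0, implicit $exec :: (load 4)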

Modified: llvm/trunk/test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir Tue Apr 30 15:08:23 2019
@@ -12,7 +12,7 @@ body:             |
   bb.0.entry:
     liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr7, $vgpr8, $vgpr9, $vgpr10
   
-    BUFFER_STORE_DWORDX4_OFFSET_exact killed $vgpr7_vgpr8_vgpr9_vgpr10, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 96, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORDX4_OFFSET_exact killed $vgpr7_vgpr8_vgpr9_vgpr10, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 96, 0, 0, 0, 0, implicit $exec
     $vgpr7 = V_INTERP_P1_F32 $vgpr0, 0, 0, implicit $m0, implicit $exec
     S_ENDPGM 0
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/hazard-inlineasm.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hazard-inlineasm.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hazard-inlineasm.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hazard-inlineasm.mir Tue Apr 30 15:08:23 2019
@@ -16,7 +16,7 @@ name: hazard-inlineasm
 
 body: |
   bb.0:
-   FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, 0, implicit $exec, implicit $flat_scr
+   FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
    INLINEASM &"v_mad_u64_u32 $0, $1, $2, $3, $4", 0, 2621450, def $vgpr26_vgpr27, 2818058, def dead $sgpr14_sgpr15, 589833, $sgpr12, 327689, killed $vgpr51, 2621449, $vgpr46_vgpr47
    S_ENDPGM 0
 ...
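
FLAT instructions get the same treatment as the buffer forms: one extra trailing immediate ahead of the implicit operands. Sketch:

-   FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, 0, implicit $exec, implicit $flat_scr
+   FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, 0, 0, implicit $exec, implicit $flat_scr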

Modified: llvm/trunk/test/CodeGen/AMDGPU/hazard-kill.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hazard-kill.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hazard-kill.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hazard-kill.mir Tue Apr 30 15:08:23 2019
@@ -21,7 +21,7 @@ body:             |
     liveins: $sgpr2, $sgpr3, $sgpr4
   
     $sgpr6 = S_MOV_B32 killed $sgpr3
-    renamable $sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX4_IMM renamable $sgpr6_sgpr7, 16, 0
+    renamable $sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX4_IMM renamable $sgpr6_sgpr7, 16, 0, 0
     $m0 = S_MOV_B32 killed renamable $sgpr4
     dead renamable $sgpr0 = KILL undef renamable $sgpr2
     renamable $vgpr0 = V_INTERP_MOV_F32 2, 0, 0, implicit $m0, implicit $exec

Modified: llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-term.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-term.ll?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-term.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-term.ll Tue Apr 30 15:08:23 2019
@@ -12,7 +12,7 @@ define amdgpu_kernel void @extract_w_off
   ; GCN: bb.0.entry:
   ; GCN:   successors: %bb.1(0x80000000)
   ; GCN:   liveins: $vgpr0, $sgpr0_sgpr1
-  ; GCN:   renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 8 from %ir.out.kernarg.offset.cast, align 4, addrspace 4)
+  ; GCN:   renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 8 from %ir.out.kernarg.offset.cast, align 4, addrspace 4)
   ; GCN:   renamable $sgpr2 = COPY renamable $sgpr1
   ; GCN:   renamable $sgpr4 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1
   ; GCN:   renamable $sgpr5 = S_MOV_B32 61440
@@ -101,7 +101,7 @@ define amdgpu_kernel void @extract_w_off
   ; GCN:   $exec = S_MOV_B64 killed renamable $sgpr0_sgpr1
   ; GCN:   $vgpr0 = SI_SPILL_V32_RESTORE %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5)
   ; GCN:   $sgpr4_sgpr5_sgpr6_sgpr7 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (load 16 from %stack.1, align 4, addrspace 5)
-  ; GCN:   BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1)
+  ; GCN:   BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1)
   ; GCN:   S_ENDPGM
 entry:
   %id = call i32 @llvm.amdgcn.workitem.id.x() #1

Modified: llvm/trunk/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir Tue Apr 30 15:08:23 2019
@@ -49,10 +49,10 @@ body:             |
   bb.0 (%ir-block.2):
     $sgpr3 = S_MOV_B32 61440
     $sgpr2 = S_MOV_B32 -1
-    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
-    $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
-    $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
-    $vgpr3 = BUFFER_LOAD_DWORD_OFFSET killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+    $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+    $vgpr3 = BUFFER_LOAD_DWORD_OFFSET killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
     EXP_DONE 0, killed $vgpr0, killed $vgpr1, killed $vgpr2, killed $vgpr3, -1, -1, 15, implicit $exec
     $vgpr0 = V_MOV_B32_e32 1056964608, implicit $exec
     $vgpr1 = V_MOV_B32_e32 1065353216, implicit $exec

Modified: llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir Tue Apr 30 15:08:23 2019
@@ -230,28 +230,28 @@ name: vmem_gt_8dw_store
 
 body: |
   bb.0:
-    BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
     $vgpr3 = V_MOV_B32_e32 0, implicit $exec
-    BUFFER_STORE_DWORDX3_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORDX3_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
     $vgpr3 = V_MOV_B32_e32 0, implicit $exec
-    BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
     $vgpr3 = V_MOV_B32_e32 0, implicit $exec
-    BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
     $vgpr3 = V_MOV_B32_e32 0, implicit $exec
-    BUFFER_STORE_FORMAT_XYZ_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_FORMAT_XYZ_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
     $vgpr3 = V_MOV_B32_e32 0, implicit $exec
-    BUFFER_STORE_FORMAT_XYZW_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_FORMAT_XYZW_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
     $vgpr3 = V_MOV_B32_e32 0, implicit $exec
     BUFFER_ATOMIC_CMPSWAP_X2_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec
     $vgpr3 = V_MOV_B32_e32 0, implicit $exec
     S_BRANCH %bb.1
 
   bb.1:
-    FLAT_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     $vgpr3 = V_MOV_B32_e32 0, implicit $exec
-    FLAT_STORE_DWORDX3 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORDX3 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     $vgpr3 = V_MOV_B32_e32 0, implicit $exec
-    FLAT_STORE_DWORDX4 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORDX4 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     $vgpr3 = V_MOV_B32_e32 0, implicit $exec
     FLAT_ATOMIC_CMPSWAP_X2 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
     $vgpr3 = V_MOV_B32_e32 0, implicit $exec
@@ -549,14 +549,14 @@ body:             |
     $flat_scr_lo = S_ADD_U32 $sgpr6, $sgpr9, implicit-def $scc
     $flat_scr_hi = S_ADDC_U32 $sgpr7, 0, implicit-def $scc, implicit $scc
     DBG_VALUE $noreg, 2, !5, !11, debug-location !12
-    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     dead $sgpr6_sgpr7 = KILL $sgpr4_sgpr5
     $sgpr8 = S_MOV_B32 $sgpr5
     $vgpr0 = V_MOV_B32_e32 killed $sgpr8, implicit $exec
-    BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr + 4)
+    BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 4, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr + 4)
     $sgpr8 = S_MOV_B32 $sgpr4, implicit killed $sgpr4_sgpr5
     $vgpr0 = V_MOV_B32_e32 killed $sgpr8, implicit $exec
-    BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr)
+    BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr)
     S_ENDPGM 0
 
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir Tue Apr 30 15:08:23 2019
@@ -55,7 +55,7 @@ body:             |
   bb.0.entry:
     liveins: $sgpr0_sgpr1
 
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     $sgpr7 = S_MOV_B32 61440
     $sgpr6 = S_MOV_B32 -1
     S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
@@ -64,7 +64,7 @@ body:             |
     liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
 
     $vgpr0 = V_MOV_B32_e32 100, implicit $exec
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
     $vgpr0 = V_MOV_B32_e32 1, implicit $exec
     S_BRANCH %bb.3
 
@@ -72,7 +72,7 @@ body:             |
     liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
 
     $vgpr0 = V_MOV_B32_e32 9, implicit $exec
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
     $vgpr0 = V_MOV_B32_e32 0, implicit $exec
 
   bb.3.done:
@@ -80,7 +80,7 @@ body:             |
 
     $sgpr3 = S_MOV_B32 61440
     $sgpr2 = S_MOV_B32 -1
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
     S_ENDPGM 0
 
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/limit-coalesce.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/limit-coalesce.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/limit-coalesce.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/limit-coalesce.mir Tue Apr 30 15:08:23 2019
@@ -57,15 +57,15 @@ body:             |
     %4.sub1 = COPY %3.sub0
     undef %5.sub0 = COPY %4.sub1
     %5.sub1 = COPY %4.sub0
-    FLAT_STORE_DWORDX2 $vgpr0_vgpr1, killed %5, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORDX2 $vgpr0_vgpr1, killed %5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %6 = IMPLICIT_DEF
     undef %7.sub0_sub1 = COPY %6
     %7.sub2 = COPY %3.sub0
-    FLAT_STORE_DWORDX3 $vgpr0_vgpr1, killed %7, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORDX3 $vgpr0_vgpr1, killed %7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
     %8 = IMPLICIT_DEF
     undef %9.sub0_sub1_sub2 = COPY %8
     %9.sub3 = COPY %3.sub0
-    FLAT_STORE_DWORDX4 $vgpr0_vgpr1, killed %9, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORDX4 $vgpr0_vgpr1, killed %9, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 ...

Added: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll?rev=359621&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll Tue Apr 30 15:08:23 2019
@@ -0,0 +1,19 @@
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
+
+declare i32 @llvm.amdgcn.s.get.waveid.in.workgroup() #0
+
+; GCN-LABEL: {{^}}test_s_get_waveid_in_workgroup:
+; GFX10: global_store_dword
+; GFX10: s_get_waveid_in_workgroup [[DEST:s[0-9]+]]
+; GFX10: s_waitcnt lgkmcnt(0)
+; GFX10: v_mov_b32_e32 [[VDEST:v[0-9]+]], [[DEST]]
+; GFX10: global_store_dword v[{{[0-9:]+}}], [[VDEST]], off
+define amdgpu_kernel void @test_s_get_waveid_in_workgroup(i32 addrspace(1)* %out) {
+; Make sure %out is loaded and the associated wait count is already inserted
+  store i32 0, i32 addrspace(1)* %out
+  %v = call i32 @llvm.amdgcn.s.get.waveid.in.workgroup()
+  store i32 %v, i32 addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }
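
For reference, the smallest standalone use of the intrinsic this test exercises looks like the following (the kernel and pointer names are illustrative only; the intrinsic name and signature are taken from the test above):

    declare i32 @llvm.amdgcn.s.get.waveid.in.workgroup()

    define amdgpu_kernel void @waveid(i32 addrspace(1)* %out) {
      %id = call i32 @llvm.amdgcn.s.get.waveid.in.workgroup()
      store i32 %id, i32 addrspace(1)* %out
      ret void
    }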

Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir Tue Apr 30 15:08:23 2019
@@ -80,13 +80,13 @@ body:             |
     successors: %bb.1.atomic(0x40000000), %bb.2.exit(0x40000000)
     liveins: $vgpr0, $sgpr0_sgpr1
 
-    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     $vgpr1 = V_ASHRREV_I32_e32 31, $vgpr0, implicit $exec
     $vgpr1_vgpr2 = V_LSHL_B64 $vgpr0_vgpr1, 3, implicit $exec
     $sgpr7 = S_MOV_B32 61440
     $sgpr6 = S_MOV_B32 0
     S_WAITCNT 127
-    $vgpr1_vgpr2 = BUFFER_LOAD_DWORDX2_ADDR64 killed $vgpr1_vgpr2, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 8 from %ir.tid.gep)
+    $vgpr1_vgpr2 = BUFFER_LOAD_DWORDX2_ADDR64 killed $vgpr1_vgpr2, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 8 from %ir.tid.gep)
     $vgpr0 = V_XOR_B32_e32 1, killed $vgpr0, implicit $exec
     V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
     $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -97,7 +97,7 @@ body:             |
     successors: %bb.2.exit(0x80000000)
     liveins: $sgpr4_sgpr5_sgpr6_sgpr7:0x0000000C, $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr1_vgpr2_vgpr3_vgpr4:0x00000003
 
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 15, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 15, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     dead $vgpr0 = V_MOV_B32_e32 -1, implicit $exec
     dead $vgpr0 = V_MOV_B32_e32 61440, implicit $exec
     $sgpr4_sgpr5 = S_MOV_B64 0

Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir Tue Apr 30 15:08:23 2019
@@ -11,10 +11,10 @@ body:             |
 
     $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3
     $vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec
-    renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(42)* undef`)
+    renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(42)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -30,7 +30,7 @@ body:             |
     $vgpr2 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(42)* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(42)* undef`)
     S_ENDPGM 0
 
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-local.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-local.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-local.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-local.mir Tue Apr 30 15:08:23 2019
@@ -13,14 +13,14 @@
 name:            load_singlethread_unordered
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") unordered 4 from `i32 addrspace(3)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -37,14 +37,14 @@ body:             |
 name:            load_singlethread_monotonic
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -61,14 +61,14 @@ body:             |
 name:            load_singlethread_acquire
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") acquire 4 from `i32 addrspace(3)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -85,14 +85,14 @@ body:             |
 name:            load_singlethread_seq_cst
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -109,14 +109,14 @@ body:             |
 name:            load_wavefront_unordered
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") unordered 4 from `i32 addrspace(3)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -133,14 +133,14 @@ body:             |
 name:            load_wavefront_monotonic
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -157,14 +157,14 @@ body:             |
 name:            load_wavefront_acquire
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") acquire 4 from `i32 addrspace(3)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -181,14 +181,14 @@ body:             |
 name:            load_wavefront_seq_cst
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -205,14 +205,14 @@ body:             |
 name:            load_workgroup_unordered
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") unordered 4 from `i32 addrspace(3)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -229,14 +229,14 @@ body:             |
 name:            load_workgroup_monotonic
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -253,14 +253,14 @@ body:             |
 name:            load_workgroup_acquire
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") acquire 4 from `i32 addrspace(3)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -277,14 +277,14 @@ body:             |
 name:            load_workgroup_seq_cst
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -301,14 +301,14 @@ body:             |
 name:            load_agent_unordered
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") unordered 4 from `i32 addrspace(3)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -325,14 +325,14 @@ body:             |
 name:            load_agent_monotonic
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -349,14 +349,14 @@ body:             |
 name:            load_agent_acquire
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") acquire 4 from `i32 addrspace(3)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -373,14 +373,14 @@ body:             |
 name:            load_agent_seq_cst
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -397,14 +397,14 @@ body:             |
 name:            load_system_unordered
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") unordered 4 from `i32 addrspace(3)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -421,14 +421,14 @@ body:             |
 name:            load_system_monotonic
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") monotonic 4 from `i32 addrspace(3)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -445,14 +445,14 @@ body:             |
 name:            load_system_acquire
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") acquire 4 from `i32 addrspace(3)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -469,14 +469,14 @@ body:             |
 name:            load_system_seq_cst
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -493,8 +493,8 @@ body:             |
 name:            store_singlethread_unordered
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -515,8 +515,8 @@ body:             |
 name:            store_singlethread_monotonic
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -537,8 +537,8 @@ body:             |
 name:            store_singlethread_release
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -559,8 +559,8 @@ body:             |
 name:            store_singlethread_seq_cst
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -581,8 +581,8 @@ body:             |
 name:            store_wavefront_unordered
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -603,8 +603,8 @@ body:             |
 name:            store_wavefront_monotonic
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -625,8 +625,8 @@ body:             |
 name:            store_wavefront_release
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -647,8 +647,8 @@ body:             |
 name:            store_wavefront_seq_cst
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -669,8 +669,8 @@ body:             |
 name:            store_workgroup_unordered
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -691,8 +691,8 @@ body:             |
 name:            store_workgroup_monotonic
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -713,8 +713,8 @@ body:             |
 name:            store_workgroup_release
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -735,8 +735,8 @@ body:             |
 name:            store_workgroup_seq_cst
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -757,8 +757,8 @@ body:             |
 name:            store_agent_unordered
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -779,8 +779,8 @@ body:             |
 name:            store_agent_monotonic
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -801,8 +801,8 @@ body:             |
 name:            store_agent_release
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -823,8 +823,8 @@ body:             |
 name:            store_agent_seq_cst
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -845,8 +845,8 @@ body:             |
 name:            store_system_unordered
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -867,8 +867,8 @@ body:             |
 name:            store_system_monotonic
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -889,8 +889,8 @@ body:             |
 name:            store_system_release
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -911,8 +911,8 @@ body:             |
 name:            store_system_seq_cst
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -933,8 +933,8 @@ body:             |
 name:            atomicrmw_singlethread_unordered
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -955,8 +955,8 @@ body:             |
 name:            atomicrmw_singlethread_monotonic
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -977,8 +977,8 @@ body:             |
 name:            atomicrmw_singlethread_acquire
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -999,8 +999,8 @@ body:             |
 name:            atomicrmw_singlethread_release
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -1021,8 +1021,8 @@ body:             |
 name:            atomicrmw_singlethread_acq_rel
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -1043,8 +1043,8 @@ body:             |
 name:            atomicrmw_singlethread_seq_cst
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
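
Note: the SMEM forms change the same way; S_LOAD_*_IMM grows from
(sbase, offset, glc) to (sbase, offset, glc, dlc). A minimal sketch, with the
operand roles assumed from the surrounding tests rather than quoted from
SMInstructions.td:

    before: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0
    after:  $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0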

Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir Tue Apr 30 15:08:23 2019
@@ -16,27 +16,27 @@ body:             |
     successors: %bb.1(0x30000000), %bb.2(0x50000000)
     liveins: $sgpr0_sgpr1, $sgpr3
 
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     $sgpr8 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
-    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     $sgpr9 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
     $sgpr10 = S_MOV_B32 4294967295, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
     $sgpr11 = S_MOV_B32 15204352, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
     $vgpr0 = V_MOV_B32_e32 1, implicit $exec
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(5)* undef`)
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(5)* undef`)
     S_WAITCNT 127
     S_CMP_LG_U32 killed $sgpr2, 0, implicit-def $scc
     S_WAITCNT 3855
     $vgpr0 = V_MOV_B32_e32 2, implicit $exec
     $vgpr1 = V_MOV_B32_e32 32772, implicit $exec
-    BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(5)* undef`)
+    BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(5)* undef`)
     S_CBRANCH_SCC0 %bb.1, implicit killed $scc
 
   bb.2:
     successors: %bb.3(0x80000000)
     liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
 
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     S_WAITCNT 3855
     $vgpr0 = V_MOV_B32_e32 32772, implicit $exec
     S_BRANCH %bb.3
@@ -45,7 +45,7 @@ body:             |
     successors: %bb.3(0x80000000)
     liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
 
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     S_WAITCNT 3855
     $vgpr0 = V_MOV_B32_e32 4, implicit $exec
 
@@ -55,11 +55,11 @@ body:             |
     S_WAITCNT 127
     $sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc
     $vgpr0 = V_ADD_I32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec
-    $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (load syncscope("agent-one-as") unordered 4 from `i32 addrspace(1)* undef`), (load syncscope("workgroup-one-as") seq_cst 4 from `[8192 x i32] addrspace(5)* undef`)
+    $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load syncscope("agent-one-as") unordered 4 from `i32 addrspace(1)* undef`), (load syncscope("workgroup-one-as") seq_cst 4 from `[8192 x i32] addrspace(5)* undef`)
     $vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5
     $vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec
     S_WAITCNT 3952
-    FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32 addrspace(1)* undef`)
+    FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32 addrspace(1)* undef`)
     S_ENDPGM 0
 
 ...
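
Note: the MUBUF forms pick up the same bit, which is why each
BUFFER_LOAD/BUFFER_STORE line here gains one more 0 among its trailing
immediates. A minimal sketch (the exact slot of dlc among offset/glc/slc/tfe
follows the BUFInstructions.td change; it is shown appended here only for
illustration):

    before: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec
    after:  $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec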

Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir Tue Apr 30 15:08:23 2019
@@ -62,7 +62,7 @@
 # CHECK-LABEL: name: multiple_mem_operands
 
 # CHECK-LABEL: bb.3.done:
-# CHECK: BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 1, 1, 0
+# CHECK: BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 1, 1, 0, 0
 
 name:            multiple_mem_operands
 alignment:       0
@@ -110,27 +110,27 @@ body:             |
     successors: %bb.1.if(0x30000000), %bb.2.else(0x50000000)
     liveins: $sgpr0_sgpr1, $sgpr3
 
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     $sgpr8 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
-    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     $sgpr9 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
     $sgpr10 = S_MOV_B32 4294967295, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
     $sgpr11 = S_MOV_B32 15204352, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
     $vgpr0 = V_MOV_B32_e32 1, implicit $exec
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr01)
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr01)
     S_WAITCNT 127
     S_CMP_LG_U32 killed $sgpr2, 0, implicit-def $scc
     S_WAITCNT 3855
     $vgpr0 = V_MOV_B32_e32 2, implicit $exec
     $vgpr1 = V_MOV_B32_e32 32772, implicit $exec
-    BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr12)
+    BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr12)
     S_CBRANCH_SCC0 %bb.1.if, implicit killed $scc
 
   bb.2.else:
     successors: %bb.3.done(0x80000000)
     liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
 
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     S_WAITCNT 3855
     $vgpr0 = V_MOV_B32_e32 32772, implicit $exec
     S_BRANCH %bb.3.done
@@ -139,7 +139,7 @@ body:             |
     successors: %bb.3.done(0x80000000)
     liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
 
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     S_WAITCNT 3855
     $vgpr0 = V_MOV_B32_e32 4, implicit $exec
 
@@ -149,11 +149,11 @@ body:             |
     S_WAITCNT 127
     $sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc
     $vgpr0 = V_ADD_I32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec
-    $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (non-temporal load 4 from %ir.else_ptr), (non-temporal load 4 from %ir.if_ptr)
+    $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (non-temporal load 4 from %ir.else_ptr), (non-temporal load 4 from %ir.if_ptr)
     $vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5
     $vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec
     S_WAITCNT 3952
-    FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.out)
+    FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.out)
     S_ENDPGM 0
 
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir Tue Apr 30 15:08:23 2019
@@ -42,7 +42,7 @@
 # CHECK-LABEL: name: multiple_mem_operands
 
 # CHECK-LABEL: bb.3.done:
-# CHECK: BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0
+# CHECK: BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0
 
 name:            multiple_mem_operands
 alignment:       0
@@ -90,27 +90,27 @@ body:             |
     successors: %bb.1.if(0x30000000), %bb.2.else(0x50000000)
     liveins: $sgpr0_sgpr1, $sgpr3
 
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     $sgpr8 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
-    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     $sgpr9 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
     $sgpr10 = S_MOV_B32 4294967295, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
     $sgpr11 = S_MOV_B32 15204352, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
     $vgpr0 = V_MOV_B32_e32 1, implicit $exec
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr01)
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr01)
     S_WAITCNT 127
     S_CMP_LG_U32 killed $sgpr2, 0, implicit-def $scc
     S_WAITCNT 3855
     $vgpr0 = V_MOV_B32_e32 2, implicit $exec
     $vgpr1 = V_MOV_B32_e32 32772, implicit $exec
-    BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr12)
+    BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr12)
     S_CBRANCH_SCC0 %bb.1.if, implicit killed $scc
 
   bb.2.else:
     successors: %bb.3.done(0x80000000)
     liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
 
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     S_WAITCNT 3855
     $vgpr0 = V_MOV_B32_e32 32772, implicit $exec
     S_BRANCH %bb.3.done
@@ -119,7 +119,7 @@ body:             |
     successors: %bb.3.done(0x80000000)
     liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
 
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     S_WAITCNT 3855
     $vgpr0 = V_MOV_B32_e32 4, implicit $exec
 
@@ -129,11 +129,11 @@ body:             |
     S_WAITCNT 127
     $sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc
     $vgpr0 = V_ADD_I32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec
-    $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (load 4 from %ir.else_ptr), (non-temporal load 4 from %ir.if_ptr)
+    $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %ir.else_ptr), (non-temporal load 4 from %ir.if_ptr)
     $vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5
     $vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec
     S_WAITCNT 3952
-    FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.out)
+    FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.out)
     S_ENDPGM 0
 
 ...
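
In the memory-legalizer-region.mir changes that follow, note that only the scalar loads and FLAT stores gain the extra operand; the DS_READ_B32/DS_WRITE_B32 instructions are left unchanged, consistent with the new bit belonging to the VMEM and SMEM encodings this patch covers rather than to LDS/DS operations.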

Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-region.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-region.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-region.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-region.mir Tue Apr 30 15:08:23 2019
@@ -13,14 +13,14 @@
 name:            load_singlethread_unordered
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 1, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") unordered 4 from `i32 addrspace(2)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -37,14 +37,14 @@ body:             |
 name:            load_singlethread_monotonic
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -61,14 +61,14 @@ body:             |
 name:            load_singlethread_acquire
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") acquire 4 from `i32 addrspace(2)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -85,14 +85,14 @@ body:             |
 name:            load_singlethread_seq_cst
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -109,14 +109,14 @@ body:             |
 name:            load_wavefront_unordered
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") unordered 4 from `i32 addrspace(2)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -133,14 +133,14 @@ body:             |
 name:            load_wavefront_monotonic
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -157,14 +157,14 @@ body:             |
 name:            load_wavefront_acquire
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") acquire 4 from `i32 addrspace(2)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -181,14 +181,14 @@ body:             |
 name:            load_wavefront_seq_cst
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -205,14 +205,14 @@ body:             |
 name:            load_workgroup_unordered
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") unordered 4 from `i32 addrspace(2)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -229,14 +229,14 @@ body:             |
 name:            load_workgroup_monotonic
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -253,14 +253,14 @@ body:             |
 name:            load_workgroup_acquire
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") acquire 4 from `i32 addrspace(2)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -277,14 +277,14 @@ body:             |
 name:            load_workgroup_seq_cst
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -301,14 +301,14 @@ body:             |
 name:            load_agent_unordered
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") unordered 4 from `i32 addrspace(2)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -325,14 +325,14 @@ body:             |
 name:            load_agent_monotonic
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -349,14 +349,14 @@ body:             |
 name:            load_agent_acquire
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") acquire 4 from `i32 addrspace(2)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -373,14 +373,14 @@ body:             |
 name:            load_agent_seq_cst
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -397,14 +397,14 @@ body:             |
 name:            load_system_unordered
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") unordered 4 from `i32 addrspace(2)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -421,14 +421,14 @@ body:             |
 name:            load_system_monotonic
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") monotonic 4 from `i32 addrspace(2)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -445,14 +445,14 @@ body:             |
 name:            load_system_acquire
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") acquire 4 from `i32 addrspace(2)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -469,14 +469,14 @@ body:             |
 name:            load_system_seq_cst
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
-    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
     S_ENDPGM 0
 
 ...
@@ -493,8 +493,8 @@ body:             |
 name:            store_singlethread_unordered
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -515,8 +515,8 @@ body:             |
 name:            store_singlethread_monotonic
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -537,8 +537,8 @@ body:             |
 name:            store_singlethread_release
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -559,8 +559,8 @@ body:             |
 name:            store_singlethread_seq_cst
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -581,8 +581,8 @@ body:             |
 name:            store_wavefront_unordered
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -603,8 +603,8 @@ body:             |
 name:            store_wavefront_monotonic
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -625,8 +625,8 @@ body:             |
 name:            store_wavefront_release
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -647,8 +647,8 @@ body:             |
 name:            store_wavefront_seq_cst
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -669,8 +669,8 @@ body:             |
 name:            store_workgroup_unordered
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -691,8 +691,8 @@ body:             |
 name:            store_workgroup_monotonic
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -713,8 +713,8 @@ body:             |
 name:            store_workgroup_release
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -735,8 +735,8 @@ body:             |
 name:            store_workgroup_seq_cst
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -757,13 +757,14 @@ body:             |
 name:            store_agent_unordered
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
     DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") unordered 4 into `i32 addrspace(2)* undef`)
     S_ENDPGM 0
+
 ...
 ---
 
@@ -778,8 +779,8 @@ body:             |
 name:            store_agent_monotonic
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -800,8 +801,8 @@ body:             |
 name:            store_agent_release
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -822,8 +823,8 @@ body:             |
 name:            store_agent_seq_cst
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -844,8 +845,8 @@ body:             |
 name:            store_system_unordered
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -866,8 +867,8 @@ body:             |
 name:            store_system_monotonic
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -888,8 +889,8 @@ body:             |
 name:            store_system_release
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -910,8 +911,8 @@ body:             |
 name:            store_system_seq_cst
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -932,8 +933,8 @@ body:             |
 name:            atomicrmw_singlethread_unordered
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -954,8 +955,8 @@ body:             |
 name:            atomicrmw_singlethread_monotonic
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -976,8 +977,8 @@ body:             |
 name:            atomicrmw_singlethread_acquire
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -998,8 +999,8 @@ body:             |
 name:            atomicrmw_singlethread_release
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -1020,8 +1021,8 @@ body:             |
 name:            atomicrmw_singlethread_acq_rel
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -1042,8 +1043,8 @@ body:             |
 name:            atomicrmw_singlethread_seq_cst
 body:             |
   bb.0:
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
     $m0 = S_MOV_B32 -1
     $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec

Modified: llvm/trunk/test/CodeGen/AMDGPU/memory_clause.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory_clause.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory_clause.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory_clause.mir Tue Apr 30 15:08:23 2019
@@ -2,12 +2,12 @@
 
 # GCN-LABEL: {{^}}name: vector_clause{{$}}
 # GCN:         early-clobber %2:vreg_128, early-clobber %4:vreg_128, early-clobber %1:vreg_128, early-clobber %3:vreg_128 = BUNDLE %0, implicit $exec {
-# GCN-NEXT:    %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
-# GCN-NEXT:    %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, implicit $exec
-# GCN-NEXT:    %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, implicit $exec
-# GCN-NEXT:    %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, implicit $exec
+# GCN-NEXT:    %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT:    %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, implicit $exec
+# GCN-NEXT:    %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, implicit $exec
+# GCN-NEXT:    %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, implicit $exec
 # GCN-NEXT:  }
-# GCN-NEXT:  GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, implicit $exec
+# GCN-NEXT:  GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, 0, implicit $exec
 
 ---
 name:            vector_clause
@@ -21,24 +21,24 @@ registers:
 body:             |
   bb.0:
     %0 = IMPLICIT_DEF
-    %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
-    %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, implicit $exec
-    %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, implicit $exec
-    %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, implicit $exec
-    GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, implicit $exec
-    GLOBAL_STORE_DWORDX4 %0, %2, 16, 0, 0, implicit $exec
-    GLOBAL_STORE_DWORDX4 %0, %3, 32, 0, 0, implicit $exec
-    GLOBAL_STORE_DWORDX4 %0, %4, 48, 0, 0, implicit $exec
+    %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
+    %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, implicit $exec
+    %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, implicit $exec
+    %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, implicit $exec
+    GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, 0, implicit $exec
+    GLOBAL_STORE_DWORDX4 %0, %2, 16, 0, 0, 0, implicit $exec
+    GLOBAL_STORE_DWORDX4 %0, %3, 32, 0, 0, 0, implicit $exec
+    GLOBAL_STORE_DWORDX4 %0, %4, 48, 0, 0, 0, implicit $exec
 ...
 
 # GCN-LABEL: {{^}}name: subreg_full{{$}}
 # GCN:      early-clobber %1:vreg_128 = BUNDLE %0, implicit $exec {
-# GCN-NEXT:   undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, implicit $exec
-# GCN-NEXT:   internal %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, implicit $exec
-# GCN-NEXT:   internal %1.sub2:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, implicit $exec
-# GCN-NEXT:   internal %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, implicit $exec
+# GCN-NEXT:   undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT:   internal %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, implicit $exec
+# GCN-NEXT:   internal %1.sub2:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, implicit $exec
+# GCN-NEXT:   internal %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, implicit $exec
 # GCN-NEXT: }
-# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, implicit $exec
+# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, 0, implicit $exec
 
 ---
 name:            subreg_full
@@ -49,20 +49,20 @@ registers:
 body:             |
   bb.0:
     %0 = IMPLICIT_DEF
-    undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, implicit $exec
-    %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, implicit $exec
-    %1.sub2:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, implicit $exec
-    %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, implicit $exec
-    GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, implicit $exec
+    undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, implicit $exec
+    %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, implicit $exec
+    %1.sub2:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, implicit $exec
+    %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, implicit $exec
+    GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, 0, implicit $exec
 ...
 
 # GCN-LABEL: {{^}}name: subreg_part{{$}}
 # GCN:      undef early-clobber %1.sub0_sub1:vreg_128, undef early-clobber %1.sub3:vreg_128 = BUNDLE %0, implicit $exec {
-# GCN-NEXT:   undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, implicit $exec
-# GCN-NEXT:   internal %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, implicit $exec
-# GCN-NEXT:   internal %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, implicit $exec
+# GCN-NEXT:   undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT:   internal %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, implicit $exec
+# GCN-NEXT:   internal %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, implicit $exec
 # GCN-NEXT: }
-# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, implicit $exec
+# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, 0, implicit $exec
 
 ---
 name:            subreg_part
@@ -73,18 +73,18 @@ registers:
 body:             |
   bb.0:
     %0 = IMPLICIT_DEF
-    undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, implicit $exec
-    %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, implicit $exec
-    %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, implicit $exec
-    GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, implicit $exec
+    undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, implicit $exec
+    %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, implicit $exec
+    %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, implicit $exec
+    GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, 0, implicit $exec
 ...
 
 # GCN-LABEL: {{^}}name: dead{{$}}
 # GCN:      dead early-clobber %2:vreg_128, dead early-clobber %4:vreg_128, dead early-clobber %1:vreg_128, dead early-clobber %3:vreg_128 = BUNDLE %0, implicit $exec {
-# GCN-NEXT:   dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
-# GCN-NEXT:   %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, implicit $exec
-# GCN-NEXT:   dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, implicit $exec
-# GCN-NEXT:   dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, implicit $exec
+# GCN-NEXT:   dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT:   %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, implicit $exec
 # GCN-NEXT: }
 
 ---
@@ -99,18 +99,18 @@ registers:
 body:             |
   bb.0:
     %0 = IMPLICIT_DEF
-    dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
-    dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, implicit $exec
-    dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, implicit $exec
-    dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, implicit $exec
+    dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
+    dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, implicit $exec
+    dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, implicit $exec
+    dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, implicit $exec
 ...
 
 # GCN-LABEL: {{^}}name: subreg_dead{{$}}
 # GCN:      early-clobber %1:vreg_64 = BUNDLE %0, implicit $exec {
-# GCN-NEXT:    %1.sub0:vreg_64 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, implicit $exec
-# GCN-NEXT:    dead %1.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, implicit $exec
+# GCN-NEXT:    %1.sub0:vreg_64 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, 0, implicit $exec
+# GCN-NEXT:    dead %1.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, 0, implicit $exec
 # GCN-NEXT: }
-# GCN-NEXT: GLOBAL_STORE_DWORD %0, %1.sub0, 0, 0, 0, implicit $exec
+# GCN-NEXT: GLOBAL_STORE_DWORD %0, %1.sub0, 0, 0, 0, 0, implicit $exec
 
 ---
 name:            subreg_dead
@@ -121,15 +121,15 @@ registers:
 body:             |
   bb.0:
     %0 = IMPLICIT_DEF
-    undef %1.sub0:vreg_64 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, implicit $exec
-    dead %1.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, implicit $exec
-    GLOBAL_STORE_DWORD %0, %1.sub0, 0, 0, 0, implicit $exec
+    undef %1.sub0:vreg_64 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, 0, implicit $exec
+    dead %1.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, 0, implicit $exec
+    GLOBAL_STORE_DWORD %0, %1.sub0, 0, 0, 0, 0, implicit $exec
 ...
 
 # GCN-LABEL: {{^}}name: kill{{$}}
 # GCN:      early-clobber %2:vreg_128, early-clobber %3:vreg_128 = BUNDLE %0, %1, implicit $exec {
-# GCN-NEXT:   %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
-# GCN-NEXT:   %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, implicit $exec
+# GCN-NEXT:   %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT:   %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, 0, implicit $exec
 # GCN-NEXT: }
 
 ---
@@ -144,17 +144,17 @@ body:             |
   bb.0:
     %0 = IMPLICIT_DEF
     %1 = IMPLICIT_DEF
-    %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
-    %3:vreg_128 = GLOBAL_LOAD_DWORDX4 killed %1, 16, 0, 0, implicit $exec
-    GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, implicit $exec
-    GLOBAL_STORE_DWORDX4 %0, %3, 16, 0, 0, implicit $exec
+    %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
+    %3:vreg_128 = GLOBAL_LOAD_DWORDX4 killed %1, 16, 0, 0, 0, implicit $exec
+    GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, 0, implicit $exec
+    GLOBAL_STORE_DWORDX4 %0, %3, 16, 0, 0, 0, implicit $exec
 ...
 
 # GCN-LABEL: {{^}}name: indirect{{$}}
-# GCN:      %1:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, implicit $exec
+# GCN:      %1:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, 0, implicit $exec
 # GCN-NEXT:   early-clobber %2:vreg_128, early-clobber %3:vreg_128 = BUNDLE %1, implicit $exec {
-# GCN-NEXT:   %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, 0, implicit $exec
-# GCN-NEXT:   %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, implicit $exec
+# GCN-NEXT:   %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT:   %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, 0, implicit $exec
 # GCN-NEXT: }
 
 ---
@@ -168,18 +168,18 @@ registers:
 body:             |
   bb.0:
     %0 = IMPLICIT_DEF
-    %1:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, implicit $exec
-    %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, 0, implicit $exec
-    %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, implicit $exec
-    GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, implicit $exec
-    GLOBAL_STORE_DWORDX4 %0, %3, 16, 0, 0, implicit $exec
+    %1:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, 0, implicit $exec
+    %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, 0, 0, implicit $exec
+    %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, 0, implicit $exec
+    GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, 0, implicit $exec
+    GLOBAL_STORE_DWORDX4 %0, %3, 16, 0, 0, 0, implicit $exec
 ...
 
 # GCN-LABEL: {{^}}name: stack{{$}}
 # GCN:      %0:vreg_64 = IMPLICIT_DEF
-# GCN-NEXT: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 16, 0, 0, implicit $exec
-# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, implicit $exec
+# GCN-NEXT: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 16, 0, 0, 0, implicit $exec
+# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, 0, implicit $exec
 
 ---
 name:            stack
@@ -193,33 +193,33 @@ stack:
 body:             |
   bb.0:
     %0 = IMPLICIT_DEF
-    %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 0, 0, 0, implicit $exec
-    %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 16, 0, 0, implicit $exec
-    GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, implicit $exec
-    GLOBAL_STORE_DWORDX4 %0, %2, 16, 0, 0, implicit $exec
+    %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 0, 0, 0, 0, implicit $exec
+    %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 16, 0, 0, 0, implicit $exec
+    GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, 0, implicit $exec
+    GLOBAL_STORE_DWORDX4 %0, %2, 16, 0, 0, 0, implicit $exec
 ...
 
 # GCN-LABEL: {{^}}name: overflow_counter{{$}}
 # GCN:      dead early-clobber %7:vgpr_32, dead early-clobber %14:vgpr_32, dead early-clobber %2:vgpr_32, dead early-clobber %9:vgpr_32, dead early-clobber %4:vgpr_32, dead early-clobber %11:vgpr_32, dead early-clobber %6:vgpr_32, dead early-clobber %13:vgpr_32, dead early-clobber %1:vgpr_32, dead early-clobber %8:vgpr_32, dead early-clobber %15:vgpr_32, dead early-clobber %3:vgpr_32, dead early-clobber %10:vgpr_32, dead early-clobber %5:vgpr_32, dead early-clobber %12:vgpr_32 = BUNDLE %0, implicit $exec {
-# GCN-NEXT:   dead %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, implicit $exec
-# GCN-NEXT:   dead %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, 0, implicit $exec
-# GCN-NEXT:   dead %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, 0, implicit $exec
-# GCN-NEXT:   dead %4:vgpr_32 = GLOBAL_LOAD_DWORD %0, 12, 0, 0, implicit $exec
-# GCN-NEXT:   dead %5:vgpr_32 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, implicit $exec
-# GCN-NEXT:   dead %6:vgpr_32 = GLOBAL_LOAD_DWORD %0, 20, 0, 0, implicit $exec
-# GCN-NEXT:   dead %7:vgpr_32 = GLOBAL_LOAD_DWORD %0, 24, 0, 0, implicit $exec
-# GCN-NEXT:   dead %8:vgpr_32 = GLOBAL_LOAD_DWORD %0, 28, 0, 0, implicit $exec
-# GCN-NEXT:   dead %9:vgpr_32 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, implicit $exec
-# GCN-NEXT:   dead %10:vgpr_32 = GLOBAL_LOAD_DWORD %0, 36, 0, 0, implicit $exec
-# GCN-NEXT:   dead %11:vgpr_32 = GLOBAL_LOAD_DWORD %0, 40, 0, 0, implicit $exec
-# GCN-NEXT:   dead %12:vgpr_32 = GLOBAL_LOAD_DWORD %0, 44, 0, 0, implicit $exec
-# GCN-NEXT:   dead %13:vgpr_32 = GLOBAL_LOAD_DWORD %0, 48, 0, 0, implicit $exec
-# GCN-NEXT:   dead %14:vgpr_32 = GLOBAL_LOAD_DWORD %0, 52, 0, 0, implicit $exec
-# GCN-NEXT:   dead %15:vgpr_32 = GLOBAL_LOAD_DWORD %0, 56, 0, 0, implicit $exec
+# GCN-NEXT:   dead %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %4:vgpr_32 = GLOBAL_LOAD_DWORD %0, 12, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %5:vgpr_32 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %6:vgpr_32 = GLOBAL_LOAD_DWORD %0, 20, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %7:vgpr_32 = GLOBAL_LOAD_DWORD %0, 24, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %8:vgpr_32 = GLOBAL_LOAD_DWORD %0, 28, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %9:vgpr_32 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %10:vgpr_32 = GLOBAL_LOAD_DWORD %0, 36, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %11:vgpr_32 = GLOBAL_LOAD_DWORD %0, 40, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %12:vgpr_32 = GLOBAL_LOAD_DWORD %0, 44, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %13:vgpr_32 = GLOBAL_LOAD_DWORD %0, 48, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %14:vgpr_32 = GLOBAL_LOAD_DWORD %0, 52, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %15:vgpr_32 = GLOBAL_LOAD_DWORD %0, 56, 0, 0, 0, implicit $exec
 # GCN-NEXT: }
 # GCN-NEXT: dead early-clobber %16:vgpr_32, dead early-clobber %17:vgpr_32 = BUNDLE %0, implicit $exec {
-# GCN-NEXT:   dead %16:vgpr_32 = GLOBAL_LOAD_DWORD %0, 60, 0, 0, implicit $exec
-# GCN-NEXT:   dead %17:vgpr_32 = GLOBAL_LOAD_DWORD %0, 64, 0, 0, implicit $exec
+# GCN-NEXT:   dead %16:vgpr_32 = GLOBAL_LOAD_DWORD %0, 60, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %17:vgpr_32 = GLOBAL_LOAD_DWORD %0, 64, 0, 0, 0, implicit $exec
 # GCN-NEXT: }
 
 ---
@@ -247,36 +247,36 @@ registers:
 body:             |
   bb.0:
     %0 = IMPLICIT_DEF
-    %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, implicit $exec
-    %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, 0, implicit $exec
-    %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, 0, implicit $exec
-    %4:vgpr_32 = GLOBAL_LOAD_DWORD %0, 12, 0, 0, implicit $exec
-    %5:vgpr_32 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, implicit $exec
-    %6:vgpr_32 = GLOBAL_LOAD_DWORD %0, 20, 0, 0, implicit $exec
-    %7:vgpr_32 = GLOBAL_LOAD_DWORD %0, 24, 0, 0, implicit $exec
-    %8:vgpr_32 = GLOBAL_LOAD_DWORD %0, 28, 0, 0, implicit $exec
-    %9:vgpr_32 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, implicit $exec
-    %10:vgpr_32 = GLOBAL_LOAD_DWORD %0, 36, 0, 0, implicit $exec
-    %11:vgpr_32 = GLOBAL_LOAD_DWORD %0, 40, 0, 0, implicit $exec
-    %12:vgpr_32 = GLOBAL_LOAD_DWORD %0, 44, 0, 0, implicit $exec
-    %13:vgpr_32 = GLOBAL_LOAD_DWORD %0, 48, 0, 0, implicit $exec
-    %14:vgpr_32 = GLOBAL_LOAD_DWORD %0, 52, 0, 0, implicit $exec
-    %15:vgpr_32 = GLOBAL_LOAD_DWORD %0, 56, 0, 0, implicit $exec
-    %16:vgpr_32 = GLOBAL_LOAD_DWORD %0, 60, 0, 0, implicit $exec
-    %17:vgpr_32 = GLOBAL_LOAD_DWORD %0, 64, 0, 0, implicit $exec
+    %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec
+    %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, 0, 0, implicit $exec
+    %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, 0, 0, implicit $exec
+    %4:vgpr_32 = GLOBAL_LOAD_DWORD %0, 12, 0, 0, 0, implicit $exec
+    %5:vgpr_32 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, 0, implicit $exec
+    %6:vgpr_32 = GLOBAL_LOAD_DWORD %0, 20, 0, 0, 0, implicit $exec
+    %7:vgpr_32 = GLOBAL_LOAD_DWORD %0, 24, 0, 0, 0, implicit $exec
+    %8:vgpr_32 = GLOBAL_LOAD_DWORD %0, 28, 0, 0, 0, implicit $exec
+    %9:vgpr_32 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, 0, implicit $exec
+    %10:vgpr_32 = GLOBAL_LOAD_DWORD %0, 36, 0, 0, 0, implicit $exec
+    %11:vgpr_32 = GLOBAL_LOAD_DWORD %0, 40, 0, 0, 0, implicit $exec
+    %12:vgpr_32 = GLOBAL_LOAD_DWORD %0, 44, 0, 0, 0, implicit $exec
+    %13:vgpr_32 = GLOBAL_LOAD_DWORD %0, 48, 0, 0, 0, implicit $exec
+    %14:vgpr_32 = GLOBAL_LOAD_DWORD %0, 52, 0, 0, 0, implicit $exec
+    %15:vgpr_32 = GLOBAL_LOAD_DWORD %0, 56, 0, 0, 0, implicit $exec
+    %16:vgpr_32 = GLOBAL_LOAD_DWORD %0, 60, 0, 0, 0, implicit $exec
+    %17:vgpr_32 = GLOBAL_LOAD_DWORD %0, 64, 0, 0, 0, implicit $exec
 ...
 
 # GCN-LABEL: {{^}}name: reg_pressure{{$}}
 # GCN:      dead early-clobber %2:vreg_128, dead early-clobber %4:vreg_128, dead early-clobber %1:vreg_128, dead early-clobber %3:vreg_128, dead early-clobber %5:vreg_128 = BUNDLE %0, implicit $exec {
-# GCN-NEXT:   dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
-# GCN-NEXT:   dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, implicit $exec
-# GCN-NEXT:   dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, implicit $exec
-# GCN-NEXT:   dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, implicit $exec
-# GCN-NEXT:   dead %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 64, 0, 0, implicit $exec
+# GCN-NEXT:   dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 64, 0, 0, 0, implicit $exec
 # GCN-NEXT: }
 # GCN-NEXT: dead early-clobber %7:vreg_128, dead early-clobber %6:vreg_128 = BUNDLE %0, implicit $exec {
-# GCN-NEXT:   dead %6:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 80, 0, 0, implicit $exec
-# GCN-NEXT:   dead %7:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 96, 0, 0, implicit $exec
+# GCN-NEXT:   dead %6:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 80, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %7:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 96, 0, 0, 0, implicit $exec
 # GCN-NEXT: }
 
 ---
@@ -294,13 +294,13 @@ registers:
 body:             |
   bb.0:
     %0 = IMPLICIT_DEF
-    %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
-    %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, implicit $exec
-    %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, implicit $exec
-    %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, implicit $exec
-    %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 64, 0, 0, implicit $exec
-    %6:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 80, 0, 0, implicit $exec
-    %7:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 96, 0, 0, implicit $exec
+    %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
+    %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, implicit $exec
+    %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, implicit $exec
+    %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, implicit $exec
+    %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 64, 0, 0, 0, implicit $exec
+    %6:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 80, 0, 0, 0, implicit $exec
+    %7:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 96, 0, 0, 0, implicit $exec
 ...
 
 # GCN-LABEL: {{^}}name: image_clause{{$}}
@@ -336,8 +336,8 @@ body:             |
 # GCN-LABEL: {{^}}name: mixed_clause{{$}}
 # GCN:      dead early-clobber %4:vreg_128, dead early-clobber %3:vreg_128, dead early-clobber %5:vgpr_32 = BUNDLE %0, %2, %1, implicit $exec {
 # GCN-NEXT:   dead %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT:   dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
-# GCN-NEXT:   dead %5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, 0, implicit $exec
 # GCN-NEXT: }
 
 ---
@@ -356,8 +356,8 @@ body:             |
     %1 = IMPLICIT_DEF
     %2 = IMPLICIT_DEF
     %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
-    %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
-    %5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, implicit $exec
+    %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
+    %5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, 0, implicit $exec
 ...
 
 # GCN-LABEL: {{^}}name: atomic{{$}}

Modified: llvm/trunk/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir Tue Apr 30 15:08:23 2019
@@ -23,7 +23,7 @@
 # COMMON: [[CMP1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec
 # COMMON: [[CMP:%[0-9]+]]:sreg_64 = S_AND_B64 [[CMP0]], [[CMP1]], implicit-def $scc
 # COMMON: [[TMPEXEC:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
-# COMMON: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
+# COMMON: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, implicit $exec
 # COMMON: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
 # COMMON: S_CBRANCH_EXECNZ %bb.1, implicit $exec
 # COMMON-LABEL: bb.2:
@@ -47,7 +47,7 @@ body:             |
     %1:vgpr_32 = COPY $vgpr1
     %0:vgpr_32 = COPY $vgpr0
     %6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
-    %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed %6, 0, 0, 0, 0, 0, implicit $exec
+    %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed %6, 0, 0, 0, 0, 0, 0, implicit $exec
     $sgpr30_sgpr31 = COPY %5
     $vgpr0 = COPY %7
     S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
@@ -69,7 +69,7 @@ body:             |
 # COMMON: [[CMP1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec
 # COMMON: [[CMP:%[0-9]+]]:sreg_64 = S_AND_B64 [[CMP0]], [[CMP1]], implicit-def $scc
 # COMMON: [[TMPEXEC:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
-# COMMON: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
+# COMMON: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, implicit $exec
 # COMMON: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
 # COMMON: S_CBRANCH_EXECNZ %bb.1, implicit $exec
 # COMMON-LABEL: bb.2:
@@ -93,7 +93,7 @@ body:             |
     %1:vgpr_32 = COPY $vgpr1
     %0:vgpr_32 = COPY $vgpr0
     %6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
-    %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed %6, 0, 0, 0, 0, 0, implicit $exec
+    %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed %6, 0, 0, 0, 0, 0, 0, implicit $exec
     $sgpr30_sgpr31 = COPY %5
     $vgpr0 = COPY %7
     S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
@@ -115,7 +115,7 @@ body:             |
 # COMMON: [[CMP1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec
 # COMMON: [[CMP:%[0-9]+]]:sreg_64 = S_AND_B64 [[CMP0]], [[CMP1]], implicit-def $scc
 # COMMON: [[TMPEXEC:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
-# COMMON: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
+# COMMON: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, implicit $exec
 # COMMON: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
 # COMMON: S_CBRANCH_EXECNZ %bb.1, implicit $exec
 # COMMON-LABEL: bb.2:
@@ -139,7 +139,7 @@ body:             |
     %1:vgpr_32 = COPY $vgpr1
     %0:vgpr_32 = COPY $vgpr0
     %6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
-    %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed %6, 0, 0, 0, 0, 0, implicit $exec
+    %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed %6, 0, 0, 0, 0, 0, 0, implicit $exec
     $sgpr30_sgpr31 = COPY %5
     $vgpr0 = COPY %7
     S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
@@ -155,7 +155,7 @@ body:             |
 # COMMON: %9:vgpr_32 = V_ADD_I32_e32 %12.sub0, %4.sub0, implicit-def $vcc, implicit $exec
 # COMMON: %10:vgpr_32 = V_ADDC_U32_e32 %12.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
 # COMMON: %11:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %10, %subreg.sub1
-# COMMON: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %11, killed %16, 0, 0, 0, 0, 0, implicit $exec
+# COMMON: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %11, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
 ---
 name:            addr64
 liveins:
@@ -175,7 +175,7 @@ body:             |
     %1:vgpr_32 = COPY $vgpr1
     %0:vgpr_32 = COPY $vgpr0
     %6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
-    %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %4, killed %6, 0, 0, 0, 0, 0, implicit $exec
+    %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %4, killed %6, 0, 0, 0, 0, 0, 0, implicit $exec
     $sgpr30_sgpr31 = COPY %5
     $vgpr0 = COPY %7
     S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
@@ -198,7 +198,7 @@ body:             |
 # NO-ADDR64: [[CMP1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec
 # NO-ADDR64: [[CMP:%[0-9]+]]:sreg_64 = S_AND_B64 [[CMP0]], [[CMP1]], implicit-def $scc
 # NO-ADDR64: [[TMPEXEC:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
-# NO-ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
+# NO-ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, 0, implicit $exec
 # NO-ADDR64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
 # NO-ADDR64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
 # NO-ADDR64-LABEL: bb.2:
@@ -211,7 +211,7 @@ body:             |
 # ADDR64: [[RSRCFMTHI:%[0-9]+]]:sgpr_32 = S_MOV_B32 61440
 # ADDR64: [[ZERORSRC:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[ZERO64]], %subreg.sub0_sub1, [[RSRCFMTLO]], %subreg.sub2, [[RSRCFMTHI]], %subreg.sub3
 # ADDR64: [[VADDR64:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[RSRCPTR]].sub0, %subreg.sub0, [[RSRCPTR]].sub1, %subreg.sub1
-# ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 [[VADDR64]], [[ZERORSRC]], 0, 0, 0, 0, 0, implicit $exec
+# ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 [[VADDR64]], [[ZERORSRC]], 0, 0, 0, 0, 0, 0, implicit $exec
 
 ---
 name:            offset
@@ -232,7 +232,7 @@ body:             |
     %1:vgpr_32 = COPY $vgpr1
     %0:vgpr_32 = COPY $vgpr0
     %6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
-    %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed %6, 0, 0, 0, 0, 0, implicit $exec
+    %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed %6, 0, 0, 0, 0, 0, 0, implicit $exec
     $sgpr30_sgpr31 = COPY %5
     $vgpr0 = COPY %7
     S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0

Modified: llvm/trunk/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir Tue Apr 30 15:08:23 2019
@@ -105,8 +105,8 @@ body:             |
 
     %3 = COPY $sgpr0_sgpr1
     %2 = COPY $vgpr0
-    %7 = S_LOAD_DWORDX2_IMM %3, 9, 0
-    %8 = S_LOAD_DWORDX2_IMM %3, 11, 0
+    %7 = S_LOAD_DWORDX2_IMM %3, 9, 0, 0
+    %8 = S_LOAD_DWORDX2_IMM %3, 11, 0, 0
     %6 = COPY %7
     %9 = S_MOV_B32 0
     %10 = REG_SEQUENCE %2, %subreg.sub0, killed %9, %subreg.sub1
@@ -137,7 +137,7 @@ body:             |
     %28 = REG_SEQUENCE %6, 17, killed %27, 18
     %29 = V_MOV_B32_e32 0, implicit $exec
     %30 = COPY %24
-    BUFFER_STORE_DWORD_ADDR64 killed %29, killed %30, killed %28, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_ADDR64 killed %29, killed %30, killed %28, 0, 0, 0, 0, 0, 0, implicit $exec
 
   bb.2.bb2:
     SI_END_CF %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
@@ -203,9 +203,9 @@ body:             |
 
     %3 = COPY $sgpr0_sgpr1
     %2 = COPY $vgpr0
-    %7 = S_LOAD_DWORDX2_IMM %3, 9, 0
-    %8 = S_LOAD_DWORDX2_IMM %3, 11, 0
-    %9 = S_LOAD_DWORDX2_IMM %3, 13, 0
+    %7 = S_LOAD_DWORDX2_IMM %3, 9, 0, 0
+    %8 = S_LOAD_DWORDX2_IMM %3, 11, 0, 0
+    %9 = S_LOAD_DWORDX2_IMM %3, 13, 0, 0
     %6 = COPY %7
     %10 = S_MOV_B32 0
     %11 = REG_SEQUENCE %2, %subreg.sub0, killed %10, %subreg.sub1
@@ -243,7 +243,7 @@ body:             |
     %37 = REG_SEQUENCE %6, 17, killed %36, 18
     %38 = V_MOV_B32_e32 0, implicit $exec
     %39 = COPY %33
-    BUFFER_STORE_DWORD_ADDR64 killed %38, killed %39, killed %37, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_ADDR64 killed %38, killed %39, killed %37, 0, 0, 0, 0, 0, 0, implicit $exec
 
   bb.2.bb2:
     SI_END_CF %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
@@ -300,8 +300,8 @@ body:             |
 
     %3 = COPY $sgpr0_sgpr1
     %2 = COPY $vgpr0
-    %7 = S_LOAD_DWORDX2_IMM %3, 9, 0
-    %8 = S_LOAD_DWORDX2_IMM %3, 11, 0
+    %7 = S_LOAD_DWORDX2_IMM %3, 9, 0, 0
+    %8 = S_LOAD_DWORDX2_IMM %3, 11, 0, 0
     %6 = COPY %7
     %9 = S_MOV_B32 0
     %10 = REG_SEQUENCE %2, %subreg.sub0, killed %9, %subreg.sub1
@@ -332,7 +332,7 @@ body:             |
     %28 = REG_SEQUENCE %6, 17, killed %27, 18
     %29 = V_MOV_B32_e32 0, implicit $exec
     %30 = COPY %24
-    BUFFER_STORE_DWORD_ADDR64 killed %29, killed %30, killed %28, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_ADDR64 killed %29, killed %30, killed %28, 0, 0, 0, 0, 0, 0, implicit $exec
 
   bb.2.bb2:
     SI_END_CF %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec

Modified: llvm/trunk/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir Tue Apr 30 15:08:23 2019
@@ -151,7 +151,7 @@ body:             |
 
     $sgpr7 = S_MOV_B32 61440
     $sgpr6 = S_MOV_B32 -1
-    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
 
   bb.2.end:
     liveins: $vgpr0, $sgpr0_sgpr1
@@ -159,7 +159,7 @@ body:             |
     $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
     $sgpr3 = S_MOV_B32 61440
     $sgpr2 = S_MOV_B32 -1
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -188,7 +188,7 @@ body:             |
 
     $sgpr7 = S_MOV_B32 61440
     $sgpr6 = S_MOV_B32 -1
-    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
 
   bb.2.end:
     liveins: $vgpr0, $sgpr0_sgpr1
@@ -196,7 +196,7 @@ body:             |
     $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
     $sgpr3 = S_MOV_B32 61440
     $sgpr2 = S_MOV_B32 -1
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -225,7 +225,7 @@ body:             |
 
     $sgpr7 = S_MOV_B32 61440
     $sgpr6 = S_MOV_B32 -1
-    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
 
   bb.2.end:
     liveins: $vgpr0, $sgpr0_sgpr1
@@ -233,14 +233,14 @@ body:             |
     $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
     $sgpr3 = S_MOV_B32 61440
     $sgpr2 = S_MOV_B32 -1
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
 ---
 # CHECK-LABEL: name: optimize_if_and_saveexec_xor_valu_middle
 # CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
-# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
 # CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
 # CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
 # CHECK-NEXT: SI_MASK_BRANCH
@@ -255,7 +255,7 @@ body:             |
     $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
     $vgpr0 = V_MOV_B32_e32 4, implicit $exec
     $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
-    BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
     $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
     $exec = S_MOV_B64_term killed $sgpr2_sgpr3
     SI_MASK_BRANCH %bb.2, implicit $exec
@@ -266,7 +266,7 @@ body:             |
 
     $sgpr7 = S_MOV_B32 61440
     $sgpr6 = S_MOV_B32 -1
-    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
 
   bb.2.end:
     liveins: $vgpr0, $sgpr0_sgpr1
@@ -274,7 +274,7 @@ body:             |
     $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
     $sgpr3 = S_MOV_B32 61440
     $sgpr2 = S_MOV_B32 -1
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -304,7 +304,7 @@ body:             |
 
   bb.1.if:
     liveins: $sgpr0_sgpr1 , $sgpr4_sgpr5_sgpr6_sgpr7
-    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
 
   bb.2.end:
     liveins: $vgpr0, $sgpr0_sgpr1, $sgpr4_sgpr5_sgpr6_sgpr7
@@ -312,7 +312,7 @@ body:             |
     $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
     $sgpr3 = S_MOV_B32 61440
     $sgpr2 = S_MOV_B32 -1
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -346,7 +346,7 @@ body:             |
 
     $sgpr7 = S_MOV_B32 61440
     $sgpr6 = S_MOV_B32 -1
-    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
 
   bb.2.end:
     liveins: $vgpr0, $sgpr0_sgpr1
@@ -356,7 +356,7 @@ body:             |
     $sgpr1 = S_MOV_B32 1
     $sgpr2 = S_MOV_B32 -1
     $sgpr3 = S_MOV_B32 61440
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -387,7 +387,7 @@ body:             |
     S_SLEEP 0, implicit $sgpr2_sgpr3
     $sgpr7 = S_MOV_B32 61440
     $sgpr6 = S_MOV_B32 -1
-    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
 
   bb.2.end:
     liveins: $vgpr0, $sgpr0_sgpr1
@@ -395,7 +395,7 @@ body:             |
     $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
     $sgpr3 = S_MOV_B32 61440
     $sgpr2 = S_MOV_B32 -1
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -426,7 +426,7 @@ body:             |
 
     $sgpr7 = S_MOV_B32 61440
     $sgpr6 = S_MOV_B32 -1
-    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
 
   bb.2.end:
     liveins: $vgpr0, $sgpr0_sgpr1
@@ -434,7 +434,7 @@ body:             |
     $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
     $sgpr3 = S_MOV_B32 61440
     $sgpr2 = S_MOV_B32 -1
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -463,7 +463,7 @@ body:             |
 
     $sgpr7 = S_MOV_B32 61440
     $sgpr6 = S_MOV_B32 -1
-    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
 
   bb.2.end:
     liveins: $vgpr0, $sgpr0_sgpr1
@@ -471,7 +471,7 @@ body:             |
     $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
     $sgpr3 = S_MOV_B32 61440
     $sgpr2 = S_MOV_B32 -1
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -500,7 +500,7 @@ body:             |
 
     $sgpr7 = S_MOV_B32 61440
     $sgpr6 = S_MOV_B32 -1
-    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
 
   bb.2.end:
     liveins: $vgpr0, $sgpr0_sgpr1
@@ -508,7 +508,7 @@ body:             |
     $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
     $sgpr3 = S_MOV_B32 61440
     $sgpr2 = S_MOV_B32 -1
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -539,7 +539,7 @@ body:             |
 
     $sgpr7 = S_MOV_B32 61440
     $sgpr6 = S_MOV_B32 -1
-    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
 
   bb.2.end:
     liveins: $vgpr0, $sgpr0_sgpr1
@@ -547,6 +547,6 @@ body:             |
     $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
     $sgpr3 = S_MOV_B32 61440
     $sgpr2 = S_MOV_B32 -1
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir Tue Apr 30 15:08:23 2019
@@ -27,12 +27,12 @@ body:             |
   ; CHECK:   liveins: $sgpr4, $sgpr0_sgpr1_sgpr2_sgpr3
   ; CHECK:   $sgpr5 = COPY $sgpr4
   ; CHECK:   $sgpr6 = S_ADD_U32 $sgpr5, 524288, implicit-def $scc
-  ; CHECK:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5)
+  ; CHECK:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5)
   ; CHECK:   S_BRANCH %bb.1
   ; CHECK: bb.1:
   ; CHECK:   liveins: $sgpr5, $sgpr0_sgpr1_sgpr2_sgpr3
   ; CHECK:   $sgpr4 = S_ADD_U32 $sgpr5, 524288, implicit-def $scc
-  ; CHECK:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5)
+  ; CHECK:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5)
   ; CHECK:   S_ENDPGM 0, implicit $vgpr0
   bb.0:
     $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)

Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir Tue Apr 30 15:08:23 2019
@@ -8,7 +8,7 @@ name: diffoporder_add
 body:             |
   bb.0.entry:
     %0:sgpr_64 = COPY $sgpr0_sgpr1
-    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
+    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
     %3:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
     %4:sreg_32_xm0 = COPY $sgpr101
     %5:sreg_32_xm0 = S_MOV_B32 0
@@ -34,12 +34,12 @@ body:             |
     %26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %25, %21, 0, implicit $exec
     %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
     %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
-    %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, implicit $exec
+    %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec
     %32:sgpr_32 = S_MOV_B32 6144
     %33:vgpr_32, %34:sreg_64_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec
     %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
     %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
-    %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, implicit $exec
+    %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, implicit $exec
 ...
 ---
 
@@ -61,7 +61,7 @@ name: LowestInMiddle
 body:             |
   bb.0.entry:
     %0:sgpr_64 = COPY $sgpr0_sgpr1
-    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
+    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
     %3:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
     %4:sreg_32_xm0 = COPY $sgpr101
     %5:sreg_32_xm0 = S_MOV_B32 0
@@ -87,17 +87,17 @@ body:             |
     %26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec
     %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
     %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
-    %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, implicit $exec
+    %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec
     %32:sgpr_32 = S_MOV_B32 6400
     %33:vgpr_32, %34:sreg_64_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec
     %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
     %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
-    %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, implicit $exec
+    %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, implicit $exec
     %39:sgpr_32 = S_MOV_B32 11200
     %40:vgpr_32, %41:sreg_64_xexec = V_ADD_I32_e64 %21, %39, 0, implicit $exec
     %42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
     %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
-    %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, implicit $exec
+    %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, 0, implicit $exec
 ...
 ---
 
@@ -114,7 +114,7 @@ name: NegativeDistance
 body:             |
   bb.0.entry:
     %0:sgpr_64 = COPY $sgpr0_sgpr1
-    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
+    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
     %3:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
     %4:sreg_32_xm0 = COPY $sgpr101
     %5:sreg_32_xm0 = S_MOV_B32 0
@@ -140,17 +140,17 @@ body:             |
     %26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec
     %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
     %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
-    %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, implicit $exec
+    %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec
     %32:sgpr_32 = S_MOV_B32 8192
     %33:vgpr_32, %34:sreg_64_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec
     %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
     %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
-    %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, implicit $exec
+    %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, implicit $exec
     %39:sgpr_32 = S_MOV_B32 10240
     %40:vgpr_32, %41:sreg_64_xexec = V_ADD_I32_e64 %21, %39, 0, implicit $exec
     %42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
     %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
-    %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, implicit $exec
+    %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, 0, implicit $exec
 ...
 ---
 
@@ -159,7 +159,7 @@ name: assert_hit
 body:             |
     bb.0.entry:
     %0:sgpr_64 = COPY $sgpr0_sgpr1
-    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
+    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
     %3:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
     %4:sreg_32_xm0 = COPY $sgpr101
     %5:sreg_32_xm0 = S_MOV_B32 0
@@ -186,5 +186,5 @@ body:             |
     %26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec
     %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 4294967295, killed %27, 0, implicit $exec
     %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
-    %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, implicit $exec
+    %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/readlane_exec0.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/readlane_exec0.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/readlane_exec0.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/readlane_exec0.mir Tue Apr 30 15:08:23 2019
@@ -22,7 +22,7 @@ body:       |
 
    $sgpr10 = V_READFIRSTLANE_B32 $vgpr2, implicit $exec
    $sgpr11 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec
-   $sgpr10 = S_LOAD_DWORD_IMM killed $sgpr10_sgpr11, 0, 0
+   $sgpr10 = S_LOAD_DWORD_IMM killed $sgpr10_sgpr11, 0, 0, 0
    S_WAITCNT 127
    $vgpr0 = V_XOR_B32_e32 killed $sgpr10, killed $vgpr0, implicit $exec
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir Tue Apr 30 15:08:23 2019
@@ -185,9 +185,9 @@ body:             |
   bb.28:
     %9 = S_FF1_I32_B32 undef %10
     %13 = V_MAD_U32_U24 killed %9, 48, 32, 0, implicit $exec
-    %45 = BUFFER_LOAD_DWORD_OFFEN killed %13, undef %15, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+    %45 = BUFFER_LOAD_DWORD_OFFEN killed %13, undef %15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
     %46 = V_AND_B32_e32 1, killed %45, implicit $exec
-    %21 = S_BUFFER_LOAD_DWORD_SGPR undef %22, undef %23, 0 :: (dereferenceable invariant load 4)
+    %21 = S_BUFFER_LOAD_DWORD_SGPR undef %22, undef %23, 0, 0 :: (dereferenceable invariant load 4)
     %25 = V_CMP_GE_F32_e64 0, 0, 0, killed %21, 0, implicit $exec
     %26 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed %25, implicit $exec
     %62 = IMPLICIT_DEF

Modified: llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join.mir Tue Apr 30 15:08:23 2019
@@ -96,14 +96,14 @@ body:             |
     %0.sub0 = COPY killed %12
     %21 = COPY killed %18
     %21.sub0 = COPY killed %15
-    %22 = S_LOAD_DWORD_IMM killed %21, 2, 0
+    %22 = S_LOAD_DWORD_IMM killed %21, 2, 0, 0
     %23 = S_MOV_B32 491436
     undef %24.sub0 = COPY killed %22
     %24.sub1 = COPY killed %23
-    %25 = S_LOAD_DWORDX4_IMM killed %24, 0, 0
+    %25 = S_LOAD_DWORDX4_IMM killed %24, 0, 0, 0
     %1 = COPY killed %25
-    %26 = S_LOAD_DWORDX2_IMM %0, 2, 0
-    dead %27 = S_LOAD_DWORD_IMM killed %26, 0, 0
+    %26 = S_LOAD_DWORDX2_IMM %0, 2, 0, 0
+    dead %27 = S_LOAD_DWORD_IMM killed %26, 0, 0, 0
     S_CBRANCH_SCC0 %bb.1, implicit undef $scc
 
   bb.5:
@@ -129,8 +129,8 @@ body:             |
   bb.2:
     %4 = COPY killed %59
     %3 = COPY killed %58
-    %39 = S_LOAD_DWORDX2_IMM killed %0, 6, 0
-    %40 = S_LOAD_DWORD_IMM killed %39, 0, 0
+    %39 = S_LOAD_DWORDX2_IMM killed %0, 6, 0, 0
+    %40 = S_LOAD_DWORD_IMM killed %39, 0, 0, 0
     %43 = V_MOV_B32_e32 -1102263091, implicit $exec
     %60 = COPY killed %4
     %61 = COPY killed %3

Modified: llvm/trunk/test/CodeGen/AMDGPU/regcoalesce-dbg.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/regcoalesce-dbg.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/regcoalesce-dbg.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/regcoalesce-dbg.mir Tue Apr 30 15:08:23 2019
@@ -56,8 +56,8 @@ body:             |
 
     %3 = COPY killed $vgpr0
     %0 = COPY killed $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
-    %5 = S_LOAD_DWORD_IMM killed %0, 13, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %5 = S_LOAD_DWORD_IMM killed %0, 13, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     %18 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     undef %19.sub0 = COPY killed %3
     %19.sub1 = COPY killed %18
@@ -70,7 +70,7 @@ body:             |
     %13.sub2_sub3 = COPY killed %12
     %20 = V_LSHL_B64 killed %19, 2, implicit $exec
     %16 = COPY killed %5
-    BUFFER_STORE_DWORD_ADDR64 killed %16, killed %20, killed %13, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
+    BUFFER_STORE_DWORD_ADDR64 killed %16, killed %20, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
     S_ENDPGM 0
 
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir Tue Apr 30 15:08:23 2019
@@ -16,7 +16,7 @@ body:             |
     %23:vgpr_32 = V_CVT_U32_F32_e32 killed %21, implicit $exec
     %108:vgpr_32 = V_LSHRREV_B32_e32 4, killed %23, implicit $exec
     undef %109.sub1:vreg_128 = COPY %108
-    %28:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %29:sreg_128, 3044, 0 :: (dereferenceable invariant load 4)
+    %28:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %29:sreg_128, 3044, 0, 0 :: (dereferenceable invariant load 4)
     S_CMP_EQ_U32 killed %28, 0, implicit-def $scc
     S_CBRANCH_SCC0 %bb.2, implicit killed $scc
   
@@ -47,7 +47,7 @@ body:             |
     S_BRANCH %bb.6
   
   bb.6:
-    %36:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %37:sreg_128, 2708, 0 :: (dereferenceable invariant load 4)
+    %36:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %37:sreg_128, 2708, 0, 0 :: (dereferenceable invariant load 4)
     %39:vgpr_32 = nnan arcp contract reassoc V_MAD_F32 0, killed %110.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 0, 0, 0, 0, implicit $exec
     %40:vgpr_32 = V_MAD_F32 0, %111.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 0, 0, 0, 0, implicit $exec
     %41:vgpr_32 = V_MUL_F32_e64 0, 0, 0, killed %40, 1, 0, implicit $exec
@@ -83,7 +83,7 @@ body:             |
     S_BRANCH %bb.8
   
   bb.8:
-    dead %66:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %67:sreg_128, 2704, 0 :: (dereferenceable invariant load 4)
+    dead %66:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %67:sreg_128, 2704, 0, 0 :: (dereferenceable invariant load 4)
     %138:vreg_128 = COPY killed %111
   
   bb.9:

Modified: llvm/trunk/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir Tue Apr 30 15:08:23 2019
@@ -63,12 +63,12 @@ body:             |
 
   bb.3:
     %1 = COPY killed %17
-    FLAT_STORE_DWORD undef %10, %1.sub2, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD undef %10, %1.sub2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     %14 = COPY %1.sub1
     %16 = COPY killed %1.sub0
     undef %15.sub0 = COPY killed %16
     %15.sub1 = COPY killed %14
-    FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 
 ...
@@ -134,10 +134,10 @@ body:             |
     %6.sub2 = COPY %6.sub0
 
   bb.2:
-    BUFFER_STORE_DWORD_OFFEN %6.sub3, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 12, 0, 0, 0, implicit $exec
-    BUFFER_STORE_DWORD_OFFEN %6.sub2, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, implicit $exec
-    BUFFER_STORE_DWORD_OFFEN %6.sub1, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, implicit $exec
-    BUFFER_STORE_DWORD_OFFEN %6.sub0, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFEN %6.sub3, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 12, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFEN %6.sub2, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFEN %6.sub1, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFEN %6.sub0, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
     $sgpr30_sgpr31 = COPY %5
     S_SETPC_B64_return $sgpr30_sgpr31
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir Tue Apr 30 15:08:23 2019
@@ -58,7 +58,7 @@ machineFunctionInfo:
 
 body: |
   bb.0:
-    S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0
+    S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0, 0
     S_ENDPGM 0
 ...
 ---
@@ -76,7 +76,7 @@ machineFunctionInfo:
 
 body: |
   bb.0:
-    S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0
+    S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0, 0
     S_DCACHE_WB
     S_ENDPGM 0
 ...
@@ -97,7 +97,7 @@ machineFunctionInfo:
 body: |
   bb.0:
     S_DCACHE_WB
-    S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0
+    S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0, 0
     S_ENDPGM 0
 ...
 ---
@@ -132,11 +132,11 @@ machineFunctionInfo:
 
 body: |
   bb.0:
-    S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0
+    S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0, 0
     S_ENDPGM 0
 
   bb.1:
-    S_STORE_DWORD_SGPR undef $sgpr4, undef $sgpr6_sgpr7, undef $m0, 0
+    S_STORE_DWORD_SGPR undef $sgpr4, undef $sgpr6_sgpr7, undef $m0, 0, 0
     S_ENDPGM 0
 ...
 ...
@@ -164,7 +164,7 @@ body: |
     S_ENDPGM 0
 
   bb.1:
-    S_STORE_DWORD_SGPR undef $sgpr4, undef $sgpr6_sgpr7, undef $m0, 0
+    S_STORE_DWORD_SGPR undef $sgpr4, undef $sgpr6_sgpr7, undef $m0, 0, 0
     S_ENDPGM 0
 ...
 ---
@@ -182,6 +182,6 @@ machineFunctionInfo:
 
 body: |
   bb.0:
-    S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0
+    S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0, 0
     SI_RETURN_TO_EPILOG undef $vgpr0
 ...
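[The scalar side follows the same scheme: S_STORE_DWORD_SGPR above, like the S_LOAD_*/S_BUFFER_LOAD_* opcodes elsewhere in this diff, grows the same trailing immediate. A hedged sketch of how code inspecting these instructions could locate the new operand by name rather than by position, assuming the patch registers it as AMDGPU::OpName::dlc (an assumption; the diff shown here only touches the tests):

// Sketch under that assumption: query the appended bit via the AMDGPU
// named-operand tables so the lookup survives future operand reordering.
#include "Utils/AMDGPUBaseInfo.h"              // backend-internal header
#include "llvm/CodeGen/MachineInstr.h"

static bool isDLCSet(const llvm::MachineInstr &MI) {
  int Idx = llvm::AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             llvm::AMDGPU::OpName::dlc);
  // Opcodes that never grew the operand report -1 here.
  return Idx >= 0 && MI.getOperand(Idx).getImm() != 0;
}
]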

Modified: llvm/trunk/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir Tue Apr 30 15:08:23 2019
@@ -23,8 +23,8 @@ body:             |
   ; CHECK:   liveins: $vgpr0
   ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
   ; CHECK:   [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
-  ; CHECK:   [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF]], 0, 0, 0, implicit $exec
-  ; CHECK:   [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 8, 0, 0, implicit $exec
+  ; CHECK:   [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF]], 0, 0, 0, 0, implicit $exec
+  ; CHECK:   [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 8, 0, 0, 0, implicit $exec
   ; CHECK:   undef %4.sub1:vreg_64 = V_ADD_U32_e32 [[COPY]], [[COPY]], implicit $exec
   ; CHECK:   %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
   ; CHECK:   [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
@@ -40,19 +40,19 @@ body:             |
   ; CHECK:   [[COPY1:%[0-9]+]]:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_]]
   ; CHECK:   undef %6.sub0:vreg_64 = V_ADD_F32_e32 [[DEF]].sub0, [[COPY1]].sub0, implicit $exec
   ; CHECK:   dead undef %6.sub1:vreg_64 = V_ADD_F32_e32 [[DEF]].sub1, [[COPY1]].sub0, implicit $exec
-  ; CHECK:   [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY1]], 0, 0, 0, implicit $exec
+  ; CHECK:   [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY1]], 0, 0, 0, 0, implicit $exec
   ; CHECK:   [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; CHECK:   undef %19.sub0:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD1]], [[GLOBAL_LOAD_DWORDX2_]].sub0, implicit $exec
   ; CHECK:   %19.sub1:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD]], implicit $exec
-  ; CHECK:   GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, 0, implicit $exec
-  ; CHECK:   %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, 0, implicit $exec
-  ; CHECK:   [[DEF2]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF3]], 0, 0, 0, implicit $exec
-  ; CHECK:   dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, 0, implicit $exec
-  ; CHECK:   dead %21:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF4]], 0, 0, 0, implicit $exec
+  ; CHECK:   GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, 0, 0, implicit $exec
+  ; CHECK:   %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, 0, 0, implicit $exec
+  ; CHECK:   [[DEF2]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF3]], 0, 0, 0, 0, implicit $exec
+  ; CHECK:   dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, 0, 0, implicit $exec
+  ; CHECK:   dead %21:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF4]], 0, 0, 0, 0, implicit $exec
   ; CHECK:   [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 2, [[DEF2]], implicit $exec
-  ; CHECK:   dead %22:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF5]], 0, 0, 0, implicit $exec
+  ; CHECK:   dead %22:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF5]], 0, 0, 0, 0, implicit $exec
   ; CHECK:   S_NOP 0, implicit [[DEF7]], implicit [[V_LSHLREV_B64_]].sub0, implicit [[DEF6]], implicit [[V_MOV_B32_e32_]]
-  ; CHECK:   GLOBAL_STORE_DWORD [[DEF5]], [[V_MOV_B32_e32_1]], 0, 0, 0, implicit $exec
+  ; CHECK:   GLOBAL_STORE_DWORD [[DEF5]], [[V_MOV_B32_e32_1]], 0, 0, 0, 0, implicit $exec
   ; CHECK: bb.1:
   ; CHECK:   successors: %bb.2(0x80000000)
   ; CHECK:   S_SETREG_IMM32_B32 0, 1
@@ -69,14 +69,14 @@ body:             |
 
     %0:vgpr_32 = COPY $vgpr0
     %1:vreg_64 = IMPLICIT_DEF
-    %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1, 0, 0, 0, implicit $exec
-    %3:vgpr_32 = GLOBAL_LOAD_DWORD %1, 8, 0, 0, implicit $exec
+    %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1, 0, 0, 0, 0, implicit $exec
+    %3:vgpr_32 = GLOBAL_LOAD_DWORD %1, 8, 0, 0, 0, implicit $exec
     undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0, %0, implicit $exec
     %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
     %5:vreg_64 = COPY %2
     undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0, %5.sub0, implicit $exec
     %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1, %5.sub0, implicit $exec
-    %7:vgpr_32 = GLOBAL_LOAD_DWORD %5, 0, 0, 0, implicit $exec
+    %7:vgpr_32 = GLOBAL_LOAD_DWORD %5, 0, 0, 0, 0, implicit $exec
     %8:vreg_64 = IMPLICIT_DEF
     %9:vreg_64 = IMPLICIT_DEF
     %10:vreg_64 = IMPLICIT_DEF
@@ -90,15 +90,15 @@ body:             |
     %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7, %2.sub0, implicit $exec
     %19.sub1:vreg_64 = V_ADD_F32_e32 %3, %3, implicit $exec
-    GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, 0, implicit $exec
-    %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9, 0, 0, 0, implicit $exec
-    %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10, 0, 0, 0, implicit $exec
-    %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, 0, implicit $exec
-    %21:vgpr_32 = GLOBAL_LOAD_DWORD %14, 0, 0, 0, implicit $exec
-    %22:vgpr_32 = GLOBAL_LOAD_DWORD %15, 0, 0, 0, implicit $exec
+    GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, 0, 0, implicit $exec
+    %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9, 0, 0, 0, 0, implicit $exec
+    %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10, 0, 0, 0, 0, implicit $exec
+    %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, 0, 0, implicit $exec
+    %21:vgpr_32 = GLOBAL_LOAD_DWORD %14, 0, 0, 0, 0, implicit $exec
+    %22:vgpr_32 = GLOBAL_LOAD_DWORD %15, 0, 0, 0, 0, implicit $exec
     %23:vreg_64 = V_LSHLREV_B64 2, %8, implicit $exec
     S_NOP 0, implicit %13, implicit %23.sub0, implicit %12, implicit %17
-    GLOBAL_STORE_DWORD %15, %18, 0, 0, 0, implicit $exec
+    GLOBAL_STORE_DWORD %15, %18, 0, 0, 0, 0, implicit $exec
 
   bb.1:
     S_SETREG_IMM32_B32 0, 1

Modified: llvm/trunk/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir Tue Apr 30 15:08:23 2019
@@ -200,12 +200,12 @@ body:             |
     %2:vgpr_32 = COPY $vgpr2
     %1:vgpr_32 = COPY $vgpr1
     %0:vgpr_32 = COPY $vgpr0
-    %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
-    %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
-    %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 16, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
-    %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 24, 0
-    %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 32, 0
-    %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 4, 0
+    %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 8, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 16, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 24, 0, 0
+    %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 32, 0, 0
+    %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 4, 0, 0
     %11:sreg_32_xm0 = S_LSHR_B32 %10.sub0, 16, implicit-def dead $scc
     %12:sreg_32_xm0 = S_MUL_I32 %11, %10.sub1
     %13:vgpr_32 = V_MUL_LO_I32 0, %0, implicit $exec
@@ -217,29 +217,29 @@ body:             |
     %19:sreg_32_xm0_xexec = IMPLICIT_DEF
     %20:vgpr_32 = V_ADD_I32_e32 %19, %0, implicit-def dead $vcc, implicit $exec
     %21:vreg_64, dead %22:sreg_64 = V_MAD_I64_I32 %20, 12, %7, 0, implicit $exec
-    %23:vgpr_32 = GLOBAL_LOAD_DWORD %21, 4, 0, 0, implicit $exec
+    %23:vgpr_32 = GLOBAL_LOAD_DWORD %21, 4, 0, 0, 0, implicit $exec
     %24:vreg_64, dead %25:sreg_64 = V_MAD_I64_I32 %20, 48, %8, 0, implicit $exec
     %26:vreg_128 = IMPLICIT_DEF
-    undef %27.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6, 0, 0
+    undef %27.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6, 0, 0, 0
     %27.sub1:sreg_64_xexec = S_MOV_B32 0
     %28:sreg_64 = S_LSHL_B64 %27, 2, implicit-def dead $scc
     undef %29.sub0:sreg_64 = S_ADD_U32 %5.sub0, %28.sub0, implicit-def $scc
     %29.sub1:sreg_64 = S_ADDC_U32 %5.sub1, %28.sub1, implicit-def dead $scc, implicit killed $scc
-    undef %30.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6, 4, 0
+    undef %30.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6, 4, 0, 0
     %27.sub0:sreg_64_xexec = IMPLICIT_DEF
     %31:sreg_64 = S_LSHL_B64 %27, 2, implicit-def dead $scc
     %32:sreg_32_xm0 = S_ADD_U32 0, %31.sub0, implicit-def $scc
     %33:sgpr_32 = S_ADDC_U32 %5.sub1, %31.sub1, implicit-def dead $scc, implicit killed $scc
     %34:vgpr_32 = IMPLICIT_DEF
     %35:vreg_64, dead %36:sreg_64 = V_MAD_I64_I32 %23, %34, 0, 0, implicit $exec
-    %37:vreg_64 = GLOBAL_LOAD_DWORDX2 %35, 32, 0, 0, implicit $exec
+    %37:vreg_64 = GLOBAL_LOAD_DWORDX2 %35, 32, 0, 0, 0, implicit $exec
     undef %38.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %37.sub0, implicit $exec
     %38.sub0:vreg_64 = COPY %37.sub0
     %39:vreg_64 = V_LSHLREV_B64 3, %38, implicit $exec
     undef %40.sub0:vreg_64, %41:sreg_64_xexec = V_ADD_I32_e64 0, %39.sub0, 0, implicit $exec
     %42:vgpr_32 = COPY %33
     %40.sub1:vreg_64, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %42, %39.sub1, %41, 0, implicit $exec
-    %44:vreg_64 = GLOBAL_LOAD_DWORDX2 %40, 0, 0, 0, implicit $exec :: (load 8 from %ir.tmp34)
+    %44:vreg_64 = GLOBAL_LOAD_DWORDX2 %40, 0, 0, 0, 0, implicit $exec :: (load 8 from %ir.tmp34)
     undef %45.sub1:vreg_64 = IMPLICIT_DEF
     %45.sub0:vreg_64 = COPY %37.sub1
     %46:vreg_64 = V_LSHLREV_B64 3, %45, implicit $exec
@@ -247,7 +247,7 @@ body:             |
     %49:vgpr_32 = COPY %33
     %47.sub1:vreg_64, dead %50:sreg_64_xexec = V_ADDC_U32_e64 %49, %46.sub1, %48, 0, implicit $exec
     %51:vreg_64 = IMPLICIT_DEF
-    undef %52.sub0:vreg_64 = GLOBAL_LOAD_DWORD %35, 40, 0, 0, implicit $exec :: (load 4 from %ir.18 + 8)
+    undef %52.sub0:vreg_64 = GLOBAL_LOAD_DWORD %35, 40, 0, 0, 0, implicit $exec :: (load 4 from %ir.18 + 8)
     %52.sub1:vreg_64 = IMPLICIT_DEF
     %53:vreg_64 = V_LSHLREV_B64 3, %52, implicit $exec
     undef %54.sub0:vreg_64, %55:sreg_64_xexec = V_ADD_I32_e64 0, %53.sub0, 0, implicit $exec
@@ -258,31 +258,31 @@ body:             |
     %59:sreg_64 = IMPLICIT_DEF
     %60:sreg_32_xm0 = S_ADD_U32 %5.sub0, %59.sub0, implicit-def $scc
     %61:sgpr_32 = S_ADDC_U32 %5.sub1, %59.sub1, implicit-def dead $scc, implicit killed $scc
-    %62:vreg_64 = GLOBAL_LOAD_DWORDX2 %35, 0, 0, 0, implicit $exec :: (load 8 from %ir.20, align 4)
+    %62:vreg_64 = GLOBAL_LOAD_DWORDX2 %35, 0, 0, 0, 0, implicit $exec :: (load 8 from %ir.20, align 4)
     undef %63.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %62.sub0, implicit $exec
     %63.sub0:vreg_64 = COPY %62.sub0
     %64:vreg_64 = IMPLICIT_DEF
     undef %65.sub0:vreg_64, %66:sreg_64_xexec = V_ADD_I32_e64 %60, %64.sub0, 0, implicit $exec
     %67:vgpr_32 = COPY %61
     %65.sub1:vreg_64, dead %68:sreg_64_xexec = V_ADDC_U32_e64 %67, %64.sub1, %66, 0, implicit $exec
-    %69:vreg_128 = GLOBAL_LOAD_DWORDX4 %65, 0, 0, 0, implicit $exec :: (load 16 from %ir.tmp58)
+    %69:vreg_128 = GLOBAL_LOAD_DWORDX4 %65, 0, 0, 0, 0, implicit $exec :: (load 16 from %ir.tmp58)
     undef %70.sub1:vreg_64 = IMPLICIT_DEF
     %70.sub0:vreg_64 = IMPLICIT_DEF
     %71:vreg_64 = IMPLICIT_DEF
     undef %72.sub0:vreg_64, %73:sreg_64_xexec = V_ADD_I32_e64 %60, %71.sub0, 0, implicit $exec
     %74:vgpr_32 = COPY %61
     %72.sub1:vreg_64, dead %75:sreg_64_xexec = V_ADDC_U32_e64 0, %71.sub1, %73, 0, implicit $exec
-    %76:vreg_128 = GLOBAL_LOAD_DWORDX4 %72, 0, 0, 0, implicit $exec
+    %76:vreg_128 = GLOBAL_LOAD_DWORDX4 %72, 0, 0, 0, 0, implicit $exec
     %77:vgpr_32 = IMPLICIT_DEF
     %78:vgpr_32 = IMPLICIT_DEF
     %79:vgpr_32 = V_MUL_F32_e32 0, %77, implicit $exec
     %80:vgpr_32 = IMPLICIT_DEF
     %81:vgpr_32 = IMPLICIT_DEF
     %84:vgpr_32 = IMPLICIT_DEF
-    BUFFER_STORE_DWORD_OFFEN %84, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 108, 0, 0, 0, implicit $exec
-    BUFFER_STORE_DWORD_OFFEN %81, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 104, 0, 0, 0, implicit $exec
-    BUFFER_STORE_DWORD_OFFEN %80, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 100, 0, 0, 0, implicit $exec
-    BUFFER_STORE_DWORD_OFFEN %78, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 96, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFEN %84, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 108, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFEN %81, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 104, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFEN %80, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 100, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFEN %78, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 96, 0, 0, 0, 0, implicit $exec
     %85:vgpr_32 = IMPLICIT_DEF
     %86:vgpr_32 = IMPLICIT_DEF
     %87:vgpr_32 = IMPLICIT_DEF

Modified: llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure.mir Tue Apr 30 15:08:23 2019
@@ -47,7 +47,7 @@ body:             |
     liveins: $sgpr4_sgpr5
 
     %1 = COPY $sgpr4_sgpr5
-    %5 = S_LOAD_DWORD_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+    %5 = S_LOAD_DWORD_IMM %1, 0, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     $m0 = S_MOV_B32 -1
     %7 = COPY %5
     %6 = DS_READ_B32 %7, 0, 0, implicit $m0, implicit $exec

Modified: llvm/trunk/test/CodeGen/AMDGPU/sdwa-gfx9.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sdwa-gfx9.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sdwa-gfx9.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-gfx9.mir Tue Apr 30 15:08:23 2019
@@ -37,11 +37,11 @@ body:             |
     %2 = COPY $sgpr30_sgpr31
     %1 = COPY $vgpr2_vgpr3
     %0 = COPY $vgpr0_vgpr1
-    %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+    %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
     %12 = S_MOV_B32 123
     %10 = V_LSHRREV_B32_e64 16, %3, implicit $exec
     %11 = V_ADD_I32_e32 %12, killed %10, implicit-def $vcc, implicit $exec
-    FLAT_STORE_DWORD %0, %11, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+    FLAT_STORE_DWORD %0, %11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
     $sgpr30_sgpr31 = COPY %2
     S_SETPC_B64_return $sgpr30_sgpr31
 
@@ -80,9 +80,9 @@ body:             |
     %2 = COPY $sgpr30_sgpr31
     %1 = COPY $vgpr2_vgpr3
     %0 = COPY $vgpr0_vgpr1
-    %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+    %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
     %10 = V_LSHRREV_B32_e64 16, %3, implicit $exec
     %11 = V_TRUNC_F32_e64 0, killed %10, 1, 2, implicit-def $vcc, implicit $exec
-    FLAT_STORE_DWORD %0, %11, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+    FLAT_STORE_DWORD %0, %11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
     $sgpr30_sgpr31 = COPY %2
     S_SETPC_B64_return $sgpr30_sgpr31

Modified: llvm/trunk/test/CodeGen/AMDGPU/sdwa-ops.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sdwa-ops.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sdwa-ops.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-ops.mir Tue Apr 30 15:08:23 2019
@@ -29,19 +29,19 @@ body:             |
     %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec
     %64:vgpr_32, dead %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec
     %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
-    GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+    GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
 
     %161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
     %163:vgpr_32, %165:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %161, 0, implicit $exec
     %164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec
     %162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1
-    GLOBAL_STORE_DWORDX2_SADDR %30, %162, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+    GLOBAL_STORE_DWORDX2_SADDR %30, %162, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
 
     %171:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
     %173:vgpr_32, %175:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %171, 0, implicit $exec
     %174:vgpr_32, dead %176:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %175, 0, implicit $exec
     %172:vreg_64 = REG_SEQUENCE %173, %subreg.sub0, %174, %subreg.sub1
-    GLOBAL_STORE_DWORDX2_SADDR %30, %172, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+    GLOBAL_STORE_DWORDX2_SADDR %30, %172, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
 
 ...
 
@@ -77,13 +77,13 @@ body:             |
 
     %64:vgpr_32, dead %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec
     %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
-    GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+    GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
 
     %161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
     %163:vgpr_32, %165:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %161, 0, implicit $exec
     %164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec
     %162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1
-    GLOBAL_STORE_DWORDX2_SADDR %30, %162, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+    GLOBAL_STORE_DWORDX2_SADDR %30, %162, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
 
 ...
 
@@ -113,7 +113,7 @@ body:             |
     %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec
     %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec
     %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %66, %subreg.sub1
-    GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+    GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
 
 ...
 
@@ -143,7 +143,7 @@ body:             |
     %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec
     %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
     %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %65, %subreg.sub1
-    GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+    GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
 
 
 ...
@@ -172,7 +172,7 @@ body:             |
     %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec
     %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
     %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
-    GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+    GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
 
 
 ...
@@ -201,7 +201,7 @@ body:             |
     %30:vreg_64 = COPY $sgpr0_sgpr1
     %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec
     %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %23, %subreg.sub1
-    GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+    GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
 
 
 ...
@@ -232,7 +232,7 @@ body:             |
     %30:vreg_64 = COPY $sgpr0_sgpr1
     %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %24, 0, implicit $exec
     %62:vreg_64 = REG_SEQUENCE %23, %subreg.sub0, %23, %subreg.sub1
-    GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+    GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
 
 
 ...
@@ -263,7 +263,7 @@ body:             |
     %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
     %31:vreg_64 = COPY $vcc
     %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
-    GLOBAL_STORE_DWORDX2_SADDR %31, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+    GLOBAL_STORE_DWORDX2_SADDR %31, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
 
 
 ...
@@ -294,7 +294,7 @@ body:             |
     %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
     %31:vreg_64 = COPY $vcc
     %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
-    GLOBAL_STORE_DWORDX2_SADDR %31, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+    GLOBAL_STORE_DWORDX2_SADDR %31, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
 
 ...
 
@@ -325,7 +325,7 @@ body:             |
     %32:vreg_64 = REG_SEQUENCE %31, %subreg.sub0, %23, %subreg.sub1
     %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
     %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
-    GLOBAL_STORE_DWORDX2_SADDR %32, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+    GLOBAL_STORE_DWORDX2_SADDR %32, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
 
 ...
 
@@ -356,7 +356,7 @@ body:             |
     %32:vreg_64 = REG_SEQUENCE %31, %subreg.sub0, %23, %subreg.sub1
     %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
     %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
-    GLOBAL_STORE_DWORDX2_SADDR %32, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+    GLOBAL_STORE_DWORDX2_SADDR %32, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
 
 ...
 
@@ -386,5 +386,5 @@ body:             |
     %31:vreg_64 = COPY killed $vcc
     %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
     %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
-    GLOBAL_STORE_DWORDX2_SADDR %31, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+    GLOBAL_STORE_DWORDX2_SADDR %31, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir Tue Apr 30 15:08:23 2019
@@ -89,7 +89,7 @@ body:             |
     %2 = COPY $sgpr30_sgpr31
     %1 = COPY $vgpr2_vgpr3
     %0 = COPY $vgpr0_vgpr1
-    %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+    %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
 
     %5 = S_MOV_B32 65535
     %6 = S_MOV_B32 65535
@@ -139,7 +139,7 @@ body:             |
 
     %100 = V_MOV_B32_e32 %48, implicit $exec
 
-    FLAT_STORE_DWORD %0, %100, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+    FLAT_STORE_DWORD %0, %100, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
     $sgpr30_sgpr31 = COPY %2
     S_SETPC_B64_return $sgpr30_sgpr31
 
@@ -256,7 +256,7 @@ body:             |
     %2 = COPY $sgpr30_sgpr31
     %1 = COPY $vgpr2_vgpr3
     %0 = COPY $vgpr0_vgpr1
-    %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+    %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
 
     %5 = S_MOV_B32 65535
     %6 = S_MOV_B32 65535
@@ -315,7 +315,7 @@ body:             |
 
     %100 = V_MOV_B32_e32 %60, implicit $exec
 
-    FLAT_STORE_DWORD %0, %100, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+    FLAT_STORE_DWORD %0, %100, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
     $sgpr30_sgpr31 = COPY %2
     S_SETPC_B64_return $sgpr30_sgpr31
 
@@ -401,7 +401,7 @@ body:             |
     %2 = COPY $sgpr30_sgpr31
     %1 = COPY $vgpr2_vgpr3
     %0 = COPY $vgpr0_vgpr1
-    %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+    %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
 
     %5 = S_MOV_B32 65535
     %6 = S_MOV_B32 65535
@@ -442,6 +442,6 @@ body:             |
 
     %100 = V_MOV_B32_e32 $vcc_lo, implicit $exec
 
-    FLAT_STORE_DWORD %0, %100, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+    FLAT_STORE_DWORD %0, %100, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
     $sgpr30_sgpr31 = COPY %2
     S_SETPC_B64_return $sgpr30_sgpr31

Modified: llvm/trunk/test/CodeGen/AMDGPU/sdwa-preserve.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sdwa-preserve.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sdwa-preserve.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-preserve.mir Tue Apr 30 15:08:23 2019
@@ -36,8 +36,8 @@ body:             |
     %2 = COPY $sgpr30_sgpr31
     %1 = COPY $vgpr2_vgpr3
     %0 = COPY $vgpr0_vgpr1
-    %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
-    %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+    %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+    %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
 
     %5 = V_AND_B32_e32 65535, %3, implicit $exec
     %6 = V_LSHRREV_B32_e64 16, %4, implicit $exec
@@ -51,7 +51,7 @@ body:             |
 
     %13 = V_OR_B32_e64 %10, %12, implicit $exec
 
-    FLAT_STORE_DWORD %0, %13, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+    FLAT_STORE_DWORD %0, %13, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
     $sgpr30_sgpr31 = COPY %2
     S_SETPC_B64_return $sgpr30_sgpr31
 
@@ -88,14 +88,14 @@ body:             |
     %2 = COPY $sgpr30_sgpr31
     %1 = COPY $vgpr2_vgpr3
     %0 = COPY $vgpr0_vgpr1
-    %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
-    %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+    %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+    %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
 
     %9:vgpr_32 = V_LSHRREV_B16_e64 8, %3, implicit $exec
     %10:sreg_32_xm0 = S_MOV_B32 255
     %11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec
     %17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0)
-    FLAT_STORE_DWORD %0, %17, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+    FLAT_STORE_DWORD %0, %17, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
     S_ENDPGM 0
 
 ...
@@ -131,14 +131,14 @@ body:             |
     %2 = COPY $sgpr30_sgpr31
     %1 = COPY $vgpr2_vgpr3
     %0 = COPY $vgpr0_vgpr1
-    %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
-    %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+    %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+    %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
 
     %9:vgpr_32 = V_LSHRREV_B16_e64 8, %3, implicit $exec
     %10:sreg_32_xm0 = S_MOV_B32 65535
     %11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec
     %17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0)
-    FLAT_STORE_DWORD %0, %17, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+    FLAT_STORE_DWORD %0, %17, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
     S_ENDPGM 0
 
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir Tue Apr 30 15:08:23 2019
@@ -203,7 +203,7 @@ body:             |
     liveins: $sgpr4_sgpr5
 
     %4 = COPY $sgpr4_sgpr5
-    %9 = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %9 = S_LOAD_DWORDX2_IMM %4, 0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     %8 = S_MOV_B64 0
     %7 = COPY %9
     %30 = V_MOV_B32_e32 1, implicit $exec
@@ -221,26 +221,26 @@ body:             |
     %15 = S_ADDC_U32 %7.sub1, %0.sub1, implicit-def dead $scc, implicit $scc
     %16 = REG_SEQUENCE %14, 1, %15, 2
     %18 = COPY %16
-    %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.uglygep45)
+    %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.uglygep45)
     %60 = V_BFE_U32 %17, 8, 8, implicit $exec
     %61 = V_LSHLREV_B32_e32 2, killed %60, implicit $exec
     %70 = V_ADD_I32_e32 %7.sub0, %61, implicit-def $vcc, implicit $exec
     %66 = COPY %13
     %65 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec
     %67 = REG_SEQUENCE %70, 1, killed %65, 2
-    FLAT_STORE_DWORD %67, %30, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp9)
+    FLAT_STORE_DWORD %67, %30, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp9)
     %37 = S_ADD_U32 %14, 4, implicit-def $scc
     %38 = S_ADDC_U32 %15, 0, implicit-def dead $scc, implicit $scc
     %71 = COPY killed %37
     %72 = COPY killed %38
     %41 = REG_SEQUENCE killed %71, 1, killed %72, 2
-    %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.scevgep)
+    %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.scevgep)
     %73 = V_BFE_U32 %40, 8, 8, implicit $exec
     %74 = V_LSHLREV_B32_e32 2, killed %73, implicit $exec
     %83 = V_ADD_I32_e32 %7.sub0, %74, implicit-def $vcc, implicit $exec
     %78 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec
     %80 = REG_SEQUENCE %83, 1, killed %78, 2
-    FLAT_STORE_DWORD %80, %30, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp17)
+    FLAT_STORE_DWORD %80, %30, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp17)
     %55 = S_ADD_U32 %0.sub0, 8, implicit-def $scc
     %56 = S_ADDC_U32 %0.sub1, 0, implicit-def dead $scc, implicit $scc
     %57 = REG_SEQUENCE %55, 1, killed %56, 2
@@ -365,7 +365,7 @@ body:             |
     liveins: $sgpr4_sgpr5
 
     %4 = COPY $sgpr4_sgpr5
-    %9 = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %9 = S_LOAD_DWORDX2_IMM %4, 0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     %8 = S_MOV_B64 0
     %7 = COPY %9
     %30 = V_MOV_B32_e32 1, implicit $exec
@@ -384,26 +384,26 @@ body:             |
     %15 = S_ADDC_U32 %7.sub1, %0.sub1, implicit-def dead $scc, implicit $scc
     %16 = REG_SEQUENCE %14, 1, %15, 2
     %18 = COPY %16
-    %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.uglygep45)
+    %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.uglygep45)
     %60 = V_BFE_U32 %17, 8, 8, implicit $exec
     %61 = V_LSHLREV_B32_e32 %84, killed %60, implicit $exec
     %70 = V_ADD_I32_e32 %7.sub0, %61, implicit-def $vcc, implicit $exec
     %66 = COPY %13
     %65 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec
     %67 = REG_SEQUENCE %70, 1, killed %65, 2
-    FLAT_STORE_DWORD %67, %30, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp9)
+    FLAT_STORE_DWORD %67, %30, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp9)
     %37 = S_ADD_U32 %14, 4, implicit-def $scc
     %38 = S_ADDC_U32 %15, 0, implicit-def dead $scc, implicit $scc
     %71 = COPY killed %37
     %72 = COPY killed %38
     %41 = REG_SEQUENCE killed %71, 1, killed %72, 2
-    %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.scevgep)
+    %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.scevgep)
     %73 = V_BFE_U32 %40, 8, 8, implicit $exec
     %74 = V_LSHLREV_B32_e32 %84, killed %73, implicit $exec
     %83 = V_ADD_I32_e32 %7.sub0, %74, implicit-def $vcc, implicit $exec
     %78 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec
     %80 = REG_SEQUENCE %83, 1, killed %78, 2
-    FLAT_STORE_DWORD %80, %30, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp17)
+    FLAT_STORE_DWORD %80, %30, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp17)
     %55 = S_ADD_U32 %0.sub0, 8, implicit-def $scc
     %56 = S_ADDC_U32 %0.sub1, 0, implicit-def dead $scc, implicit $scc
     %57 = REG_SEQUENCE %55, 1, killed %56, 2

Modified: llvm/trunk/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir Tue Apr 30 15:08:23 2019
@@ -41,7 +41,7 @@ body:             |
     %2 = COPY $sgpr30_sgpr31
     %1 = COPY $vgpr2_vgpr3
     %0 = COPY $vgpr0_vgpr1
-    %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+    %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
 
     %12 = V_LSHRREV_B32_e64 16, %3, implicit $exec
     %13 = V_BCNT_U32_B32_e64 %3, killed %12, implicit-def $vcc, implicit $exec
@@ -56,6 +56,6 @@ body:             |
     %19 = V_READLANE_B32 killed %18, 0, implicit-def $vcc, implicit $exec
     %20 = V_MOV_B32_e64 %19, implicit $exec
 
-    FLAT_STORE_DWORD %0, %20, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+    FLAT_STORE_DWORD %0, %20, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
     $sgpr30_sgpr31 = COPY %2
     S_SETPC_B64_return $sgpr30_sgpr31

Modified: llvm/trunk/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir Tue Apr 30 15:08:23 2019
@@ -85,7 +85,7 @@ body:             |
   bb.0:
     %0:sreg_32_xm0 = COPY $sgpr5
     %1:vreg_64 = IMPLICIT_DEF
-    %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
     ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr5
     dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit undef $vgpr0

Modified: llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir Tue Apr 30 15:08:23 2019
@@ -21,7 +21,7 @@ body:             |
     %2 = IMPLICIT_DEF
     %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
     %4, %5 = V_SUBBREV_U32_e64 0, %0, %3, 0, implicit $exec
-    GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec
+    GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, 0, implicit $exec
 
 ...
 
@@ -46,7 +46,7 @@ body:             |
     %2 = IMPLICIT_DEF
     %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
     %4, %5 = V_SUBB_U32_e64 %0, 0, %3, 0, implicit $exec
-    GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec
+    GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, 0, implicit $exec
 
 ...
 
@@ -71,7 +71,7 @@ body:             |
     %2 = IMPLICIT_DEF
     %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
     %4, %5 = V_ADDC_U32_e64 0, %0, %3, 0, implicit $exec
-    GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec
+    GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, 0, implicit $exec
 
 ...
 
@@ -96,6 +96,6 @@ body:             |
     %2 = IMPLICIT_DEF
     %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
     %4, %5 = V_ADDC_U32_e64 %0, 0, %3, 0, implicit $exec
-    GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec
+    GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, 0, implicit $exec
 
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir Tue Apr 30 15:08:23 2019
@@ -71,8 +71,8 @@ body:             |
 
     %3 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
     %26 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     %27 = REG_SEQUENCE %3, 1, %26, 2
     %10 = S_MOV_B32 61440
@@ -81,11 +81,11 @@ body:             |
     %13 = REG_SEQUENCE killed %5, 17, %12, 18
     %28 = V_LSHL_B64 killed %27, 2, implicit $exec
     %16 = REG_SEQUENCE killed %4, 17, %12, 18
-    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec
-    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec
+    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, implicit $exec
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, implicit $exec
     %29, %9 = V_ADD_I32_e64 %19, %17, 0, implicit $exec
     %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec
-    BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -155,8 +155,8 @@ body:             |
 
     %3 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
     %26 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     %27 = REG_SEQUENCE %3, 1, %26, 2
     %10 = S_MOV_B32 61440
@@ -165,11 +165,11 @@ body:             |
     %13 = REG_SEQUENCE killed %5, 17, %12, 18
     %28 = V_LSHL_B64 killed %27, 2, implicit $exec
     %16 = REG_SEQUENCE killed %4, 17, %12, 18
-    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec
-    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec
+    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, implicit $exec
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, implicit $exec
     %29, %9 = V_SUB_I32_e64 %19, %17, 0, implicit $exec
     %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec
-    BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -239,8 +239,8 @@ body:             |
 
     %3 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
     %26 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     %27 = REG_SEQUENCE %3, 1, %26, 2
     %10 = S_MOV_B32 61440
@@ -249,11 +249,11 @@ body:             |
     %13 = REG_SEQUENCE killed %5, 17, %12, 18
     %28 = V_LSHL_B64 killed %27, 2, implicit $exec
     %16 = REG_SEQUENCE killed %4, 17, %12, 18
-    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec
-    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec
+    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, implicit $exec
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, implicit $exec
     %29, %9 = V_SUBREV_I32_e64 %19, %17, 0, implicit $exec
     %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec
-    BUFFER_STORE_DWORD_ADDR64 %29, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_ADDR64 %29, %28, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -322,8 +322,8 @@ body:             |
 
     %3 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
     %26 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     %27 = REG_SEQUENCE %3, 1, %26, 2
     %10 = S_MOV_B32 61440
@@ -332,12 +332,12 @@ body:             |
     %13 = REG_SEQUENCE killed %5, 17, %12, 18
     %28 = V_LSHL_B64 killed %27, 2, implicit $exec
     %16 = REG_SEQUENCE killed %4, 17, %12, 18
-    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec
-    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec
+    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, implicit $exec
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, implicit $exec
     %9 = S_MOV_B64 0
     %29, $vcc = V_ADDC_U32_e64 %19, %17, %9, 0, implicit $exec
     %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec
-    BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -407,8 +407,8 @@ body:             |
 
     %3 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
     %26 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     %27 = REG_SEQUENCE %3, 1, %26, 2
     %10 = S_MOV_B32 61440
@@ -417,12 +417,12 @@ body:             |
     %13 = REG_SEQUENCE killed %5, 17, %12, 18
     %28 = V_LSHL_B64 killed %27, 2, implicit $exec
     %16 = REG_SEQUENCE killed %4, 17, %12, 18
-    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec
-    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec
+    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, implicit $exec
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, implicit $exec
     $vcc = S_MOV_B64 0
     %29, $vcc = V_ADDC_U32_e64 %19, %17, $vcc, 0, implicit $exec
     %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec
-    BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...
@@ -492,8 +492,8 @@ body:             |
 
     %3 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
     %26 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     %27 = REG_SEQUENCE %3, 1, %26, 2
     %10 = S_MOV_B32 61440
@@ -502,11 +502,11 @@ body:             |
     %13 = REG_SEQUENCE killed %5, 17, %12, 18
     %28 = V_LSHL_B64 killed %27, 2, implicit $exec
     %16 = REG_SEQUENCE killed %4, 17, %12, 18
-    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec
-    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec
+    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, implicit $exec
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, implicit $exec
     %29, $vcc = V_ADDC_U32_e64 %19, %17, undef $vcc, 0, implicit $exec
     %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec
-    BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow.mir Tue Apr 30 15:08:23 2019
@@ -15,7 +15,7 @@ body: |
     ; GCN: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 65535, [[S_AND_B32_]], implicit-def $scc
     ; GCN: S_ENDPGM 0
     %0:sgpr_64 = COPY $sgpr4_sgpr5
-    %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0
+    %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0, 0
     %2:sreg_32_xm0 = S_AND_B32 %1, 255, implicit-def $scc
     %3:sreg_32_xm0 = S_AND_B32 65535, %2, implicit-def $scc
     S_ENDPGM 0

Modified: llvm/trunk/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir Tue Apr 30 15:08:23 2019
@@ -36,8 +36,8 @@ body:             |
     %3.sub1:sgpr_128 = S_AND_B32 %2, 65535, implicit-def dead $scc
     %3.sub3:sgpr_128 = S_MOV_B32 151468
     %3.sub2:sgpr_128 = S_MOV_B32 -1
-    %7.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %7, 48, 0 :: (load 4 from `i8 addrspace(4)* undef`, addrspace 4)
-    %8:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM %3, 640, 0 :: (dereferenceable invariant load 8)
+    %7.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %7, 48, 0, 0 :: (load 4 from `i8 addrspace(4)* undef`, addrspace 4)
+    %8:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM %3, 640, 0, 0 :: (dereferenceable invariant load 8)
     undef %9.sub0:vreg_128 = V_LSHL_ADD_U32 %6, 4, %4, implicit $exec
     %9.sub1:vreg_128 = V_LSHL_ADD_U32 %5, 4, %0, implicit $exec
     S_ENDPGM 0

Modified: llvm/trunk/test/CodeGen/AMDGPU/smrd-fold-offset.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/smrd-fold-offset.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/smrd-fold-offset.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/smrd-fold-offset.mir Tue Apr 30 15:08:23 2019
@@ -19,7 +19,7 @@ body:             |
     %8:vgpr_32 = COPY %6
     %7:vgpr_32 = V_ADD_I32_e32 %4, killed %8, implicit-def dead $vcc, implicit $exec
     %10:sreg_32 = COPY %7
-    %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR killed %5, killed %10, 0
+    %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR killed %5, killed %10, 0, 0
     $vgpr0 = COPY %9
     SI_RETURN_TO_EPILOG $vgpr0
 ...
@@ -43,7 +43,7 @@ body:             |
     %8:vgpr_32 = COPY %6
     %7:vgpr_32 = V_ADD_U32_e32 %4, killed %8, implicit $exec
     %10:sreg_32 = COPY %7
-    %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR killed %5, killed %10, 0 :: (dereferenceable invariant load 4)
+    %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR killed %5, killed %10, 0, 0 :: (dereferenceable invariant load 4)
     $vgpr0 = COPY %9
     SI_RETURN_TO_EPILOG $vgpr0
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir Tue Apr 30 15:08:23 2019
@@ -24,10 +24,10 @@ machineFunctionInfo:
   stackPtrOffsetReg: $sgpr32
 body: |
   bb.0:
-    %0:vgpr_32 = FLAT_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, 0, implicit $flat_scr, implicit $exec
-    %2:vgpr_32 = FLAT_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, 0, implicit $flat_scr, implicit $exec
+    %0:vgpr_32 = FLAT_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, 0, 0, implicit $flat_scr, implicit $exec
+    %2:vgpr_32 = FLAT_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, 0, 0, implicit $flat_scr, implicit $exec
     S_NOP 0, implicit %0
-    %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0
-    %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0
+    %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0, 0
+    %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0, 0
     S_NOP 0, implicit %1
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir Tue Apr 30 15:08:23 2019
@@ -165,12 +165,12 @@ body: |
 
     %18:vgpr_32 = V_MAD_F32 0, %10.sub0, 0, target-flags(amdgpu-gotprel) 1073741824, 0, -1082130432, 0, 0, implicit $exec
     %19:vgpr_32 = V_MAD_F32 0, %12.sub0, 0, target-flags(amdgpu-gotprel) 0, 0, 0, 0, 0, implicit $exec
-    %20:sreg_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %21:sreg_128, 1040, 0 :: (dereferenceable invariant load 16)
+    %20:sreg_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %21:sreg_128, 1040, 0, 0 :: (dereferenceable invariant load 16)
     %22:vgpr_32 = V_ADD_F32_e32 0, %19, implicit $exec
     %23:vgpr_32 = V_MAD_F32 0, %18, 0, 0, 0, 0, 0, 0, implicit $exec
     %24:vgpr_32 = COPY %20.sub3
     %25:vgpr_32 = V_MUL_F32_e64 0, target-flags(amdgpu-gotprel32-lo) 0, 0, %20.sub1, 0, 0, implicit $exec
-    %26:sreg_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %27:sreg_128, 1056, 0 :: (dereferenceable invariant load 16)
+    %26:sreg_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %27:sreg_128, 1056, 0, 0 :: (dereferenceable invariant load 16)
     %28:vgpr_32 = V_MAD_F32 0, %18, 0, %26.sub0, 0, 0, 0, 0, implicit $exec
     %29:vgpr_32 = V_ADD_F32_e32 %28, %19, implicit $exec
     %30:vgpr_32 = V_RCP_F32_e32 %29, implicit $exec

Modified: llvm/trunk/test/CodeGen/AMDGPU/syncscopes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/syncscopes.ll?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/syncscopes.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/syncscopes.ll Tue Apr 30 15:08:23 2019
@@ -1,9 +1,9 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -stop-after=si-insert-skips < %s | FileCheck --check-prefix=GCN %s
 
 ; GCN-LABEL: name: syncscopes
-; GCN: FLAT_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("agent") seq_cst 4 into %ir.agent_out)
-; GCN: FLAT_STORE_DWORD killed renamable $vgpr4_vgpr5, killed renamable $vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out)
-; GCN: FLAT_STORE_DWORD killed renamable $vgpr7_vgpr8, killed renamable $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out)
+; GCN: FLAT_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("agent") seq_cst 4 into %ir.agent_out)
+; GCN: FLAT_STORE_DWORD killed renamable $vgpr4_vgpr5, killed renamable $vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out)
+; GCN: FLAT_STORE_DWORD killed renamable $vgpr7_vgpr8, killed renamable $vgpr6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out)
 define void @syncscopes(
     i32 %agent,
     i32* %agent_out,

Modified: llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir Tue Apr 30 15:08:23 2019
@@ -44,7 +44,7 @@ body:             |
     liveins: $vgpr0, $sgpr4_sgpr5
 
     $vgpr1_vgpr2 = COPY killed $sgpr4_sgpr5, implicit $exec
-    $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`)
+    $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`)
     $vcc = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec
     $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed $vgpr1, implicit $exec
     $vgpr1 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed $sgpr0_sgpr1, implicit $exec
@@ -108,7 +108,7 @@ body:             |
     liveins: $vgpr0, $sgpr4_sgpr5
 
     $vgpr1_vgpr2 = COPY killed $sgpr4_sgpr5, implicit $exec
-    $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`)
+    $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`)
     $vcc = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec
     $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed $vgpr1, implicit $exec
     $vgpr1 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed $sgpr0_sgpr1, implicit $exec

Modified: llvm/trunk/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir Tue Apr 30 15:08:23 2019
@@ -78,7 +78,7 @@ body:             |
   bb.0.entry:
     liveins: $sgpr0_sgpr1, $vcc
 
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     $sgpr7 = S_MOV_B32 61440
     $sgpr6 = S_MOV_B32 -1
     $vcc = V_CMP_EQ_F32_e64 0, 0, 0, undef $sgpr2, 0, implicit $exec
@@ -88,7 +88,7 @@ body:             |
     liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
 
     $vgpr0 = V_MOV_B32_e32 9, implicit $exec
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
     $vgpr0 = V_MOV_B32_e32 0, implicit $exec
     S_BRANCH %bb.3
 
@@ -96,7 +96,7 @@ body:             |
     liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
 
     $vgpr0 = V_MOV_B32_e32 100, implicit $exec
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
     $vgpr0 = V_MOV_B32_e32 1, implicit $exec
 
   bb.3.done:
@@ -104,7 +104,7 @@ body:             |
 
     $sgpr3 = S_MOV_B32 61440
     $sgpr2 = S_MOV_B32 -1
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
     S_ENDPGM 0
 
 ...
@@ -140,7 +140,7 @@ body:             |
   bb.0.entry:
     liveins: $sgpr0_sgpr1
 
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     $sgpr7 = S_MOV_B32 61440
     $sgpr6 = S_MOV_B32 -1
     S_CBRANCH_VCCZ %bb.1, implicit undef $vcc
@@ -149,7 +149,7 @@ body:             |
     liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
 
     $vgpr0 = V_MOV_B32_e32 9, implicit $exec
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
     $vgpr0 = V_MOV_B32_e32 0, implicit $exec
     S_BRANCH %bb.3
 
@@ -157,7 +157,7 @@ body:             |
     liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
 
     $vgpr0 = V_MOV_B32_e32 100, implicit $exec
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
     $vgpr0 = V_MOV_B32_e32 1, implicit $exec
 
   bb.3.done:
@@ -165,7 +165,7 @@ body:             |
 
     $sgpr3 = S_MOV_B32 61440
     $sgpr2 = S_MOV_B32 -1
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
     S_ENDPGM 0
 
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir Tue Apr 30 15:08:23 2019
@@ -19,7 +19,7 @@ body:             |
     $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
 
   bb.1:
-    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
 ...
 # GCN-LABEL: name: vmem_vcc_branch_to_next
 # GCN:      bb.1:
@@ -40,7 +40,7 @@ body:             |
     S_BRANCH %bb.1
 
   bb.1:
-    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
 ...
 # GCN-LABEL: name: vmem_vcc_fallthrough_no_hazard_too_far
 # GCN:      bb.1:
@@ -61,7 +61,7 @@ body:             |
     $sgpr0 = S_MOV_B32 0
 
   bb.1:
-    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
 ...
 # GCN-LABEL: name: vmem_vcc_fallthrough_no_hazard_nops
 # GCN:      bb.1:
@@ -78,7 +78,7 @@ body:             |
     S_NOP 4
 
   bb.1:
-    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
 ...
 # GCN-LABEL: name: vmem_vcc_branch_around
 # GCN:      bb.2:
@@ -107,7 +107,7 @@ body:             |
     S_NOP 0
 
   bb.2:
-    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
 ...
 # GCN-LABEL: name: vmem_vcc_branch_backedge
 # GCN:      S_NOP
@@ -123,7 +123,7 @@ body:             |
 
     $vgpr0 = IMPLICIT_DEF
     $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
-    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
 
   bb.1:
     $vgpr0 = IMPLICIT_DEF
@@ -156,7 +156,7 @@ body:             |
     $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
 
   bb.2:
-    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
 ...
 # GCN-LABEL: name: vmem_vcc_self_loop
 # GCN:      S_NOP
@@ -172,7 +172,7 @@ body:             |
 
     $vgpr0 = IMPLICIT_DEF
     $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
-    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
     $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
     S_BRANCH %bb.0
 ...
@@ -198,7 +198,7 @@ body:             |
     successors: %bb.1
 
     $sgpr0 = S_MOV_B32 0
-    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
     $vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
     S_BRANCH %bb.1
 ...
@@ -224,7 +224,7 @@ body:             |
     successors: %bb.1
 
     $sgpr0 = S_MOV_B32 0
-    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
     $vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
     S_BRANCH %bb.1
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir Tue Apr 30 15:08:23 2019
@@ -13,8 +13,8 @@ body:             |
 
     $vgpr1 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1_vgpr2
     $vgpr2 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1_vgpr2
-    $vgpr4 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
-    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
+    $vgpr4 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
+    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
     $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 3, killed $sgpr4, implicit $exec
     $vgpr3 = V_CNDMASK_B32_e64 0, -1082130432, 0, 1065353216, killed $sgpr0_sgpr1, implicit $exec
     $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec, implicit $exec
@@ -23,7 +23,7 @@ body:             |
   bb.3:
     successors: %bb.1
 
-    $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
+    $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
 
   bb.1:
     successors: %bb.5, %bb.2
@@ -43,7 +43,7 @@ body:             |
   bb.4:
     successors: %bb.3, %bb.1
 
-    $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
+    $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
     $vgpr4 = V_CVT_I32_F32_e32 $vgpr5, implicit $exec
     V_CMP_EQ_U32_e32 2, killed $vgpr4, implicit-def $vcc, implicit $exec
     $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc

Modified: llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-irreducible.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-irreducible.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-irreducible.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-irreducible.mir Tue Apr 30 15:08:23 2019
@@ -37,8 +37,8 @@ body:             |
   bb.2:
     successors: %bb.3
 
-    renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0
-    renamable $sgpr3 = S_BUFFER_LOAD_DWORD_IMM killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0
+    renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, 0
+    renamable $sgpr3 = S_BUFFER_LOAD_DWORD_IMM killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0
 
   bb.3:
     successors: %bb.1, %bb.4
@@ -73,12 +73,12 @@ name: irreducible_loop_extended
 body: |
   bb.0:
     successors: %bb.1, %bb.2
-    $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr2_sgpr3, 0, 0
+    $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr2_sgpr3, 0, 0, 0
     S_CBRANCH_VCCZ %bb.2, implicit $vcc
 
   bb.1:
     successors: %bb.2
-    BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, renamable $vgpr2, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, renamable $vgpr2, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
 
   bb.2:
     successors: %bb.3, %bb.6
@@ -86,18 +86,18 @@ body: |
 
   bb.3:
     successors: %bb.4, %bb.5
-    BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, killed renamable $vgpr2, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, killed renamable $vgpr2, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
     S_CBRANCH_VCCNZ %bb.5, implicit $vcc
 
   bb.4:
     successors: %bb.5
-    renamable $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM killed renamable $sgpr2_sgpr3, 64, 0
+    renamable $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM killed renamable $sgpr2_sgpr3, 64, 0, 0
     renamable $vgpr2 = BUFFER_ATOMIC_ADD_OFFSET_RTN killed renamable $vgpr2, killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, 0, 0, 0, implicit $exec
 
   bb.5:
     successors: %bb.6
 
   bb.6:
-    FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-single-basic-block.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-single-basic-block.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-single-basic-block.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-single-basic-block.mir Tue Apr 30 15:08:23 2019
@@ -15,11 +15,11 @@ body: |
   bb.0:
     S_BRANCH %bb.1
   bb.1:
-    GLOBAL_STORE_DWORD $vgpr7_vgpr8, $vgpr11, 0, 0, 0, implicit $exec
-    $vgpr21 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec
-    $vgpr10 = GLOBAL_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, implicit $exec
-    GLOBAL_STORE_DWORD $vgpr14_vgpr15, $vgpr21, 0, 0, 0, implicit $exec
-    $vgpr11 = GLOBAL_LOAD_DWORD $vgpr11_vgpr12, 0, 0, 0, implicit $exec
+    GLOBAL_STORE_DWORD $vgpr7_vgpr8, $vgpr11, 0, 0, 0, 0, implicit $exec
+    $vgpr21 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec
+    $vgpr10 = GLOBAL_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, 0, implicit $exec
+    GLOBAL_STORE_DWORD $vgpr14_vgpr15, $vgpr21, 0, 0, 0, 0, implicit $exec
+    $vgpr11 = GLOBAL_LOAD_DWORD $vgpr11_vgpr12, 0, 0, 0, 0, implicit $exec
     S_CBRANCH_SCC1 %bb.1, implicit $scc
   bb.2:
     S_ENDPGM 0

Modified: llvm/trunk/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/waitcnt-preexisting.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/waitcnt-preexisting.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt-preexisting.mir Tue Apr 30 15:08:23 2019
@@ -20,7 +20,7 @@ body:             |
   bb.0:
     liveins: $sgpr0, $sgpr1, $vgpr0
 
-    renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 480, 0
+    renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 480, 0, 0
     renamable $vgpr13 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec
     S_WAITCNT -16257
     renamable $vgpr0_vgpr1 = DS_READ2_B32 renamable $vgpr13, 0, 1, 0, implicit $m0, implicit $exec

Modified: llvm/trunk/test/CodeGen/AMDGPU/waitcnt.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/waitcnt.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/waitcnt.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt.mir Tue Apr 30 15:08:23 2019
@@ -44,21 +44,21 @@ name: flat_zero_waitcnt
 body: |
   bb.0:
     successors: %bb.1
-    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.global4)
-    $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
+    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.global4)
+    $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
     $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
     S_BRANCH %bb.1
 
   bb.1:
     successors: %bb.2
-    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
+    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
     $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
     S_BRANCH %bb.2
 
   bb.2:
-    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4)
-    $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.flat16)
+    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4)
+    $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.flat16)
     $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
     S_ENDPGM 0
 ...
@@ -79,11 +79,11 @@ name: single_fallthrough_successor_no_en
 body: |
   bb.0:
     successors: %bb.1
-    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 
   bb.1:
     $vgpr3_vgpr4 = V_LSHLREV_B64 4, $vgpr7_vgpr8, implicit $exec
-    FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...
 ---
@@ -106,15 +106,15 @@ name: single_branch_successor_not_next_b
 body: |
   bb.0:
     successors: %bb.2
-    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_BRANCH %bb.2
 
   bb.1:
-    FLAT_STORE_DWORD $vgpr8_vgpr9, $vgpr10, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD $vgpr8_vgpr9, $vgpr10, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 
   bb.2:
      $vgpr3_vgpr4 = V_LSHLREV_B64 4, $vgpr7_vgpr8, implicit $exec
-    FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     S_ENDPGM 0
 ...

Modified: llvm/trunk/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir (original)
+++ llvm/trunk/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir Tue Apr 30 15:08:23 2019
@@ -16,13 +16,13 @@ body:             |
     %0:sgpr_64 = COPY $sgpr0_sgpr1
     %vreg123_1:vgpr_32 = COPY %11
     %27:vreg_64 = REG_SEQUENCE %vreg123_0, %subreg.sub0, %vreg123_1, %subreg.sub1
-    %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 9, 0
+    %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
     %vreg123_2:vgpr_32 = COPY %4
-    %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 11, 0
+    %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
     %vreg123_3:vgpr_32 = COPY %5
     %16:sgpr_128 = REG_SEQUENCE killed %vreg123_0, %subreg.sub0, %vreg123_1, %subreg.sub1, %vreg123_2, %subreg.sub2, %vreg123_3, %subreg.sub3
 
-    BUFFER_STORE_DWORD_ADDR64 %vreg123_1, %27, killed %16, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_ADDR64 %vreg123_1, %27, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 
 ...

Modified: llvm/trunk/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir (original)
+++ llvm/trunk/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir Tue Apr 30 15:08:23 2019
@@ -11,7 +11,7 @@
 # CHECK: scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
 # CHECK: scratchWaveOffsetReg: '$sgpr50'
 # CHECK: frameOffsetReg:  '$sgpr50'
-# CHECK: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50, 4, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+# CHECK: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50, 4, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
 name: reserve_correct_register
 tracksRegLiveness: true
 machineFunctionInfo:
@@ -24,6 +24,6 @@ stack:
 
 body:             |
   bb.0:
-    renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50, 4, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+    renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50, 4, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
     S_ENDPGM 0
 ...

Modified: llvm/trunk/test/CodeGen/MIR/AMDGPU/syncscopes.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MIR/AMDGPU/syncscopes.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/MIR/AMDGPU/syncscopes.mir (original)
+++ llvm/trunk/test/CodeGen/MIR/AMDGPU/syncscopes.mir Tue Apr 30 15:08:23 2019
@@ -42,9 +42,9 @@
   !0 = !{i32 1}
 
 # GCN-LABEL: name: syncscopes
-# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("agent") seq_cst 4 into %ir.agent_out, addrspace 4)
-# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out, addrspace 4)
-# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out, addrspace 4)
+# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("agent") seq_cst 4 into %ir.agent_out, addrspace 4)
+# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out, addrspace 4)
+# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out, addrspace 4)
 ...
 ---
 name:            syncscopes
@@ -74,27 +74,27 @@ body:             |
     liveins: $sgpr4_sgpr5
 
     S_WAITCNT 0
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
-    $sgpr6 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
-    $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 24, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
-    $sgpr7 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 16, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
-    $sgpr8 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 32, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 8, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    $sgpr6 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+    $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 24, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    $sgpr7 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 16, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+    $sgpr8 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 32, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     S_WAITCNT 127
     $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
-    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 40, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 40, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $sgpr0_sgpr1, implicit $exec
     $vgpr2 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
-    FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("agent") seq_cst 4 into %ir.agent_out)
+    FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("agent") seq_cst 4 into %ir.agent_out)
     S_WAITCNT 112
     $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3
     $vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit killed $sgpr2_sgpr3, implicit $sgpr2_sgpr3, implicit $exec
     $vgpr2 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
-    FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out)
+    FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out)
     S_WAITCNT 112
     $vgpr0 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr4_sgpr5
     $vgpr1 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit killed $sgpr4_sgpr5, implicit $sgpr4_sgpr5, implicit $exec
     $vgpr2 = V_MOV_B32_e32 killed $sgpr8, implicit $exec, implicit $exec
-    FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out)
+    FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out)
     S_ENDPGM 0
 
 ...

Modified: llvm/trunk/test/CodeGen/MIR/AMDGPU/target-index-operands.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MIR/AMDGPU/target-index-operands.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/MIR/AMDGPU/target-index-operands.mir (original)
+++ llvm/trunk/test/CodeGen/MIR/AMDGPU/target-index-operands.mir Tue Apr 30 15:08:23 2019
@@ -40,19 +40,19 @@ body: |
     $sgpr2 = S_ADD_U32 $sgpr2, target-index(amdgpu-constdata-start), implicit-def $scc, implicit-def $scc
     $sgpr3 = S_ADDC_U32 $sgpr3, 0, implicit-def $scc, implicit $scc, implicit-def $scc, implicit $scc
     $sgpr4_sgpr5 = S_LSHR_B64 $sgpr2_sgpr3, 32, implicit-def dead $scc
-    $sgpr6 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 11, 0
+    $sgpr6 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 11, 0, 0
     $sgpr7 = S_ASHR_I32 $sgpr6, 31, implicit-def dead $scc
     $sgpr6_sgpr7 = S_LSHL_B64 $sgpr6_sgpr7, 2, implicit-def dead $scc
     $sgpr2 = S_ADD_U32 $sgpr2, @float_gv, implicit-def $scc
     $sgpr3 = S_ADDC_U32 $sgpr4, 0, implicit-def dead $scc, implicit $scc
     $sgpr4 = S_ADD_U32 $sgpr2, $sgpr6, implicit-def $scc
     $sgpr5 = S_ADDC_U32 $sgpr3, $sgpr7, implicit-def dead $scc, implicit $scc
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0
-    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 9, 0
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0, 0
+    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 9, 0, 0
     $sgpr7 = S_MOV_B32 61440
     $sgpr6 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 ...
 ---
@@ -70,18 +70,18 @@ body: |
     $sgpr2 = S_ADD_U32 $sgpr2, target-index(amdgpu-constdata-start) + 1, implicit-def $scc, implicit-def $scc
     $sgpr3 = S_ADDC_U32 $sgpr3, 0, implicit-def $scc, implicit $scc, implicit-def $scc, implicit $scc
     $sgpr4_sgpr5 = S_LSHR_B64 $sgpr2_sgpr3, 32, implicit-def dead $scc
-    $sgpr6 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 11, 0
+    $sgpr6 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 11, 0, 0
     $sgpr7 = S_ASHR_I32 $sgpr6, 31, implicit-def dead $scc
     $sgpr6_sgpr7 = S_LSHL_B64 $sgpr6_sgpr7, 2, implicit-def dead $scc
     $sgpr2 = S_ADD_U32 $sgpr2, @float_gv, implicit-def $scc
     $sgpr3 = S_ADDC_U32 $sgpr4, 0, implicit-def dead $scc, implicit $scc
     $sgpr4 = S_ADD_U32 $sgpr2, $sgpr6, implicit-def $scc
     $sgpr5 = S_ADDC_U32 $sgpr3, $sgpr7, implicit-def dead $scc, implicit $scc
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0
-    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 9, 0
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0, 0
+    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 9, 0, 0
     $sgpr7 = S_MOV_B32 61440
     $sgpr6 = S_MOV_B32 -1
     $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec
-    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 ...

Added: llvm/trunk/test/MC/AMDGPU/flat-gfx10.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/flat-gfx10.s?rev=359621&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/flat-gfx10.s (added)
+++ llvm/trunk/test/MC/AMDGPU/flat-gfx10.s Tue Apr 30 15:08:23 2019
@@ -0,0 +1,119 @@
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR %s
+
+flat_load_dword v1, v[3:4]
+// GFX10: encoding: [0x00,0x00,0x30,0xdc,0x03,0x00,0x7d,0x01]
+
+flat_load_dword v1, v[3:4] offset:-1
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_load_dword v1, v[3:4] offset:2047
+// GFX10: encoding: [0xff,0x07,0x30,0xdc,0x03,0x00,0x7d,0x01]
+
+flat_load_dword v1, v[3:4] offset:2048
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_load_dword v1, v[3:4] offset:4 glc
+// GFX10: encoding: [0x04,0x00,0x31,0xdc,0x03,0x00,0x7d,0x01]
+
+flat_load_dword v1, v[3:4] offset:4 glc slc
+// GFX10: encoding: [0x04,0x00,0x33,0xdc,0x03,0x00,0x7d,0x01]
+
+flat_load_dword v1, v[3:4] offset:4 glc slc dlc
+// GFX10: encoding: [0x04,0x10,0x33,0xdc,0x03,0x00,0x7d,0x01]
+
+flat_atomic_add v[3:4], v5 offset:8 slc
+// GFX10: encoding: [0x08,0x00,0xca,0xdc,0x03,0x05,0x7d,0x00]
+
+flat_atomic_cmpswap v[1:2], v[3:4] offset:2047
+// GFX10: encoding: [0xff,0x07,0xc4,0xdc,0x01,0x03,0x7d,0x00]
+
+flat_atomic_cmpswap v[1:2], v[3:4] offset:2047 slc
+// GFX10: encoding: [0xff,0x07,0xc6,0xdc,0x01,0x03,0x7d,0x00]
+
+flat_atomic_cmpswap v[1:2], v[3:4]
+// GFX10: encoding: [0x00,0x00,0xc4,0xdc,0x01,0x03,0x7d,0x00]
+
+flat_atomic_cmpswap v[1:2], v[3:4] slc
+// GFX10: encoding: [0x00,0x00,0xc6,0xdc,0x01,0x03,0x7d,0x00]
+
+flat_atomic_cmpswap v[1:2], v[3:4] offset:2047 glc
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_atomic_cmpswap v[1:2], v[3:4] glc
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_atomic_cmpswap v0, v[1:2], v[3:4] offset:2047 glc
+// GFX10: encoding: [0xff,0x07,0xc5,0xdc,0x01,0x03,0x7d,0x00]
+
+flat_atomic_cmpswap v0, v[1:2], v[3:4] offset:2047 glc slc
+// GFX10: encoding: [0xff,0x07,0xc7,0xdc,0x01,0x03,0x7d,0x00]
+
+flat_atomic_cmpswap v0, v[1:2], v[3:4] glc
+// GFX10: encoding: [0x00,0x00,0xc5,0xdc,0x01,0x03,0x7d,0x00]
+
+flat_atomic_cmpswap v0, v[1:2], v[3:4] glc slc
+// GFX10: encoding: [0x00,0x00,0xc7,0xdc,0x01,0x03,0x7d,0x00]
+
+flat_atomic_cmpswap v0, v[1:2], v[3:4] glc
+// GFX10: encoding: [0x00,0x00,0xc5,0xdc,0x01,0x03,0x7d,0x00]
+
+flat_atomic_cmpswap v0, v[1:2], v[3:4] offset:2047
+// GFX10-ERR: error: too few operands for instruction
+
+flat_atomic_cmpswap v0, v[1:2], v[3:4] slc
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_atomic_swap v[3:4], v5 offset:16
+// GFX10: encoding: [0x10,0x00,0xc0,0xdc,0x03,0x05,0x7d,0x00]
+
+flat_store_dword v[3:4], v1 offset:16
+// GFX10: encoding: [0x10,0x00,0x70,0xdc,0x03,0x01,0x7d,0x00]
+
+flat_store_dword v[3:4], v1, off
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_store_dword v[3:4], v1, s[0:1]
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_store_dword v[3:4], v1, s0
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_load_dword v1, v[3:4], off
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_load_dword v1, v[3:4], s[0:1]
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_load_dword v1, v[3:4], s0
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_load_dword v1, v[3:4], exec_hi
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_store_dword v[3:4], v1, exec_hi
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_load_ubyte_d16 v1, v[3:4]
+// GFX10: encoding: [0x00,0x00,0x80,0xdc,0x03,0x00,0x7d,0x01]
+
+flat_load_ubyte_d16_hi v1, v[3:4]
+// GFX10: encoding: [0x00,0x00,0x84,0xdc,0x03,0x00,0x7d,0x01]
+
+flat_load_sbyte_d16 v1, v[3:4]
+// GFX10: encoding: [0x00,0x00,0x88,0xdc,0x03,0x00,0x7d,0x01]
+
+flat_load_sbyte_d16_hi v1, v[3:4]
+// GFX10: encoding: [0x00,0x00,0x8c,0xdc,0x03,0x00,0x7d,0x01]
+
+flat_load_short_d16 v1, v[3:4]
+// GFX10: encoding: [0x00,0x00,0x90,0xdc,0x03,0x00,0x7d,0x01]
+
+flat_load_short_d16_hi v1, v[3:4]
+// GFX10: encoding: [0x00,0x00,0x94,0xdc,0x03,0x00,0x7d,0x01]
+
+flat_store_byte_d16_hi v[3:4], v1
+// GFX10: encoding: [0x00,0x00,0x64,0xdc,0x03,0x01,0x7d,0x00]
+
+flat_store_short_d16_hi v[3:4], v1
+// GFX10: encoding: [0x00,0x00,0x6c,0xdc,0x03,0x01,0x7d,0x00]
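
One regularity worth spelling out in the expected encodings above (an observation from the bytes themselves, not from spec text): dlc adds 0x10 to the second encoding byte, i.e. bit 12 of the first instruction dword, while glc and slc set the low two bits of the third byte (bits 16 and 17). For example:

    flat_load_dword v1, v[3:4] offset:4 glc slc     // [0x04,0x00,0x33,0xdc,0x03,0x00,0x7d,0x01]
    flat_load_dword v1, v[3:4] offset:4 glc slc dlc // [0x04,0x10,0x33,0xdc,0x03,0x00,0x7d,0x01]
    //                                                        ^^^^ dlc = 0x10 in byte 1 (bit 12)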

Modified: llvm/trunk/test/MC/AMDGPU/flat-global.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/flat-global.s?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/flat-global.s (original)
+++ llvm/trunk/test/MC/AMDGPU/flat-global.s Tue Apr 30 15:08:23 2019
@@ -2,364 +2,527 @@
 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR %s
 // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR %s
 
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GFX10 %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR %s
+
 global_load_ubyte v1, v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x20,0xdc,0x03,0x00,0x7d,0x01]
 // GFX9: global_load_ubyte v1, v[3:4], off ; encoding: [0x00,0x80,0x40,0xdc,0x03,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
+global_load_ubyte v1, v[3:4], off dlc
+// GFX10: encoding: [0x00,0x90,0x20,0xdc,0x03,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 global_load_sbyte v1, v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x24,0xdc,0x03,0x00,0x7d,0x01]
 // GFX9: global_load_sbyte v1, v[3:4], off ; encoding: [0x00,0x80,0x44,0xdc,0x03,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
+global_load_sbyte v1, v[3:4], off dlc
+// GFX10: encoding: [0x00,0x90,0x24,0xdc,0x03,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 global_load_ushort v1, v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x28,0xdc,0x03,0x00,0x7d,0x01]
 // GFX9: global_load_ushort v1, v[3:4], off ; encoding: [0x00,0x80,0x48,0xdc,0x03,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
+global_load_ushort v1, v[3:4], off dlc
+// GFX10: encoding: [0x00,0x90,0x28,0xdc,0x03,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 global_load_sshort v1, v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x2c,0xdc,0x03,0x00,0x7d,0x01]
 // GFX9: global_load_sshort v1, v[3:4], off ; encoding: [0x00,0x80,0x4c,0xdc,0x03,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
+global_load_sshort v1, v[3:4], off dlc
+// GFX10: encoding: [0x00,0x90,0x2c,0xdc,0x03,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 global_load_dword v1, v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x30,0xdc,0x03,0x00,0x7d,0x01]
 // GFX9: global_load_dword v1, v[3:4], off ; encoding: [0x00,0x80,0x50,0xdc,0x03,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
+global_load_dword v1, v[3:4], off dlc
+// GFX10: encoding: [0x00,0x90,0x30,0xdc,0x03,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 global_load_dwordx2 v[1:2], v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x34,0xdc,0x03,0x00,0x7d,0x01]
 // GFX9: global_load_dwordx2 v[1:2], v[3:4], off ; encoding: [0x00,0x80,0x54,0xdc,0x03,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
+global_load_dwordx2 v[1:2], v[3:4], off dlc
+// GFX10: encoding: [0x00,0x90,0x34,0xdc,0x03,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 global_load_dwordx3 v[1:3], v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x3c,0xdc,0x03,0x00,0x7d,0x01]
 // GFX9: global_load_dwordx3 v[1:3], v[3:4], off ; encoding: [0x00,0x80,0x58,0xdc,0x03,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
+global_load_dwordx3 v[1:3], v[3:4], off dlc
+// GFX10: encoding: [0x00,0x90,0x3c,0xdc,0x03,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 global_load_dwordx4 v[1:4], v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x38,0xdc,0x03,0x00,0x7d,0x01]
 // GFX9: global_load_dwordx4 v[1:4], v[3:4], off   ; encoding: [0x00,0x80,0x5c,0xdc,0x03,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
+global_load_dwordx4 v[1:4], v[3:4], off dlc
+// GFX10: encoding: [0x00,0x90,0x38,0xdc,0x03,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 // FIXME: VI error should be instruction not supported
 global_load_dword v1, v[3:4], off offset:0
+// GFX10: encoding: [0x00,0x80,0x30,0xdc,0x03,0x00,0x7d,0x01]
 // GFX9: global_load_dword v1, v[3:4], off    ; encoding: [0x00,0x80,0x50,0xdc,0x03,0x00,0x7f,0x01]
 // VI-ERR: :41: error: not a valid operand.
 
 global_load_dword v1, v[3:4], off offset:4095
+// GFX10-ERR: error: invalid operand for instruction
 // GFX9: global_load_dword v1, v[3:4], off offset:4095 ; encoding: [0xff,0x8f,0x50,0xdc,0x03,0x00,0x7f,0x01]
 // VI-ERR: :41: error: not a valid operand.
 
 global_load_dword v1, v[3:4], off offset:-1
+// GFX10: encoding: [0xff,0x8f,0x30,0xdc,0x03,0x00,0x7d,0x01]
 // GFX9: global_load_dword v1, v[3:4], off offset:-1 ; encoding: [0xff,0x9f,0x50,0xdc,0x03,0x00,0x7f,0x01]
 // VI-ERR: :41: error: not a valid operand.
 
 global_load_dword v1, v[3:4], off offset:-4096
+// GFX10-ERR: error: invalid operand for instruction
 // GFX9: global_load_dword v1, v[3:4], off offset:-4096 ; encoding: [0x00,0x90,0x50,0xdc,0x03,0x00,0x7f,0x01]
 // VI-ERR: :41: error: not a valid operand.
 
 global_load_dword v1, v[3:4], off offset:4096
+// GFX10-ERR: error: invalid operand for instruction
 // GFX9-ERR: :35: error: invalid operand for instruction
 // VI-ERR: :41: error: not a valid operand.
 
 global_load_dword v1, v[3:4] off, offset:-4097
+// GFX10-ERR: error: invalid operand for instruction
 // GFX9-ERR: :35: error: invalid operand for instruction
 // VI-ERR: :41: error: not a valid operand.
 
 global_store_byte v[3:4], v1, off
+// GFX10: encoding: [0x00,0x80,0x60,0xdc,0x03,0x01,0x7d,0x00]
 // GFX9: global_store_byte v[3:4], v1, off ; encoding: [0x00,0x80,0x60,0xdc,0x03,0x01,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
+global_store_byte v[3:4], v1, off dlc
+// GFX10: encoding: [0x00,0x90,0x60,0xdc,0x03,0x01,0x7d,0x00]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 global_store_short v[3:4], v1, off
+// GFX10: encoding: [0x00,0x80,0x68,0xdc,0x03,0x01,0x7d,0x00]
 // GFX9: global_store_short v[3:4], v1, off ; encoding: [0x00,0x80,0x68,0xdc,0x03,0x01,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
+global_store_short v[3:4], v1, off dlc
+// GFX10: encoding: [0x00,0x90,0x68,0xdc,0x03,0x01,0x7d,0x00]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 global_store_dword v[3:4], v1, off
+// GFX10: encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7d,0x00]
 // GFX9: global_store_dword v[3:4], v1, off ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
+global_store_dword v[3:4], v1, off dlc
+// GFX10: encoding: [0x00,0x90,0x70,0xdc,0x03,0x01,0x7d,0x00]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 global_store_dwordx2 v[3:4], v[1:2], off
+// GFX10: encoding: [0x00,0x80,0x74,0xdc,0x03,0x01,0x7d,0x00]
 // GFX9: global_store_dwordx2 v[3:4], v[1:2], off ; encoding: [0x00,0x80,0x74,0xdc,0x03,0x01,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
+global_store_dwordx2 v[3:4], v[1:2], off dlc
+// GFX10: encoding: [0x00,0x90,0x74,0xdc,0x03,0x01,0x7d,0x00]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 global_store_dwordx3 v[3:4], v[1:3], off
+// GFX10: encoding: [0x00,0x80,0x7c,0xdc,0x03,0x01,0x7d,0x00]
 // GFX9: global_store_dwordx3 v[3:4], v[1:3], off ; encoding: [0x00,0x80,0x78,0xdc,0x03,0x01,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
+global_store_dwordx3 v[3:4], v[1:3], off dlc
+// GFX10: encoding: [0x00,0x90,0x7c,0xdc,0x03,0x01,0x7d,0x00]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 global_store_dwordx4 v[3:4], v[1:4], off
+// GFX10: encoding: [0x00,0x80,0x78,0xdc,0x03,0x01,0x7d,0x00]
 // GFX9: global_store_dwordx4 v[3:4], v[1:4], off ; encoding: [0x00,0x80,0x7c,0xdc,0x03,0x01,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
+global_store_dwordx4 v[3:4], v[1:4], off dlc
+// GFX10: encoding: [0x00,0x90,0x78,0xdc,0x03,0x01,0x7d,0x00]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 global_store_dword v[3:4], v1, off offset:12
+// GFX10: encoding: [0x0c,0x80,0x70,0xdc,0x03,0x01,0x7d,0x00]
 // GFX9: global_store_dword v[3:4], v1, off offset:12 ; encoding: [0x0c,0x80,0x70,0xdc,0x03,0x01,0x7f,0x00]
 // VI-ERR: :42: error: not a valid operand
 
 global_load_dword v1, v[3:4], s[2:3]
+// GFX10: encoding: [0x00,0x80,0x30,0xdc,0x03,0x00,0x02,0x01]
 // GFX9: global_load_dword v1, v[3:4], s[2:3] ; encoding: [0x00,0x80,0x50,0xdc,0x03,0x00,0x02,0x01]
 // VI-ERR: instruction not supported on this GPU
 
 global_load_dword v1, v[3:4], s[2:3] offset:24
+// GFX10: encoding: [0x18,0x80,0x30,0xdc,0x03,0x00,0x02,0x01]
 // GFX9: global_load_dword v1, v[3:4], s[2:3] offset:24 ; encoding: [0x18,0x80,0x50,0xdc,0x03,0x00,0x02,0x01]
 // VI-ERR: :44: error: not a valid operand.
 
 global_load_dword v1, v[3:4], s[2:3] offset:-8
+// GFX10: encoding: [0xf8,0x8f,0x30,0xdc,0x03,0x00,0x02,0x01]
 // GFX9: global_load_dword v1, v[3:4], s[2:3] offset:-8 ; encoding: [0xf8,0x9f,0x50,0xdc,0x03,0x00,0x02,0x01]
 // VI-ERR: :44: error: not a valid operand.
 
 global_store_dword v[3:4], v1, s[2:3]
+// GFX10: encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x02,0x00]
 // GFX9: global_store_dword v[3:4], v1, s[2:3] ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x02,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 global_store_dword v[3:4], v1, s[2:3] offset:24
+// GFX10: encoding: [0x18,0x80,0x70,0xdc,0x03,0x01,0x02,0x00]
 // GFX9: global_store_dword v[3:4], v1, s[2:3] offset:24 ; encoding: [0x18,0x80,0x70,0xdc,0x03,0x01,0x02,0x00]
 // VI-ERR: :45: error: not a valid operand.
 
 global_store_dword v[3:4], v1, s[2:3] offset:-8
+// GFX10: encoding: [0xf8,0x8f,0x70,0xdc,0x03,0x01,0x02,0x00]
 // GFX9: global_store_dword v[3:4], v1, s[2:3] offset:-8 ; encoding: [0xf8,0x9f,0x70,0xdc,0x03,0x01,0x02,0x00]
 // VI-ERR: :45: error: not a valid operand.
 
 // XXX: Is this valid?
 global_store_dword v[3:4], v1, exec
+// GFX10: encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00]
 // GFX9: global_store_dword v[3:4], v1, exec ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 global_load_dword v1, v[3:4], s2
+// GFX10-ERR: error: invalid operand for instruction
 // GFX9-ERR: :31: error: invalid operand for instruction
 // VI-ERR: :31: error: invalid operand for instruction
 
 global_load_dword v1, v[3:4], exec_hi
+// GFX10-ERR: error: invalid operand for instruction
 // GFX9-ERR: :31: error: invalid operand for instruction
 // VI-ERR: :31: error: invalid operand for instruction
 
 global_atomic_cmpswap v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0xc4,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_cmpswap v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x04,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: error: instruction not supported on this GPU
 
 global_atomic_cmpswap_x2 v[3:4], v[5:8], off
+// GFX10: encoding: [0x00,0x80,0x44,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_cmpswap_x2 v[3:4], v[5:8], off ; encoding: [0x00,0x80,0x84,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: error: instruction not supported on this GPU
 
 global_atomic_swap v[3:4], v5, off
+// GFX10: encoding: [0x00,0x80,0xc0,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_swap v[3:4], v5, off   ; encoding: [0x00,0x80,0x00,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: error: instruction not supported on this GPU
 
 global_atomic_swap_x2 v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0x40,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_swap_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x80,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: error: instruction not supported on this GPU
 
 global_atomic_add v[3:4], v5, off
+// GFX10: encoding: [0x00,0x80,0xc8,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_add v[3:4], v5, off   ; encoding: [0x00,0x80,0x08,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 global_atomic_sub v[3:4], v5, off
+// GFX10: encoding: [0x00,0x80,0xcc,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_sub v[3:4], v5, off    ; encoding: [0x00,0x80,0x0c,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 global_atomic_smin v[3:4], v5, off
+// GFX10: encoding: [0x00,0x80,0xd4,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_smin v[3:4], v5, off   ; encoding: [0x00,0x80,0x10,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 global_atomic_umin v[3:4], v5, off
+// GFX10: encoding: [0x00,0x80,0xd8,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_umin v[3:4], v5, off   ; encoding: [0x00,0x80,0x14,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 global_atomic_smax v[3:4], v5, off
+// GFX10: encoding: [0x00,0x80,0xdc,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_smax v[3:4], v5, off   ; encoding: [0x00,0x80,0x18,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 global_atomic_umax v[3:4], v5, off
+// GFX10: encoding: [0x00,0x80,0xe0,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_umax v[3:4], v5, off   ; encoding: [0x00,0x80,0x1c,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 global_atomic_and v[3:4], v5, off
+// GFX10: encoding: [0x00,0x80,0xe4,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_and v[3:4], v5, off    ; encoding: [0x00,0x80,0x20,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 global_atomic_or v[3:4], v5, off
+// GFX10: encoding: [0x00,0x80,0xe8,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_or v[3:4], v5, off     ; encoding: [0x00,0x80,0x24,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 global_atomic_xor v[3:4], v5, off
+// GFX10: encoding: [0x00,0x80,0xec,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_xor v[3:4], v5, off    ; encoding: [0x00,0x80,0x28,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 global_atomic_inc v[3:4], v5, off
+// GFX10: encoding: [0x00,0x80,0xf0,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_inc v[3:4], v5, off    ; encoding: [0x00,0x80,0x2c,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 global_atomic_dec v[3:4], v5, off
+// GFX10: encoding: [0x00,0x80,0xf4,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_dec v[3:4], v5, off    ; encoding: [0x00,0x80,0x30,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 global_atomic_add_x2 v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0x48,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_add_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x88,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 global_atomic_sub_x2 v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0x4c,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_sub_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x8c,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 global_atomic_smin_x2 v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0x54,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_smin_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x90,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 global_atomic_umin_x2 v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0x58,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_umin_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x94,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 global_atomic_smax_x2 v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0x5c,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_smax_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x98,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 global_atomic_umax_x2 v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0x60,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_umax_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x9c,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 global_atomic_and_x2 v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0x64,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_and_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0xa0,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 global_atomic_or_x2 v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0x68,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_or_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0xa4,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 global_atomic_xor_x2 v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0x6c,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_xor_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0xa8,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 global_atomic_inc_x2 v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0x70,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_inc_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0xac,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 global_atomic_dec_x2 v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0x74,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_dec_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0xb0,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: error: instruction not supported on this GPU
 
 global_atomic_cmpswap v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xc4,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_cmpswap v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x04,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :49: error: not a valid operand.
 
 global_atomic_cmpswap_x2 v[3:4], v[5:8], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x44,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_cmpswap_x2 v[3:4], v[5:8], off offset:-16 ; encoding: [0xf0,0x9f,0x84,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :52: error: not a valid operand.
 
 global_atomic_swap v[3:4], v5, off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xc0,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_swap v[3:4], v5, off   offset:-16 ; encoding: [0xf0,0x9f,0x00,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :42: error: not a valid operand
 
 global_atomic_swap_x2 v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x40,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_swap_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x80,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :49: error: not a valid operand
 
 global_atomic_add v[3:4], v5, off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xc8,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_add v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x08,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :41: error: not a valid operand
 
 global_atomic_sub v[3:4], v5, off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xcc,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_sub v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x0c,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :41: error: not a valid operand
 
 global_atomic_smin v[3:4], v5, off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xd4,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_smin v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x10,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :42: error: not a valid operand
 
 global_atomic_umin v[3:4], v5, off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xd8,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_umin v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x14,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :42: error: not a valid operand
 
 global_atomic_smax v[3:4], v5, off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xdc,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_smax v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x18,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :42: error: not a valid operand
 
 global_atomic_umax v[3:4], v5, off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xe0,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_umax v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x1c,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :42: error: not a valid operand
 
 global_atomic_and v[3:4], v5, off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xe4,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_and v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x20,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :41: error: not a valid operand
 
 global_atomic_or v[3:4], v5, off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xe8,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_or v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x24,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :40: error: not a valid operand
 
 global_atomic_xor v[3:4], v5, off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xec,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_xor v[3:4], v5, off  offset:-16 ; encoding: [0xf0,0x9f,0x28,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :41: error: not a valid operand
 
 global_atomic_inc v[3:4], v5, off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xf0,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_inc v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x2c,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :41: error: not a valid operand
 
 global_atomic_dec v[3:4], v5, off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xf4,0xdc,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_dec v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x30,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :41: error: not a valid operand
 
 global_atomic_add_x2 v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x48,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_add_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x88,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :48: error: not a valid operand
 
 global_atomic_sub_x2 v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x4c,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_sub_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x8c,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :48: error: not a valid operand
 
 global_atomic_smin_x2 v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x54,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_smin_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x90,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :49: error: not a valid operand
 
 global_atomic_umin_x2 v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x58,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_umin_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x94,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :49: error: not a valid operand
 
 global_atomic_smax_x2 v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x5c,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_smax_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x98,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :49: error: not a valid operand
 
 global_atomic_umax_x2 v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x60,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_umax_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x9c,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :49: error: not a valid operand
 
 global_atomic_and_x2 v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x64,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_and_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0xa0,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :48: error: not a valid operand
 
 global_atomic_or_x2 v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x68,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_or_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0xa4,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :47: error: not a valid operand
 
 global_atomic_xor_x2 v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x6c,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_xor_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0xa8,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :48: error: not a valid operand
 
 global_atomic_inc_x2 v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x70,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_inc_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0xac,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :48: error: not a valid operand
 
 global_atomic_dec_x2 v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x74,0xdd,0x03,0x05,0x7d,0x00]
 // GFX9: global_atomic_dec_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0xb0,0xdd,0x03,0x05,0x7f,0x00]
 // VI-ERR: :48: error: not a valid operand
 
 global_load_ubyte_d16 v1, v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x80,0xdc,0x03,0x00,0x7d,0x01]
 // GFX9: global_load_ubyte_d16 v1, v[3:4], off ; encoding: [0x00,0x80,0x80,0xdc,0x03,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
 global_load_ubyte_d16_hi v1, v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x84,0xdc,0x03,0x00,0x7d,0x01]
 // GFX9: global_load_ubyte_d16_hi v1, v[3:4], off ; encoding: [0x00,0x80,0x84,0xdc,0x03,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
 global_load_sbyte_d16 v1, v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x88,0xdc,0x03,0x00,0x7d,0x01]
 // GFX9: global_load_sbyte_d16 v1, v[3:4], off ; encoding: [0x00,0x80,0x88,0xdc,0x03,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
 global_load_sbyte_d16_hi v1, v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x8c,0xdc,0x03,0x00,0x7d,0x01]
 // GFX9: global_load_sbyte_d16_hi v1, v[3:4], off ; encoding: [0x00,0x80,0x8c,0xdc,0x03,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
 global_load_short_d16 v1, v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x90,0xdc,0x03,0x00,0x7d,0x01]
 // GFX9: global_load_short_d16 v1, v[3:4], off ; encoding: [0x00,0x80,0x90,0xdc,0x03,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
 global_load_short_d16_hi v1, v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x94,0xdc,0x03,0x00,0x7d,0x01]
 // GFX9: global_load_short_d16_hi v1, v[3:4], off ; encoding: [0x00,0x80,0x94,0xdc,0x03,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
 global_store_byte_d16_hi v[3:4], v1, off
+// GFX10: encoding: [0x00,0x80,0x64,0xdc,0x03,0x01,0x7d,0x00]
 // GFX9: global_store_byte_d16_hi v[3:4], v1, off ; encoding: [0x00,0x80,0x64,0xdc,0x03,0x01,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 global_store_short_d16_hi v[3:4], v1, off
+// GFX10: encoding: [0x00,0x80,0x6c,0xdc,0x03,0x01,0x7d,0x00]
 // GFX9: global_store_short_d16_hi v[3:4], v1, off ; encoding: [0x00,0x80,0x6c,0xdc,0x03,0x01,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
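
A note on the offset handling exercised above and in the next file: the
GFX10 checks reject offsets that GFX9 accepts (offset:4095 and offset:-4096
below), consistent with the flat-family immediate offset narrowing from 13
signed bits to 12. A minimal C++ sketch of such a range check, with
hypothetical names (illustrative only, not the actual SIInstrInfo or
AsmParser code):

  #include <cstdint>

  // True if Offset fits in a two's-complement immediate of Width bits
  // (mirrors llvm::isIntN, reimplemented so the sketch stands alone).
  static bool fitsSignedBits(int64_t Offset, unsigned Width) {
    int64_t Min = -(int64_t(1) << (Width - 1));
    int64_t Max = (int64_t(1) << (Width - 1)) - 1;
    return Offset >= Min && Offset <= Max;
  }

  // Hypothetical legality check for a global/scratch immediate offset:
  // 13 signed bits on GFX9, 12 on GFX10 (inferred from these tests).
  static bool isLegalFlatImmOffset(int64_t Offset, bool IsGFX10) {
    return fitsSignedBits(Offset, IsGFX10 ? 12 : 13);
  }

With Width = 12 this accepts [-2048, 2047], matching the gfx1010 checks in
flat-scratch-instructions.s below; with Width = 13 it accepts [-4096, 4095],
matching the gfx900 checks.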

Modified: llvm/trunk/test/MC/AMDGPU/flat-scratch-instructions.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/flat-scratch-instructions.s?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/flat-scratch-instructions.s (original)
+++ llvm/trunk/test/MC/AMDGPU/flat-scratch-instructions.s Tue Apr 30 15:08:23 2019
@@ -2,176 +2,291 @@
 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR %s
 // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR %s
 
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR %s
+
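+// Note: gfx1010 gets two RUN lines because the GFX10/W32 prefixes match
+// encodings on stdout while the GFX10-ERR/W32-ERR prefixes match
+// diagnostics on stderr (hence the 2>&1 redirect); W32 presumably stands
+// for the wave32 mode that is the gfx1010 default.
+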
 scratch_load_ubyte v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x20,0xdc,0x02,0x00,0x7d,0x01]
 // GFX9: scratch_load_ubyte v1, v2, off      ; encoding: [0x00,0x40,0x40,0xdc,0x02,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
+scratch_load_ubyte v1, v2, off dlc
+// GFX10: encoding: [0x00,0x50,0x20,0xdc,0x02,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
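+// Note: relative to the non-dlc encoding above, dlc flips byte1 from 0x40
+// to 0x50, i.e. the new dlc modifier appears to occupy bit 12 of the
+// encoded instruction (an inference from these checks).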
+
 scratch_load_sbyte v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x24,0xdc,0x02,0x00,0x7d,0x01]
 // GFX9: scratch_load_sbyte v1, v2, off      ; encoding: [0x00,0x40,0x44,0xdc,0x02,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
+scratch_load_sbyte v1, v2, off dlc
+// GFX10: encoding: [0x00,0x50,0x24,0xdc,0x02,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 scratch_load_ushort v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x28,0xdc,0x02,0x00,0x7d,0x01]
 // GFX9: scratch_load_ushort v1, v2, off      ; encoding: [0x00,0x40,0x48,0xdc,0x02,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
+scratch_load_ushort v1, v2, off dlc
+// GFX10: encoding: [0x00,0x50,0x28,0xdc,0x02,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 scratch_load_sshort v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x2c,0xdc,0x02,0x00,0x7d,0x01]
 // GFX9: scratch_load_sshort v1, v2, off      ; encoding: [0x00,0x40,0x4c,0xdc,0x02,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
+scratch_load_sshort v1, v2, off dlc
+// GFX10: encoding: [0x00,0x50,0x2c,0xdc,0x02,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 scratch_load_dword v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x30,0xdc,0x02,0x00,0x7d,0x01]
 // GFX9: scratch_load_dword v1, v2, off ; encoding: [0x00,0x40,0x50,0xdc,0x02,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
+scratch_load_dword v1, v2, off dlc
+// GFX10: encoding: [0x00,0x50,0x30,0xdc,0x02,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 scratch_load_dwordx2 v[1:2], v3, off
+// GFX10: encoding: [0x00,0x40,0x34,0xdc,0x03,0x00,0x7d,0x01]
 // GFX9: scratch_load_dwordx2 v[1:2], v3, off      ; encoding: [0x00,0x40,0x54,0xdc,0x03,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
+scratch_load_dwordx2 v[1:2], v3, off dlc
+// GFX10: encoding: [0x00,0x50,0x34,0xdc,0x03,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 scratch_load_dwordx3 v[1:3], v4, off
+// GFX10: encoding: [0x00,0x40,0x3c,0xdc,0x04,0x00,0x7d,0x01]
 // GFX9: scratch_load_dwordx3 v[1:3], v4, off      ; encoding: [0x00,0x40,0x58,0xdc,0x04,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
+scratch_load_dwordx3 v[1:3], v4, off dlc
+// GFX10: encoding: [0x00,0x50,0x3c,0xdc,0x04,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 scratch_load_dwordx4 v[1:4], v5, off
+// GFX10: encoding: [0x00,0x40,0x38,0xdc,0x05,0x00,0x7d,0x01]
 // GFX9: scratch_load_dwordx4 v[1:4], v5, off      ; encoding: [0x00,0x40,0x5c,0xdc,0x05,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
-// FIXME: VI error should be instruction nto supported
+
+scratch_load_dwordx4 v[1:4], v5, off dlc
+// GFX10: encoding: [0x00,0x50,0x38,0xdc,0x05,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
 
 scratch_load_dword v1, v2, off offset:0
+// GFX10: encoding: [0x00,0x40,0x30,0xdc,0x02,0x00,0x7d,0x01]
 // GFX9: scratch_load_dword v1, v2, off      ; encoding: [0x00,0x40,0x50,0xdc,0x02,0x00,0x7f,0x01]
 // VI-ERR: error: not a valid operand.
 
 scratch_load_dword v1, v2, off offset:4095
+// GFX10-ERR: error: invalid operand for instruction
 // GFX9: scratch_load_dword v1, v2, off offset:4095 ; encoding: [0xff,0x4f,0x50,0xdc,0x02,0x00,0x7f,0x01]
 // VI-ERR: error: not a valid operand.
 
 scratch_load_dword v1, v2, off offset:-1
+// GFX10: encoding: [0xff,0x4f,0x30,0xdc,0x02,0x00,0x7d,0x01]
 // GFX9: scratch_load_dword v1, v2, off offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x02,0x00,0x7f,0x01]
 // VI-ERR: error: not a valid operand.
 
 scratch_load_dword v1, v2, off offset:-4096
+// GFX10-ERR: error: invalid operand for instruction
 // GFX9: scratch_load_dword v1, v2, off offset:-4096 ; encoding: [0x00,0x50,0x50,0xdc,0x02,0x00,0x7f,0x01]
 // VI-ERR: error: not a valid operand.
 
 scratch_load_dword v1, v2, off offset:4096
+// GFX10-ERR: error: invalid operand for instruction
 // GFX9-ERR: error: invalid operand for instruction
 // VI-ERR: error: not a valid operand.
 
 scratch_load_dword v1, v2, off offset:-4097
+// GFX10-ERR: error: invalid operand for instruction
 // GFX9-ERR: error: invalid operand for instruction
 // VI-ERR: error: not a valid operand.
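// Note: taken together, the boundary checks above pin down the offset
// ranges: gfx900 accepts [-4096, 4095] (13 signed bits) while gfx1010
// accepts only [-2048, 2047] (12 signed bits).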
 
 scratch_store_byte v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x60,0xdc,0x01,0x02,0x7d,0x00]
 // GFX9: scratch_store_byte v1, v2, off ; encoding: [0x00,0x40,0x60,0xdc,0x01,0x02,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
+scratch_store_byte v1, v2, off dlc
+// GFX10: encoding: [0x00,0x50,0x60,0xdc,0x01,0x02,0x7d,0x00]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 scratch_store_short v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x68,0xdc,0x01,0x02,0x7d,0x00]
 // GFX9: scratch_store_short v1, v2, off ; encoding: [0x00,0x40,0x68,0xdc,0x01,0x02,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
+scratch_store_short v1, v2, off dlc
+// GFX10: encoding: [0x00,0x50,0x68,0xdc,0x01,0x02,0x7d,0x00]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 scratch_store_dword v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x70,0xdc,0x01,0x02,0x7d,0x00]
 // GFX9: scratch_store_dword v1, v2, off ; encoding: [0x00,0x40,0x70,0xdc,0x01,0x02,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
+scratch_store_dword v1, v2, off dlc
+// GFX10: encoding: [0x00,0x50,0x70,0xdc,0x01,0x02,0x7d,0x00]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 scratch_store_dwordx2 v1, v[2:3], off
+// GFX10: encoding: [0x00,0x40,0x74,0xdc,0x01,0x02,0x7d,0x00]
 // GFX9: scratch_store_dwordx2 v1, v[2:3], off ; encoding: [0x00,0x40,0x74,0xdc,0x01,0x02,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
+scratch_store_dwordx2 v1, v[2:3], off dlc
+// GFX10: encoding: [0x00,0x50,0x74,0xdc,0x01,0x02,0x7d,0x00]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 scratch_store_dwordx3 v1, v[2:4], off
+// GFX10: encoding: [0x00,0x40,0x7c,0xdc,0x01,0x02,0x7d,0x00]
 // GFX9: scratch_store_dwordx3 v1, v[2:4], off ; encoding: [0x00,0x40,0x78,0xdc,0x01,0x02,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
+scratch_store_dwordx3 v1, v[2:4], off dlc
+// GFX10: encoding: [0x00,0x50,0x7c,0xdc,0x01,0x02,0x7d,0x00]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 scratch_store_dwordx4 v1, v[2:5], off
+// GFX10: encoding: [0x00,0x40,0x78,0xdc,0x01,0x02,0x7d,0x00]
 // GFX9: scratch_store_dwordx4 v1, v[2:5], off ; encoding: [0x00,0x40,0x7c,0xdc,0x01,0x02,0x7f,0x00]
 // VI-ERR: instruction not supported on this GPU
 
+scratch_store_dwordx4 v1, v[2:5], off dlc
+// GFX10: encoding: [0x00,0x50,0x78,0xdc,0x01,0x02,0x7d,0x00]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
 scratch_store_dword v1, v2, off offset:12
+// GFX10: encoding: [0x0c,0x40,0x70,0xdc,0x01,0x02,0x7d,0x00]
 // GFX9: scratch_store_dword v1, v2, off offset:12 ; encoding: [0x0c,0x40,0x70,0xdc,0x01,0x02,0x7f,0x00]
 // VI-ERR: error: not a valid operand
 
 scratch_load_dword v1, off, s1
+// GFX10: encoding: [0x00,0x40,0x30,0xdc,0x00,0x00,0x01,0x01]
 // GFX9: scratch_load_dword v1, off, s1 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x01,0x01]
 // VI-ERR: instruction not supported on this GPU
 
 scratch_load_dword v1, off, s1 offset:32
+// GFX10: encoding: [0x20,0x40,0x30,0xdc,0x00,0x00,0x01,0x01]
 // GFX9: scratch_load_dword v1, off, s1 offset:32 ; encoding: [0x20,0x40,0x50,0xdc,0x00,0x00,0x01,0x01]
 // VI-ERR: error: not a valid operand
 
 scratch_store_dword off, v2, s1
+// GFX10: encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x01,0x00]
 // GFX9: scratch_store_dword off, v2, s1 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x01,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 scratch_store_dword off, v2, s1 offset:12
+// GFX10: encoding: [0x0c,0x40,0x70,0xdc,0x00,0x02,0x01,0x00]
 // GFX9: scratch_store_dword off, v2, s1 offset:12 ; encoding: [0x0c,0x40,0x70,0xdc,0x00,0x02,0x01,0x00]
 // VI-ERR: error: not a valid operand
 
 // FIXME: Should error about multiple offsets
 scratch_load_dword v1, v2, s1
+// GFX10-ERR: error: invalid operand for instruction
 // GFX9-ERR: error: invalid operand for instruction
 // VI-ERR: error: invalid operand for instruction
 
 scratch_load_dword v1, v2, s1 offset:32
+// GFX10-ERR: error: invalid operand for instruction
 // GFX9-ERR: error: invalid operand for instruction
 // VI-ERR: error: not a valid operand
 
 scratch_store_dword v1, v2, s1
+// GFX10-ERR: error: invalid operand for instruction
 // GFX9-ERR: error: invalid operand for instruction
 // VI-ERR: error: invalid operand for instruction
 
 scratch_store_dword v1, v2, s1 offset:32
+// GFX10-ERR: error: invalid operand for instruction
 // GFX9-ERR: error: invalid operand for instruction
 // VI-ERR: error: not a valid operand
 
 scratch_load_dword v1, off, exec_hi
+// GFX10-ERR: error: invalid operand for instruction
 // GFX9-ERR: error: invalid operand for instruction
 // VI-ERR: error: invalid operand for instruction
 
 scratch_store_dword off, v2, exec_hi
+// GFX10-ERR: error: invalid operand for instruction
 // GFX9-ERR: error: invalid operand for instruction
 // VI-ERR: error: invalid operand for instruction
 
 scratch_load_dword v1, off, exec_lo
+// GFX10: encoding: [0x00,0x40,0x30,0xdc,0x00,0x00,0x7e,0x01]
 // GFX9: scratch_load_dword v1, off, exec_lo ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x7e,0x01]
 // VI-ERR: instruction not supported on this GPU
 
 scratch_store_dword off, v2, exec_lo
+// GFX10: encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7e,0x00]
 // GFX9: scratch_store_dword off, v2, exec_lo ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7e,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 scratch_load_dword v1, off, m0
+// GFX10: encoding: [0x00,0x40,0x30,0xdc,0x00,0x00,0x7c,0x01]
 // GFX9: scratch_load_dword v1, off, m0  ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x7c,0x01]
 // VI-ERR: instruction not supported on this GPU
 
 scratch_store_dword off, v2, m0
+// GFX10: encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7c,0x00]
 // GFX9: scratch_store_dword off, v2, m0 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7c,0x00]
 // VI-ERR: instruction not supported on this GPU
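// Note: the second-to-last encoded byte is the SGPR-style encoding of the
// scratch base: 0x01 for s1, 0x7e for exec_lo, 0x7c for m0, and 0x7d on
// GFX10 when the base is omitted ('off'); exec_hi alone is rejected on all
// targets exercised here.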
 
 scratch_load_ubyte_d16 v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x80,0xdc,0x02,0x00,0x7d,0x01]
 // GFX9: scratch_load_ubyte_d16 v1, v2, off ; encoding: [0x00,0x40,0x80,0xdc,0x02,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
 scratch_load_ubyte_d16_hi v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x84,0xdc,0x02,0x00,0x7d,0x01]
 // GFX9: scratch_load_ubyte_d16_hi v1, v2, off ; encoding: [0x00,0x40,0x84,0xdc,0x02,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
 scratch_load_sbyte_d16 v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x88,0xdc,0x02,0x00,0x7d,0x01]
 // GFX9: scratch_load_sbyte_d16 v1, v2, off ; encoding: [0x00,0x40,0x88,0xdc,0x02,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
 scratch_load_sbyte_d16_hi v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x8c,0xdc,0x02,0x00,0x7d,0x01]
 // GFX9: scratch_load_sbyte_d16_hi v1, v2, off ; encoding: [0x00,0x40,0x8c,0xdc,0x02,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
 scratch_load_short_d16 v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x90,0xdc,0x02,0x00,0x7d,0x01]
 // GFX9: scratch_load_short_d16 v1, v2, off ; encoding: [0x00,0x40,0x90,0xdc,0x02,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
 scratch_load_short_d16_hi v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x94,0xdc,0x02,0x00,0x7d,0x01]
 // GFX9: scratch_load_short_d16_hi v1, v2, off ; encoding: [0x00,0x40,0x94,0xdc,0x02,0x00,0x7f,0x01]
 // VI-ERR: instruction not supported on this GPU
 
 scratch_store_byte_d16_hi off, v2, s1
+// GFX10: encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x01,0x00]
 // GFX9: scratch_store_byte_d16_hi off, v2, s1 ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x01,0x00]
 // VI-ERR: instruction not supported on this GPU
 
 scratch_store_short_d16_hi off, v2, s1
+// GFX10: encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x01,0x00]
 // GFX9: scratch_store_short_d16_hi off, v2, s1 ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x01,0x00]
// VI-ERR: instruction not supported on this GPU