[llvm] 36fe3f1 - [AMDGPU] flat scratch SVS addressing mode for gfx940

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 14 15:23:49 PDT 2022


Author: Stanislav Mekhanoshin
Date: 2022-03-14T15:23:36-07:00
New Revision: 36fe3f13a997cc45cbffe8a4631db4b1dade6eb8

URL: https://github.com/llvm/llvm-project/commit/36fe3f13a997cc45cbffe8a4631db4b1dade6eb8
DIFF: https://github.com/llvm/llvm-project/commit/36fe3f13a997cc45cbffe8a4631db4b1dade6eb8.diff

LOG: [AMDGPU] flat scratch SVS addressing mode for gfx940

Both VADDR and SADDR are used in SVS mode.

Differential Revision: https://reviews.llvm.org/D121254

Added: 
    llvm/test/MC/AMDGPU/flat-scratch-gfx940.s
    llvm/test/MC/Disassembler/AMDGPU/flat-scratch-gfx940.txt

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUGISel.td
    llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
    llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
    llvm/lib/Target/AMDGPU/FLATInstructions.td
    llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
    llvm/lib/Target/AMDGPU/SIInstrInfo.h
    llvm/lib/Target/AMDGPU/SIInstrInfo.td
    llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
    llvm/test/CodeGen/AMDGPU/flat-scratch.ll
    llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
    llvm/test/MC/AMDGPU/flat-scratch-instructions.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index f2b39d68b8572..fd409d6270cce 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -93,6 +93,10 @@ def gi_flat_scratch_saddr :
     GIComplexOperandMatcher<s32, "selectScratchSAddr">,
     GIComplexPatternEquiv<ScratchSAddr>;
 
+def gi_flat_scratch_svaddr :
+    GIComplexOperandMatcher<s32, "selectScratchSVAddr">,
+    GIComplexPatternEquiv<ScratchSVAddr>;
+
 def gi_ds_1addr_1offset :
     GIComplexOperandMatcher<s32, "selectDS1Addr1Offset">,
     GIComplexPatternEquiv<DS1Addr1Offset>;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index dc105dad27ce8..f5b51abd58eaf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1798,6 +1798,60 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
   return true;
 }
 
+bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
+                                             SDValue &VAddr, SDValue &SAddr,
+                                             SDValue &Offset) const  {
+  int64_t ImmOffset = 0;
+
+  SDValue LHS, RHS;
+  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
+    int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
+    const SIInstrInfo *TII = Subtarget->getInstrInfo();
+
+    if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true)) {
+      Addr = LHS;
+      ImmOffset = COffsetVal;
+    } else if (!LHS->isDivergent() && COffsetVal > 0) {
+      SDLoc SL(N);
+      // saddr + large_offset -> saddr + (vaddr = large_offset & ~MaxOffset) +
+      //                         (large_offset & MaxOffset);
+      int64_t SplitImmOffset, RemainderOffset;
+      std::tie(SplitImmOffset, RemainderOffset)
+        = TII->splitFlatOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true);
+
+      if (isUInt<32>(RemainderOffset)) {
+        SDNode *VMov = CurDAG->getMachineNode(
+          AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
+          CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
+        VAddr = SDValue(VMov, 0);
+        SAddr = LHS;
+        Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
+        return true;
+      }
+    }
+  }
+
+  if (Addr.getOpcode() != ISD::ADD)
+    return false;
+
+  LHS = Addr.getOperand(0);
+  RHS = Addr.getOperand(1);
+
+  if (!LHS->isDivergent() && RHS->isDivergent()) {
+    SAddr = LHS;
+    VAddr = RHS;
+  } else if (!RHS->isDivergent() && LHS->isDivergent()) {
+    SAddr = RHS;
+    VAddr = LHS;
+  } else {
+    return false;
+  }
+
+  SAddr = SelectSAddrFI(CurDAG, SAddr);
+  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
+  return true;
+}
+
 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                           SDValue &Offset, bool &Imm) const {
   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index d638d9877a9b3..acf82d6d15e4c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -188,6 +188,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
                          SDValue &VOffset, SDValue &Offset) const;
   bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
                           SDValue &Offset) const;
+  bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr,
+                           SDValue &SAddr, SDValue &Offset) const;
 
   bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                         bool &Imm) const;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 533b32e94dcf8..bf0f9fa976d46 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3792,6 +3792,56 @@ AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
   }};
 }
 
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
+  Register Addr = Root.getReg();
+  Register PtrBase;
+  int64_t ConstOffset;
+  int64_t ImmOffset = 0;
+
+  // Match the immediate offset first, which canonically is moved as low as
+  // possible.
+  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);
+
+  if (ConstOffset != 0 &&
+      TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS, true)) {
+    Addr = PtrBase;
+    ImmOffset = ConstOffset;
+  }
+
+  auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
+  if (!AddrDef)
+    return None;
+
+  if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
+    return None;
+
+  Register RHS = AddrDef->MI->getOperand(2).getReg();
+  if (RBI.getRegBank(RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
+    return None;
+
+  Register LHS = AddrDef->MI->getOperand(1).getReg();
+  auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI);
+
+  if (LHSDef && LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
+    int FI = LHSDef->MI->getOperand(1).getIndex();
+    return {{
+        [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr
+        [=](MachineInstrBuilder &MIB) { MIB.addFrameIndex(FI); }, // saddr
+        [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
+    }};
+  }
+
+  if (!isSGPR(LHS))
+    return None;
+
+  return {{
+      [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr
+      [=](MachineInstrBuilder &MIB) { MIB.addReg(LHS); }, // saddr
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
+  }};
+}
+
 InstructionSelector::ComplexRendererFns
 AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
   MachineInstr *MI = Root.getParent();

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 42095332d11ac..a879b9a733097 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -203,6 +203,8 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
 
   InstructionSelector::ComplexRendererFns
   selectScratchSAddr(MachineOperand &Root) const;
+  InstructionSelector::ComplexRendererFns
+  selectScratchSVAddr(MachineOperand &Root) const;
 
   InstructionSelector::ComplexRendererFns
   selectMUBUFScratchOffen(MachineOperand &Root) const;

diff  --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 58a7980b50458..9f086a29d16f5 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -12,6 +12,7 @@ def ScratchOffset : ComplexPattern<iPTR, 2, "SelectScratchOffset", [], [SDNPWant
 
 def GlobalSAddr : ComplexPattern<iPTR, 3, "SelectGlobalSAddr", [], [SDNPWantRoot], -10>;
 def ScratchSAddr : ComplexPattern<iPTR, 2, "SelectScratchSAddr", [], [SDNPWantRoot], -10>;
+def ScratchSVAddr : ComplexPattern<iPTR, 3, "SelectScratchSVAddr", [], [SDNPWantRoot], -10>;
 
 //===----------------------------------------------------------------------===//
 // FLAT classes
@@ -56,6 +57,8 @@ class FLAT_Pseudo<string opName, dag outs, dag ins,
   bits<1> dlcValue = 0;
   bits<1> has_sccb  = 1;
   bits<1> sccbValue = 0;
+  bits<1> has_sve  = 0; // Scratch VGPR Enable
+  bits<1> sve = 0;
 
   let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts,
     !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));
@@ -123,7 +126,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
 
   // Only valid on GFX9+
   let Inst{12-0} = offset;
-  let Inst{13} = lds;
+  let Inst{13} = !if(ps.has_sve, ps.sve, lds);
   let Inst{15-14} = seg;
 
   let Inst{16}    = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue);
@@ -273,16 +276,19 @@ class FlatScratchInst <string sv_op, string mode> {
 class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
   bit HasTiedOutput = 0,
   bit EnableSaddr = 0,
-  bit EnableVaddr = !not(EnableSaddr)>
+  bit EnableSVE = 0,
+  bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))>
   : FLAT_Pseudo<
   opName,
   (outs getLdStRegisterOperand<regClass>.ret:$vdst),
   !con(
-     !if(EnableSaddr,
-       (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
-       !if(EnableVaddr,
-         (ins VGPR_32:$vaddr, flat_offset:$offset),
-         (ins flat_offset:$offset))),
+    !if(EnableSVE,
+        (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
+        !if(EnableSaddr,
+          (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
+          !if(EnableVaddr,
+            (ins VGPR_32:$vaddr, flat_offset:$offset),
+            (ins flat_offset:$offset)))),
      !if(HasTiedOutput, (ins CPol:$cpol, getLdStRegisterOperand<regClass>.ret:$vdst_in),
                         (ins CPol_0:$cpol))),
   " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
@@ -291,7 +297,9 @@ class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
   let has_saddr = 1;
   let enabled_saddr = EnableSaddr;
   let has_vaddr = EnableVaddr;
-  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"));
+  let has_sve = EnableSVE;
+  let sve = EnableVaddr;
+  let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")));
   let maybeAtomic = 1;
 
   let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
@@ -299,15 +307,18 @@ class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
 }
 
 class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0,
-  bit EnableVaddr = !not(EnableSaddr),
+  bit EnableSVE = 0,
+  bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr)),
   RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret> : FLAT_Pseudo<
   opName,
   (outs),
-  !if(EnableSaddr,
-    (ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol),
-    !if(EnableVaddr,
-      (ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol),
-      (ins vdata_op:$vdata, flat_offset:$offset, CPol_0:$cpol))),
+  !if(EnableSVE,
+    (ins vdata_op:$vdata, VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol),
+    !if(EnableSaddr,
+      (ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol),
+      !if(EnableVaddr,
+        (ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol),
+        (ins vdata_op:$vdata, flat_offset:$offset, CPol_0:$cpol)))),
   " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
   let mayLoad  = 0;
   let mayStore = 1;
@@ -315,7 +326,9 @@ class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit En
   let has_saddr = 1;
   let enabled_saddr = EnableSaddr;
   let has_vaddr = EnableVaddr;
-  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"));
+  let has_sve = EnableSVE;
+  let sve = EnableVaddr;
+  let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")));
   let maybeAtomic = 1;
 }
 
@@ -326,8 +339,12 @@ multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass, bit H
     def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1>,
                  FlatScratchInst<opName, "SS">;
 
+    let SubtargetPredicate = isGFX940Plus in
+    def _SVS : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1, 1>,
+               FlatScratchInst<opName, "SVS">;
+
     let SubtargetPredicate = HasFlatScratchSTMode in
-    def _ST  : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 0, 0>,
+    def _ST  : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 0, 0, 0>,
                FlatScratchInst<opName, "ST">;
   }
 }
@@ -339,8 +356,12 @@ multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
     def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>,
                  FlatScratchInst<opName, "SS">;
 
+    let SubtargetPredicate = isGFX940Plus in
+    def _SVS : FLAT_Scratch_Store_Pseudo<opName, regClass, 1, 1>,
+               FlatScratchInst<opName, "SVS">;
+
     let SubtargetPredicate = HasFlatScratchSTMode in
-    def _ST  : FLAT_Scratch_Store_Pseudo<opName, regClass, 0, 0>,
+    def _ST  : FLAT_Scratch_Store_Pseudo<opName, regClass, 0, 0, 0>,
                FlatScratchInst<opName, "ST">;
   }
 }
@@ -962,6 +983,22 @@ class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
   (inst getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
 >;
 
+class ScratchLoadSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+  (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i16:$offset))),
+  (inst $vaddr, $saddr, $offset, 0)
+>;
+
+class ScratchStoreSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
+                             ValueType vt> : GCNPat <
+  (node vt:$data, (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i16:$offset)),
+  (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $saddr, $offset)
+>;
+
+class ScratchLoadSVaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+  (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i16:$offset), vt:$in)),
+  (inst $vaddr, $saddr, $offset, 0, $in)
+>;
+
 let OtherPredicates = [HasFlatAddressSpace] in {
 
 def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
@@ -1145,6 +1182,11 @@ multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueTy
   def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
     let AddedComplexity = 26;
   }
+
+  def : ScratchLoadSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> {
+    let SubtargetPredicate = isGFX940Plus;
+    let AddedComplexity = 27;
+  }
 }
 
 multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
@@ -1156,6 +1198,11 @@ multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
   def : ScratchStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
     let AddedComplexity = 26;
   }
+
+  def : ScratchStoreSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> {
+    let SubtargetPredicate = isGFX940Plus;
+    let AddedComplexity = 27;
+  }
 }
 
 multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
@@ -1166,6 +1213,11 @@ multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, Val
   def : ScratchLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
     let AddedComplexity = 26;
   }
+
+  def : ScratchLoadSVaddrPat_D16 <!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> {
+    let SubtargetPredicate = isGFX940Plus;
+    let AddedComplexity = 27;
+  }
 }
 
 let OtherPredicates = [HasFlatGlobalInsts] in {
@@ -1451,9 +1503,25 @@ class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> :
   SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> {
   let AssemblerPredicate = isGFX940Plus;
   let DecoderNamespace = "GFX9";
+  let Inst{13} = ps.sve;
   let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
 }
 
+multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> {
+  def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> {
+    let AssemblerPredicate = isGFX8GFX9NotGFX940;
+    let OtherPredicates = [isGFX8GFX9NotGFX940];
+  }
+  def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> {
+    let DecoderNamespace = "GFX9";
+  }
+  let AssemblerPredicate = isGFX940Plus, SubtargetPredicate = isGFX940Plus in {
+    def _VE_gfx940  : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
+    def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
+    def _ST_gfx940  : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
+  }
+}
+
 def FLAT_LOAD_UBYTE_vi         : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
 def FLAT_LOAD_SBYTE_vi         : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
 def FLAT_LOAD_USHORT_vi        : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
@@ -1573,28 +1641,28 @@ defm GLOBAL_ATOMIC_XOR_X2     : FLAT_Global_Real_Atomics_vi <0x6a>;
 defm GLOBAL_ATOMIC_INC_X2     : FLAT_Global_Real_Atomics_vi <0x6b>;
 defm GLOBAL_ATOMIC_DEC_X2     : FLAT_Global_Real_Atomics_vi <0x6c>;
 
-defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_vi <0x10>;
-defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_vi <0x11>;
-defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_vi <0x12>;
-defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_vi <0x13>;
-defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_vi <0x14>;
-defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_vi <0x15>;
-defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_vi <0x16>;
-defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_vi <0x17>;
-defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_vi <0x18>;
-defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x19>;
-defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_vi <0x20>;
-defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x21>;
-defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_vi <0x22>;
-defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x23>;
-defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_vi <0x24>;
-defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_vi <0x25>;
-defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_vi <0x1a>;
-defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
-defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_vi <0x1c>;
-defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_vi <0x1d>;
-defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_vi <0x1e>;
-defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_vi <0x1f>;
+defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_SVE_vi <0x10>;
+defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_SVE_vi <0x11>;
+defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_SVE_vi <0x12>;
+defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_SVE_vi <0x13>;
+defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_SVE_vi <0x14>;
+defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_SVE_vi <0x15>;
+defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_SVE_vi <0x16>;
+defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_SVE_vi <0x17>;
+defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_SVE_vi <0x18>;
+defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x19>;
+defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_SVE_vi <0x20>;
+defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x21>;
+defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_SVE_vi <0x22>;
+defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x23>;
+defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_SVE_vi <0x24>;
+defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x25>;
+defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_SVE_vi <0x1a>;
+defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x1b>;
+defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_SVE_vi <0x1c>;
+defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_SVE_vi <0x1d>;
+defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_SVE_vi <0x1e>;
+defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_SVE_vi <0x1f>;
 
 let SubtargetPredicate = isGFX8GFX9NotGFX940 in {
   // These instructions are encoded differently on gfx90* and gfx940.

diff  --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 9b95ade37dfb3..a6470f85a313f 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -677,7 +677,9 @@ void SIFoldOperands::foldOperand(
 
     if (TII->isFLATScratch(*UseMI) &&
         AMDGPU::getNamedOperandIdx(UseMI->getOpcode(),
-                                   AMDGPU::OpName::vaddr) != -1) {
+                                   AMDGPU::OpName::vaddr) != -1 &&
+        AMDGPU::getNamedOperandIdx(UseMI->getOpcode(),
+                                   AMDGPU::OpName::saddr) == -1) {
       unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(UseMI->getOpcode());
       UseMI->setDesc(TII->get(NewOpc));
     }

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 239980e1e9f78..25d3f4a765e6b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1244,6 +1244,11 @@ namespace AMDGPU {
   LLVM_READONLY
   int getFlatScratchInstSTfromSS(uint16_t Opcode);
 
+  /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
+  /// of an SVS (SADDR + VADDR) form.
+  LLVM_READONLY
+  int getFlatScratchInstSVfromSVS(uint16_t Opcode);
+
   /// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode
   /// of an SV (VADDR) form.
   LLVM_READONLY

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index cd2176802ca4e..33b7bc7008f50 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2643,6 +2643,14 @@ def getFlatScratchInstSSfromSV : InstrMapping {
   let ValueCols = [["SS"]];
 }
 
+def getFlatScratchInstSVfromSVS : InstrMapping {
+  let FilterClass = "FlatScratchInst";
+  let RowFields = ["SVOp"];
+  let ColFields = ["Mode"];
+  let KeyCol = ["SVS"];
+  let ValueCols = [["SV"]];
+}
+
 def getFlatScratchInstSVfromSS : InstrMapping {
   let FilterClass = "FlatScratchInst";
   let RowFields = ["SVOp"];

diff  --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 120b601f9a042..776cb70f9f7d5 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2143,18 +2143,23 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
             Offset = 0;
           }
 
-          assert(!TII->getNamedOperand(*MI, AMDGPU::OpName::vaddr) &&
-                 "Unexpected vaddr for flat scratch with a FI operand");
-
-          // On GFX10 we have ST mode to use no registers for an address.
-          // Otherwise we need to materialize 0 into an SGPR.
-          if (!Offset && ST.hasFlatScratchSTMode()) {
+          if (!Offset) {
             unsigned Opc = MI->getOpcode();
-            unsigned NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
-            MI->RemoveOperand(
-                AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
-            MI->setDesc(TII->get(NewOpc));
-            return;
+            int NewOpc = -1;
+            if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) != -1) {
+              NewOpc = AMDGPU::getFlatScratchInstSVfromSVS(Opc);
+            } else if (ST.hasFlatScratchSTMode()) {
+              // On GFX10 we have ST mode to use no registers for an address.
+              // Otherwise we need to materialize 0 into an SGPR.
+              NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
+            }
+
+            if (NewOpc != -1) {
+              MI->RemoveOperand(
+                  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
+              MI->setDesc(TII->get(NewOpc));
+              return;
+            }
           }
         }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll
index 35c4d56bd901d..5914521cf4189 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,RW-FLAT %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch < %s | FileCheck -check-prefixes=GCN,RO-FLAT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GCN,RO-FLAT %s
 
 ; Make sure flat_scratch_init is set
 

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
index a38017a709548..1c3c79f8b867d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -global-isel -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1030 -global-isel -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx940 -global-isel -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
 
 define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
 ; GFX9-LABEL: store_load_sindex_kernel:
@@ -40,6 +41,22 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
 ; GFX10-NEXT:    scratch_load_dword v0, off, s1 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_endpgm
+;
+; GFX940-LABEL: store_load_sindex_kernel:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT:    v_mov_b32_e32 v0, 15
+; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
+; GFX940-NEXT:    s_and_b32 s0, s0, 15
+; GFX940-NEXT:    v_mov_b32_e32 v1, s1
+; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX940-NEXT:    scratch_store_dword v1, v0, off offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v0, s0
+; GFX940-NEXT:    scratch_load_dword v0, v0, off offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_endpgm
 bb:
   %i = alloca [32 x float], align 4, addrspace(5)
   %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)*
@@ -89,6 +106,18 @@ define amdgpu_kernel void @store_load_vindex_kernel() {
 ; GFX10-NEXT:    scratch_load_dword v0, v1, off offset:124 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_endpgm
+;
+; GFX940-LABEL: store_load_vindex_kernel:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; GFX940-NEXT:    v_mov_b32_e32 v2, 15
+; GFX940-NEXT:    v_sub_u32_e32 v0, 0, v0
+; GFX940-NEXT:    scratch_store_dword v1, v2, off offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX940-NEXT:    scratch_load_dword v0, v0, off offset:128 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_endpgm
 bb:
   %i = alloca [32 x float], align 4, addrspace(5)
   %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)*
@@ -137,6 +166,19 @@ define void @store_load_vindex_foo(i32 %idx) {
 ; GFX10-NEXT:    scratch_load_dword v0, v1, off glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: store_load_vindex_foo:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; GFX940-NEXT:    v_mov_b32_e32 v2, 15
+; GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
+; GFX940-NEXT:    scratch_store_dword v1, v2, s32 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX940-NEXT:    scratch_load_dword v0, v0, s32 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
 bb:
   %i = alloca [32 x float], align 4, addrspace(5)
   %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)*
@@ -167,6 +209,14 @@ define void @private_ptr_foo(float addrspace(5)* nocapture %arg) {
 ; GFX10-NEXT:    scratch_store_dword v0, v1, off offset:4
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: private_ptr_foo:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v1, 0x41200000
+; GFX940-NEXT:    scratch_store_dword v0, v1, off offset:4
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
   %gep = getelementptr inbounds float, float addrspace(5)* %arg, i32 1
   store float 1.000000e+01, float addrspace(5)* %gep, align 4
   ret void
@@ -214,6 +264,24 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
 ; GFX10-NEXT:    scratch_load_dword v0, off, s1 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_endpgm
+;
+; GFX940-LABEL: store_load_sindex_small_offset_kernel:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT:    scratch_load_dword v0, off, off offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v0, 15
+; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
+; GFX940-NEXT:    s_and_b32 s0, s0, 15
+; GFX940-NEXT:    v_mov_b32_e32 v1, s1
+; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX940-NEXT:    scratch_store_dword v1, v0, off offset:260 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v0, s0
+; GFX940-NEXT:    scratch_load_dword v0, v0, off offset:260 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_endpgm
 bb:
   %padding = alloca [64 x i32], align 4, addrspace(5)
   %i = alloca [32 x float], align 4, addrspace(5)
@@ -271,6 +339,20 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() {
 ; GFX10-NEXT:    scratch_load_dword v0, v1, off offset:124 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_endpgm
+;
+; GFX940-LABEL: store_load_vindex_small_offset_kernel:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    scratch_load_dword v1, off, off offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; GFX940-NEXT:    v_mov_b32_e32 v2, 15
+; GFX940-NEXT:    v_sub_u32_e32 v0, 0, v0
+; GFX940-NEXT:    scratch_store_dword v1, v2, off offset:260 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX940-NEXT:    scratch_load_dword v0, v0, off offset:384 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_endpgm
 bb:
   %padding = alloca [64 x i32], align 4, addrspace(5)
   %i = alloca [32 x float], align 4, addrspace(5)
@@ -328,6 +410,21 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; GFX10-NEXT:    scratch_load_dword v0, v1, off glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: store_load_vindex_small_offset_foo:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    scratch_load_dword v1, off, s32 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; GFX940-NEXT:    v_mov_b32_e32 v2, 15
+; GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
+; GFX940-NEXT:    scratch_store_dword v1, v2, s32 offset:256 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX940-NEXT:    scratch_load_dword v0, v0, s32 offset:256 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
 bb:
   %padding = alloca [64 x i32], align 4, addrspace(5)
   %i = alloca [32 x float], align 4, addrspace(5)
@@ -386,6 +483,26 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
 ; GFX10-NEXT:    scratch_load_dword v0, off, s1 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_endpgm
+;
+; GFX940-LABEL: store_load_sindex_large_offset_kernel:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT:    scratch_load_dword v0, off, off offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v0, 15
+; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
+; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
+; GFX940-NEXT:    s_and_b32 s0, s0, 15
+; GFX940-NEXT:    v_mov_b32_e32 v1, s1
+; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX940-NEXT:    scratch_store_dword v1, v0, vcc_hi sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v0, s0
+; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
+; GFX940-NEXT:    scratch_load_dword v0, v0, vcc_hi sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_endpgm
 bb:
   %padding = alloca [4096 x i32], align 4, addrspace(5)
   %i = alloca [32 x float], align 4, addrspace(5)
@@ -443,6 +560,22 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
 ; GFX10-NEXT:    scratch_load_dword v0, v1, off offset:124 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_endpgm
+;
+; GFX940-LABEL: store_load_vindex_large_offset_kernel:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    scratch_load_dword v1, off, off offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; GFX940-NEXT:    v_mov_b32_e32 v2, 15
+; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
+; GFX940-NEXT:    v_sub_u32_e32 v0, 0, v0
+; GFX940-NEXT:    scratch_store_dword v1, v2, vcc_hi sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
+; GFX940-NEXT:    scratch_load_dword v0, v0, vcc_hi offset:124 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_endpgm
 bb:
   %padding = alloca [4096 x i32], align 4, addrspace(5)
   %i = alloca [32 x float], align 4, addrspace(5)
@@ -500,6 +633,23 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX10-NEXT:    scratch_load_dword v0, v1, off glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: store_load_vindex_large_offset_foo:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    scratch_load_dword v1, off, s32 offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; GFX940-NEXT:    v_mov_b32_e32 v2, 15
+; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
+; GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
+; GFX940-NEXT:    scratch_store_dword v1, v2, vcc_hi sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
+; GFX940-NEXT:    scratch_load_dword v0, v0, vcc_hi sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
 bb:
   %padding = alloca [4096 x i32], align 4, addrspace(5)
   %i = alloca [32 x float], align 4, addrspace(5)
@@ -551,6 +701,19 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; GFX10-NEXT:    scratch_load_dword v0, off, s0 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_endpgm
+;
+; GFX940-LABEL: store_load_large_imm_offset_kernel:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    v_mov_b32_e32 v0, 13
+; GFX940-NEXT:    scratch_store_dword off, v0, off offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v0, 0x3e80
+; GFX940-NEXT:    v_mov_b32_e32 v1, 15
+; GFX940-NEXT:    scratch_store_dword v0, v1, off offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    scratch_load_dword v0, v0, off offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_endpgm
 bb:
   %i = alloca [4096 x i32], align 4, addrspace(5)
   %i1 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 undef
@@ -595,6 +758,20 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX10-NEXT:    scratch_load_dword v0, off, s0 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: store_load_large_imm_offset_foo:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v0, 13
+; GFX940-NEXT:    scratch_store_dword off, v0, s32 offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v0, 0x3e80
+; GFX940-NEXT:    v_mov_b32_e32 v1, 15
+; GFX940-NEXT:    scratch_store_dword v0, v1, s32 offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    scratch_load_dword v0, v0, s32 offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
 bb:
   %i = alloca [4096 x i32], align 4, addrspace(5)
   %i1 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 undef
@@ -638,6 +815,18 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) {
 ; GFX10-NEXT:    scratch_load_dword v0, v0, off offset:1024 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_endpgm
+;
+; GFX940-LABEL: store_load_vidx_sidx_offset:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT:    v_mov_b32_e32 v1, 15
+; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-NEXT:    v_add_lshl_u32 v0, s0, v0, 2
+; GFX940-NEXT:    scratch_store_dword v0, v1, off offset:1028 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    scratch_load_dword v0, v0, off offset:1028 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_endpgm
 bb:
   %alloca = alloca [32 x i32], align 4, addrspace(5)
   %vidx = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -672,6 +861,16 @@ define void @store_load_i64_aligned(i64 addrspace(5)* nocapture %arg) {
 ; GFX10-NEXT:    scratch_load_dwordx2 v[0:1], v0, off glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: store_load_i64_aligned:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 15
+; GFX940-NEXT:    scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
 bb:
   store volatile i64 15, i64 addrspace(5)* %arg, align 8
   %load = load volatile i64, i64 addrspace(5)* %arg, align 8
@@ -701,6 +900,16 @@ define void @store_load_i64_unaligned(i64 addrspace(5)* nocapture %arg) {
 ; GFX10-NEXT:    scratch_load_dwordx2 v[0:1], v0, off glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: store_load_i64_unaligned:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 15
+; GFX940-NEXT:    scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
 bb:
   store volatile i64 15, i64 addrspace(5)* %arg, align 1
   %load = load volatile i64, i64 addrspace(5)* %arg, align 1
@@ -738,6 +947,21 @@ define void @store_load_v3i32_unaligned(<3 x i32> addrspace(5)* nocapture %arg)
 ; GFX10-NEXT:    scratch_load_dwordx3 v[0:2], v0, off glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: store_load_v3i32_unaligned:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    s_mov_b32 s2, 3
+; GFX940-NEXT:    s_mov_b32 s1, 2
+; GFX940-NEXT:    s_mov_b32 s0, 1
+; GFX940-NEXT:    v_mov_b32_e32 v4, s2
+; GFX940-NEXT:    v_mov_b32_e32 v3, s1
+; GFX940-NEXT:    v_mov_b32_e32 v2, s0
+; GFX940-NEXT:    scratch_store_dwordx3 v0, v[2:4], off sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    scratch_load_dwordx3 v[0:2], v0, off sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
 bb:
   store volatile <3 x i32> <i32 1, i32 2, i32 3>, <3 x i32> addrspace(5)* %arg, align 1
   %load = load volatile <3 x i32>, <3 x i32> addrspace(5)* %arg, align 1
@@ -779,6 +1003,21 @@ define void @store_load_v4i32_unaligned(<4 x i32> addrspace(5)* nocapture %arg)
 ; GFX10-NEXT:    scratch_load_dwordx4 v[0:3], v0, off glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: store_load_v4i32_unaligned:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    s_mov_b32 s3, 4
+; GFX940-NEXT:    s_mov_b32 s2, 3
+; GFX940-NEXT:    s_mov_b32 s1, 2
+; GFX940-NEXT:    s_mov_b32 s0, 1
+; GFX940-NEXT:    v_mov_b64_e32 v[4:5], s[2:3]
+; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX940-NEXT:    scratch_store_dwordx4 v0, v[2:5], off sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    scratch_load_dwordx4 v[0:3], v0, off sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
 bb:
   store volatile <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> addrspace(5)* %arg, align 1
   %load = load volatile <4 x i32>, <4 x i32> addrspace(5)* %arg, align 1

diff  --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
index 5cf182ba82227..cba84f5dfe4a6 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
@@ -2,6 +2,7 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1030 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
 ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9-PAL %s
+; RUN: llc -march=amdgcn -mcpu=gfx940 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940 %s
 ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-PAL,GFX1010-PAL %s
 ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1030 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-PAL,GFX1030-PAL %s
 
@@ -75,6 +76,20 @@ define amdgpu_kernel void @zero_init_kernel() {
 ; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
+; GFX940-LABEL: zero_init_kernel:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_mov_b32 s0, 0
+; GFX940-NEXT:    s_mov_b32 s1, s0
+; GFX940-NEXT:    s_mov_b32 s2, s0
+; GFX940-NEXT:    s_mov_b32 s3, s0
+; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:64
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:48
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:32
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:16
+; GFX940-NEXT:    s_endpgm
+;
 ; GFX1010-PAL-LABEL: zero_init_kernel:
 ; GFX1010-PAL:       ; %bb.0:
 ; GFX1010-PAL-NEXT:    s_getpc_b64 s[2:3]
@@ -193,6 +208,22 @@ define void @zero_init_foo() {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX940-LABEL: zero_init_foo:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    s_mov_b32 s0, 0
+; GFX940-NEXT:    s_mov_b32 s1, s0
+; GFX940-NEXT:    s_mov_b32 s2, s0
+; GFX940-NEXT:    s_mov_b32 s3, s0
+; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:48
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:32
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:16
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s32
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-PAL-LABEL: zero_init_foo:
 ; GFX10-PAL:       ; %bb.0:
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -211,6 +242,21 @@ define void @zero_init_foo() {
 ; GFX10-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s32
 ; GFX10-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-PAL-NEXT:    s_setpc_b64 s[30:31]
+; GCN-LABEL: zero_init_foo:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_mov_b32 s0, 0
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    s_mov_b32 s2, s0
+; GCN-NEXT:    s_mov_b32 s3, s0
+; GCN-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GCN-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GCN-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:48
+; GCN-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:32
+; GCN-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:16
+; GCN-NEXT:    scratch_store_dwordx4 off, v[0:3], s32
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %alloca = alloca [32 x i16], align 2, addrspace(5)
   %cast = bitcast [32 x i16] addrspace(5)* %alloca to i8 addrspace(5)*
   call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 2 dereferenceable(64) %cast, i8 0, i64 64, i1 false)
@@ -278,6 +324,22 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
+; GFX940-LABEL: store_load_sindex_kernel:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT:    v_mov_b32_e32 v0, 15
+; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
+; GFX940-NEXT:    s_and_b32 s0, s0, 15
+; GFX940-NEXT:    s_add_i32 s1, s1, 4
+; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_add_i32 s0, s0, 4
+; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_endpgm
+;
 ; GFX10-PAL-LABEL: store_load_sindex_kernel:
 ; GFX10-PAL:       ; %bb.0: ; %bb
 ; GFX10-PAL-NEXT:    s_getpc_b64 s[4:5]
@@ -302,6 +364,21 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
 ; GFX10-PAL-NEXT:    scratch_load_dword v0, off, s1 glc dlc
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-PAL-NEXT:    s_endpgm
+; GCN-LABEL: store_load_sindex_kernel:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GCN-NEXT:    v_mov_b32_e32 v0, 15
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_lshl_b32 s1, s0, 2
+; GCN-NEXT:    s_and_b32 s0, s0, 15
+; GCN-NEXT:    s_lshl_b32 s0, s0, 2
+; GCN-NEXT:    s_add_u32 s1, 4, s1
+; GCN-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_add_u32 s0, 4, s0
+; GCN-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_endpgm
 bb:
   %i = alloca [32 x float], align 4, addrspace(5)
   %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)*
@@ -371,6 +448,20 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
+; GFX940-LABEL: store_load_sindex_foo:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
+; GFX940-NEXT:    s_and_b32 s0, s0, 15
+; GFX940-NEXT:    s_add_i32 s1, s1, 4
+; GFX940-NEXT:    v_mov_b32_e32 v0, 15
+; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_add_i32 s0, s0, 4
+; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_endpgm
+;
 ; GFX10-PAL-LABEL: store_load_sindex_foo:
 ; GFX10-PAL:       ; %bb.0: ; %bb
 ; GFX10-PAL-NEXT:    s_getpc_b64 s[2:3]
@@ -393,6 +484,19 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
 ; GFX10-PAL-NEXT:    scratch_load_dword v0, off, s1 glc dlc
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-PAL-NEXT:    s_endpgm
+; GCN-LABEL: store_load_sindex_foo:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_lshl_b32 s1, s0, 2
+; GCN-NEXT:    s_and_b32 s0, s0, 15
+; GCN-NEXT:    s_lshl_b32 s0, s0, 2
+; GCN-NEXT:    s_add_u32 s1, 4, s1
+; GCN-NEXT:    v_mov_b32_e32 v0, 15
+; GCN-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_add_u32 s0, 4, s0
+; GCN-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_endpgm
 bb:
   %i = alloca [32 x float], align 4, addrspace(5)
   %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)*
@@ -459,6 +563,17 @@ define amdgpu_kernel void @store_load_vindex_kernel() {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
+; GFX940-LABEL: store_load_vindex_kernel:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX940-NEXT:    v_mov_b32_e32 v1, 15
+; GFX940-NEXT:    scratch_store_dword v0, v1, off offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_sub_u32_e32 v0, 4, v0
+; GFX940-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_endpgm
+;
 ; GFX10-PAL-LABEL: store_load_vindex_kernel:
 ; GFX10-PAL:       ; %bb.0: ; %bb
 ; GFX10-PAL-NEXT:    s_getpc_b64 s[2:3]
@@ -480,6 +595,16 @@ define amdgpu_kernel void @store_load_vindex_kernel() {
 ; GFX10-PAL-NEXT:    scratch_load_dword v0, v0, off offset:124 glc dlc
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-PAL-NEXT:    s_endpgm
+; GCN-LABEL: store_load_vindex_kernel:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT:    v_mov_b32_e32 v1, 15
+; GCN-NEXT:    scratch_store_dword v0, v1, off offset:4 sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_sub_u32_e32 v0, 4, v0
+; GCN-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_endpgm
 bb:
   %i = alloca [32 x float], align 4, addrspace(5)
   %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)*
@@ -539,6 +664,19 @@ define void @store_load_vindex_foo(i32 %idx) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX940-LABEL: store_load_vindex_foo:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; GFX940-NEXT:    v_mov_b32_e32 v2, 15
+; GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
+; GFX940-NEXT:    scratch_store_dword v1, v2, s32 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX940-NEXT:    scratch_load_dword v0, v0, s32 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-PAL-LABEL: store_load_vindex_foo:
 ; GFX10-PAL:       ; %bb.0: ; %bb
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -553,6 +691,18 @@ define void @store_load_vindex_foo(i32 %idx) {
 ; GFX10-PAL-NEXT:    scratch_load_dword v0, v1, off glc dlc
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-PAL-NEXT:    s_setpc_b64 s[30:31]
+; GCN-LABEL: store_load_vindex_foo:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v2, 15
+; GCN-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; GCN-NEXT:    v_and_b32_e32 v0, v0, v2
+; GCN-NEXT:    scratch_store_dword v1, v2, s32 sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT:    scratch_load_dword v0, v0, s32 sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
 bb:
   %i = alloca [32 x float], align 4, addrspace(5)
   %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)*
@@ -592,6 +742,14 @@ define void @private_ptr_foo(float addrspace(5)* nocapture %arg) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX940-LABEL: private_ptr_foo:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v1, 0x41200000
+; GFX940-NEXT:    scratch_store_dword v0, v1, off offset:4
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-PAL-LABEL: private_ptr_foo:
 ; GFX10-PAL:       ; %bb.0:
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -600,6 +758,13 @@ define void @private_ptr_foo(float addrspace(5)* nocapture %arg) {
 ; GFX10-PAL-NEXT:    scratch_store_dword v0, v1, off offset:4
 ; GFX10-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-PAL-NEXT:    s_setpc_b64 s[30:31]
+; GCN-LABEL: private_ptr_foo:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v1, 0x41200000
+; GCN-NEXT:    scratch_store_dword v0, v1, off offset:4
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %gep = getelementptr inbounds float, float addrspace(5)* %arg, i32 1
   store float 1.000000e+01, float addrspace(5)* %gep, align 4
   ret void
@@ -683,6 +848,22 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() {
 ; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:320
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
+; GFX940-LABEL: zero_init_small_offset_kernel:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    scratch_load_dword v0, off, off offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_mov_b32 s0, 0
+; GFX940-NEXT:    s_mov_b32 s1, s0
+; GFX940-NEXT:    s_mov_b32 s2, s0
+; GFX940-NEXT:    s_mov_b32 s3, s0
+; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:272
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:288
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:304
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:320
+; GFX940-NEXT:    s_endpgm
+;
 ; GFX1010-PAL-LABEL: zero_init_small_offset_kernel:
 ; GFX1010-PAL:       ; %bb.0:
 ; GFX1010-PAL-NEXT:    s_getpc_b64 s[2:3]
@@ -815,6 +996,24 @@ define void @zero_init_small_offset_foo() {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX940-LABEL: zero_init_small_offset_foo:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    scratch_load_dword v0, off, s32 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_mov_b32 s0, 0
+; GFX940-NEXT:    s_mov_b32 s1, s0
+; GFX940-NEXT:    s_mov_b32 s2, s0
+; GFX940-NEXT:    s_mov_b32 s3, s0
+; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:256
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:272
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:288
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:304
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-PAL-LABEL: zero_init_small_offset_foo:
 ; GFX10-PAL:       ; %bb.0:
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -835,6 +1034,23 @@ define void @zero_init_small_offset_foo() {
 ; GFX10-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:304
 ; GFX10-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-PAL-NEXT:    s_setpc_b64 s[30:31]
+; GCN-LABEL: zero_init_small_offset_foo:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    scratch_load_dword v0, off, s32 sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_mov_b32 s0, 0
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    s_mov_b32 s2, s0
+; GCN-NEXT:    s_mov_b32 s3, s0
+; GCN-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GCN-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GCN-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:256
+; GCN-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:272
+; GCN-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:288
+; GCN-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:304
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %padding = alloca [64 x i32], align 4, addrspace(5)
   %alloca = alloca [32 x i16], align 2, addrspace(5)
   %pad_gep = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %padding, i32 0, i32 undef
@@ -912,6 +1128,24 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
+; GFX940-LABEL: store_load_sindex_small_offset_kernel:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT:    scratch_load_dword v0, off, off offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v0, 15
+; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
+; GFX940-NEXT:    s_and_b32 s0, s0, 15
+; GFX940-NEXT:    s_addk_i32 s1, 0x104
+; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_addk_i32 s0, 0x104
+; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_endpgm
+;
 ; GFX1010-PAL-LABEL: store_load_sindex_small_offset_kernel:
 ; GFX1010-PAL:       ; %bb.0: ; %bb
 ; GFX1010-PAL-NEXT:    s_getpc_b64 s[4:5]
@@ -1046,6 +1280,22 @@ define amdgpu_ps void @store_load_sindex_small_offset_foo(i32 inreg %idx) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
+; GFX940-LABEL: store_load_sindex_small_offset_foo:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    scratch_load_dword v0, off, off offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
+; GFX940-NEXT:    s_and_b32 s0, s0, 15
+; GFX940-NEXT:    s_addk_i32 s1, 0x104
+; GFX940-NEXT:    v_mov_b32_e32 v0, 15
+; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_addk_i32 s0, 0x104
+; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_endpgm
+;
 ; GFX1010-PAL-LABEL: store_load_sindex_small_offset_foo:
 ; GFX1010-PAL:       ; %bb.0: ; %bb
 ; GFX1010-PAL-NEXT:    s_getpc_b64 s[2:3]
@@ -1173,6 +1423,19 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
+; GFX940-LABEL: store_load_vindex_small_offset_kernel:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    scratch_load_dword v1, off, off offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX940-NEXT:    v_mov_b32_e32 v1, 15
+; GFX940-NEXT:    scratch_store_dword v0, v1, off offset:260 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_sub_u32_e32 v0, 0x104, v0
+; GFX940-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_endpgm
+;
 ; GFX1010-PAL-LABEL: store_load_vindex_small_offset_kernel:
 ; GFX1010-PAL:       ; %bb.0: ; %bb
 ; GFX1010-PAL-NEXT:    s_getpc_b64 s[2:3]
@@ -1292,6 +1555,21 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX940-LABEL: store_load_vindex_small_offset_foo:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    scratch_load_dword v1, off, s32 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; GFX940-NEXT:    v_mov_b32_e32 v2, 15
+; GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
+; GFX940-NEXT:    scratch_store_dword v1, v2, s32 offset:256 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX940-NEXT:    scratch_load_dword v0, v0, s32 offset:256 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-PAL-LABEL: store_load_vindex_small_offset_foo:
 ; GFX10-PAL:       ; %bb.0: ; %bb
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1309,6 +1587,20 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; GFX10-PAL-NEXT:    scratch_load_dword v0, v1, off glc dlc
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-PAL-NEXT:    s_setpc_b64 s[30:31]
+; GCN-LABEL: store_load_vindex_small_offset_foo:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    scratch_load_dword v1, off, s32 sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v2, 15
+; GCN-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; GCN-NEXT:    v_and_b32_e32 v0, v0, v2
+; GCN-NEXT:    scratch_store_dword v1, v2, s32 offset:256 sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT:    scratch_load_dword v0, v0, s32 offset:256 sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
 bb:
   %padding = alloca [64 x i32], align 4, addrspace(5)
   %i = alloca [32 x float], align 4, addrspace(5)
@@ -1407,6 +1699,26 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
+; GFX940-LABEL: zero_init_large_offset_kernel:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    scratch_load_dword v0, off, off offset:16 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_mov_b32 s0, 0
+; GFX940-NEXT:    s_mov_b32 s1, s0
+; GFX940-NEXT:    s_mov_b32 s2, s0
+; GFX940-NEXT:    s_mov_b32 s3, s0
+; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4010
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi
+; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4010
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16
+; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4010
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32
+; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4010
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
+; GFX940-NEXT:    s_endpgm
+;
 ; GFX1010-PAL-LABEL: zero_init_large_offset_kernel:
 ; GFX1010-PAL:       ; %bb.0:
 ; GFX1010-PAL-NEXT:    s_getpc_b64 s[2:3]
@@ -1555,6 +1867,28 @@ define void @zero_init_large_offset_foo() {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX940-LABEL: zero_init_large_offset_foo:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    scratch_load_dword v0, off, s32 offset:16 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_mov_b32 s0, 0
+; GFX940-NEXT:    s_mov_b32 s1, s0
+; GFX940-NEXT:    s_mov_b32 s2, s0
+; GFX940-NEXT:    s_mov_b32 s3, s0
+; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4010
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi
+; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4010
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16
+; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4010
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32
+; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4010
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX1010-PAL-LABEL: zero_init_large_offset_foo:
 ; GFX1010-PAL:       ; %bb.0:
 ; GFX1010-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1684,6 +2018,24 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
+; GFX940-LABEL: store_load_sindex_large_offset_kernel:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT:    scratch_load_dword v0, off, off offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v0, 15
+; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
+; GFX940-NEXT:    s_and_b32 s0, s0, 15
+; GFX940-NEXT:    s_addk_i32 s1, 0x4004
+; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_addk_i32 s0, 0x4004
+; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_endpgm
+;
 ; GFX1010-PAL-LABEL: store_load_sindex_large_offset_kernel:
 ; GFX1010-PAL:       ; %bb.0: ; %bb
 ; GFX1010-PAL-NEXT:    s_getpc_b64 s[4:5]
@@ -1818,6 +2170,22 @@ define amdgpu_ps void @store_load_sindex_large_offset_foo(i32 inreg %idx) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
+; GFX940-LABEL: store_load_sindex_large_offset_foo:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    scratch_load_dword v0, off, off offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
+; GFX940-NEXT:    s_and_b32 s0, s0, 15
+; GFX940-NEXT:    s_addk_i32 s1, 0x4004
+; GFX940-NEXT:    v_mov_b32_e32 v0, 15
+; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_addk_i32 s0, 0x4004
+; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_endpgm
+;
 ; GFX1010-PAL-LABEL: store_load_sindex_large_offset_foo:
 ; GFX1010-PAL:       ; %bb.0: ; %bb
 ; GFX1010-PAL-NEXT:    s_getpc_b64 s[2:3]
@@ -1945,6 +2313,20 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
+; GFX940-LABEL: store_load_vindex_large_offset_kernel:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    scratch_load_dword v1, off, off offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX940-NEXT:    v_mov_b32_e32 v1, 15
+; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
+; GFX940-NEXT:    scratch_store_dword v0, v1, vcc_hi sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_sub_u32_e32 v0, 0x4004, v0
+; GFX940-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_endpgm
+;
 ; GFX1010-PAL-LABEL: store_load_vindex_large_offset_kernel:
 ; GFX1010-PAL:       ; %bb.0: ; %bb
 ; GFX1010-PAL-NEXT:    s_getpc_b64 s[2:3]
@@ -2064,6 +2446,23 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX940-LABEL: store_load_vindex_large_offset_foo:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    scratch_load_dword v1, off, s32 offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; GFX940-NEXT:    v_mov_b32_e32 v2, 15
+; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
+; GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
+; GFX940-NEXT:    scratch_store_dword v1, v2, vcc_hi sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
+; GFX940-NEXT:    scratch_load_dword v0, v0, vcc_hi sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-PAL-LABEL: store_load_vindex_large_offset_foo:
 ; GFX10-PAL:       ; %bb.0: ; %bb
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2081,6 +2480,22 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX10-PAL-NEXT:    scratch_load_dword v0, v1, off glc dlc
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-PAL-NEXT:    s_setpc_b64 s[30:31]
+; GCN-LABEL: store_load_vindex_large_offset_foo:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    scratch_load_dword v1, off, s32 sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v2, 15
+; GCN-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; GCN-NEXT:    v_and_b32_e32 v0, v0, v2
+; GCN-NEXT:    s_add_u32 vcc_hi, s32, 0x4000
+; GCN-NEXT:    scratch_store_dword v1, v2, vcc_hi sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT:    s_add_u32 vcc_hi, s32, 0x4000
+; GCN-NEXT:    scratch_load_dword v0, v0, vcc_hi sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
 bb:
   %padding = alloca [4096 x i32], align 4, addrspace(5)
   %i = alloca [32 x float], align 4, addrspace(5)
@@ -2155,6 +2570,19 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
+; GFX940-LABEL: store_load_large_imm_offset_kernel:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    v_mov_b32_e32 v0, 13
+; GFX940-NEXT:    scratch_store_dword off, v0, off offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v0, 0x3000
+; GFX940-NEXT:    v_mov_b32_e32 v1, 15
+; GFX940-NEXT:    scratch_store_dword v0, v1, off offset:3716 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    scratch_load_dword v0, v0, off offset:3716 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_endpgm
+;
 ; GFX1010-PAL-LABEL: store_load_large_imm_offset_kernel:
 ; GFX1010-PAL:       ; %bb.0: ; %bb
 ; GFX1010-PAL-NEXT:    s_getpc_b64 s[2:3]
@@ -2262,6 +2690,20 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX940-LABEL: store_load_large_imm_offset_foo:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v0, 13
+; GFX940-NEXT:    scratch_store_dword off, v0, s32 offset:4 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v0, 0x3000
+; GFX940-NEXT:    v_mov_b32_e32 v1, 15
+; GFX940-NEXT:    scratch_store_dword v0, v1, s32 offset:3716 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    scratch_load_dword v0, v0, s32 offset:3716 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-PAL-LABEL: store_load_large_imm_offset_foo:
 ; GFX10-PAL:       ; %bb.0: ; %bb
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2278,6 +2720,19 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX10-PAL-NEXT:    scratch_load_dword v0, off, s0 offset:1664 glc dlc
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-PAL-NEXT:    s_setpc_b64 s[30:31]
+; GCN-LABEL: store_load_large_imm_offset_foo:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v0, 13
+; GCN-NEXT:    scratch_store_dword off, v0, s32 sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v0, 0x3000
+; GCN-NEXT:    v_mov_b32_e32 v1, 15
+; GCN-NEXT:    scratch_store_dword v0, v1, s32 offset:3712 sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    scratch_load_dword v0, v0, s32 offset:3712 sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
 bb:
   %i = alloca [4096 x i32], align 4, addrspace(5)
   %i1 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 undef
@@ -2343,6 +2798,18 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
+; GFX940-LABEL: store_load_vidx_sidx_offset:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT:    v_mov_b32_e32 v1, 15
+; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-NEXT:    v_add_lshl_u32 v0, s0, v0, 2
+; GFX940-NEXT:    scratch_store_dword v0, v1, off offset:1028 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    scratch_load_dword v0, v0, off offset:1028 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_endpgm
+;
 ; GFX10-PAL-LABEL: store_load_vidx_sidx_offset:
 ; GFX10-PAL:       ; %bb.0: ; %bb
 ; GFX10-PAL-NEXT:    s_getpc_b64 s[4:5]
@@ -2364,6 +2831,17 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) {
 ; GFX10-PAL-NEXT:    scratch_load_dword v0, v0, off offset:1024 glc dlc
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-PAL-NEXT:    s_endpgm
+; GCN-LABEL: store_load_vidx_sidx_offset:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GCN-NEXT:    v_mov_b32_e32 v1, 15
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    v_add_lshl_u32 v0, s0, v0, 2
+; GCN-NEXT:    scratch_store_dword v0, v1, off offset:1028 sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    scratch_load_dword v0, v0, off offset:1028 sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_endpgm
 bb:
   %alloca = alloca [32 x i32], align 4, addrspace(5)
   %vidx = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -2410,6 +2888,17 @@ define void @store_load_i64_aligned(i64 addrspace(5)* nocapture %arg) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX940-LABEL: store_load_i64_aligned:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v2, 15
+; GFX940-NEXT:    v_mov_b32_e32 v3, 0
+; GFX940-NEXT:    scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-PAL-LABEL: store_load_i64_aligned:
 ; GFX10-PAL:       ; %bb.0: ; %bb
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2421,6 +2910,16 @@ define void @store_load_i64_aligned(i64 addrspace(5)* nocapture %arg) {
 ; GFX10-PAL-NEXT:    scratch_load_dwordx2 v[0:1], v0, off glc dlc
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-PAL-NEXT:    s_setpc_b64 s[30:31]
+; GCN-LABEL: store_load_i64_aligned:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v2, 15
+; GCN-NEXT:    v_mov_b32_e32 v3, 0
+; GCN-NEXT:    scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
 bb:
   store volatile i64 15, i64 addrspace(5)* %arg, align 8
   %load = load volatile i64, i64 addrspace(5)* %arg, align 8
@@ -2462,6 +2961,17 @@ define void @store_load_i64_unaligned(i64 addrspace(5)* nocapture %arg) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX940-LABEL: store_load_i64_unaligned:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v2, 15
+; GFX940-NEXT:    v_mov_b32_e32 v3, 0
+; GFX940-NEXT:    scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-PAL-LABEL: store_load_i64_unaligned:
 ; GFX10-PAL:       ; %bb.0: ; %bb
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2473,6 +2983,16 @@ define void @store_load_i64_unaligned(i64 addrspace(5)* nocapture %arg) {
 ; GFX10-PAL-NEXT:    scratch_load_dwordx2 v[0:1], v0, off glc dlc
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-PAL-NEXT:    s_setpc_b64 s[30:31]
+; GCN-LABEL: store_load_i64_unaligned:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v2, 15
+; GCN-NEXT:    v_mov_b32_e32 v3, 0
+; GCN-NEXT:    scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
 bb:
   store volatile i64 15, i64 addrspace(5)* %arg, align 1
   %load = load volatile i64, i64 addrspace(5)* %arg, align 1
@@ -2517,6 +3037,18 @@ define void @store_load_v3i32_unaligned(<3 x i32> addrspace(5)* nocapture %arg)
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX940-LABEL: store_load_v3i32_unaligned:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v2, 1
+; GFX940-NEXT:    v_mov_b32_e32 v3, 2
+; GFX940-NEXT:    v_mov_b32_e32 v4, 3
+; GFX940-NEXT:    scratch_store_dwordx3 v0, v[2:4], off sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    scratch_load_dwordx3 v[0:2], v0, off sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-PAL-LABEL: store_load_v3i32_unaligned:
 ; GFX10-PAL:       ; %bb.0: ; %bb
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2529,6 +3061,17 @@ define void @store_load_v3i32_unaligned(<3 x i32> addrspace(5)* nocapture %arg)
 ; GFX10-PAL-NEXT:    scratch_load_dwordx3 v[0:2], v0, off glc dlc
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-PAL-NEXT:    s_setpc_b64 s[30:31]
+; GCN-LABEL: store_load_v3i32_unaligned:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v2, 1
+; GCN-NEXT:    v_mov_b32_e32 v3, 2
+; GCN-NEXT:    v_mov_b32_e32 v4, 3
+; GCN-NEXT:    scratch_store_dwordx3 v0, v[2:4], off sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    scratch_load_dwordx3 v[0:2], v0, off sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
 bb:
   store volatile <3 x i32> <i32 1, i32 2, i32 3>, <3 x i32> addrspace(5)* %arg, align 1
   %load = load volatile <3 x i32>, <3 x i32> addrspace(5)* %arg, align 1
@@ -2576,6 +3119,19 @@ define void @store_load_v4i32_unaligned(<4 x i32> addrspace(5)* nocapture %arg)
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX940-LABEL: store_load_v4i32_unaligned:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v2, 1
+; GFX940-NEXT:    v_mov_b32_e32 v3, 2
+; GFX940-NEXT:    v_mov_b32_e32 v4, 3
+; GFX940-NEXT:    v_mov_b32_e32 v5, 4
+; GFX940-NEXT:    scratch_store_dwordx4 v0, v[2:5], off sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    scratch_load_dwordx4 v[0:3], v0, off sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-PAL-LABEL: store_load_v4i32_unaligned:
 ; GFX10-PAL:       ; %bb.0: ; %bb
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2589,6 +3145,18 @@ define void @store_load_v4i32_unaligned(<4 x i32> addrspace(5)* nocapture %arg)
 ; GFX10-PAL-NEXT:    scratch_load_dwordx4 v[0:3], v0, off glc dlc
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-PAL-NEXT:    s_setpc_b64 s[30:31]
+; GCN-LABEL: store_load_v4i32_unaligned:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v2, 1
+; GCN-NEXT:    v_mov_b32_e32 v3, 2
+; GCN-NEXT:    v_mov_b32_e32 v4, 3
+; GCN-NEXT:    v_mov_b32_e32 v5, 4
+; GCN-NEXT:    scratch_store_dwordx4 v0, v[2:5], off sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    scratch_load_dwordx4 v[0:3], v0, off sc0 sc1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
 bb:
   store volatile <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> addrspace(5)* %arg, align 1
   %load = load volatile <4 x i32>, <4 x i32> addrspace(5)* %arg, align 1
@@ -2629,6 +3197,17 @@ define void @store_load_i32_negative_unaligned(i8 addrspace(5)* nocapture %arg)
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX940-LABEL: store_load_i32_negative_unaligned:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_add_u32_e32 v0, -1, v0
+; GFX940-NEXT:    v_mov_b32_e32 v1, 1
+; GFX940-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    scratch_load_ubyte v0, v0, off sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX1010-PAL-LABEL: store_load_i32_negative_unaligned:
 ; GFX1010-PAL:       ; %bb.0: ; %bb
 ; GFX1010-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2693,6 +3272,17 @@ define void @store_load_i32_large_negative_unaligned(i8 addrspace(5)* nocapture
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX940-LABEL: store_load_i32_large_negative_unaligned:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    s_movk_i32 s0, 0xef7f
+; GFX940-NEXT:    v_mov_b32_e32 v1, 1
+; GFX940-NEXT:    scratch_store_byte v0, v1, s0 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    scratch_load_ubyte v0, v0, s0 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX1010-PAL-LABEL: store_load_i32_large_negative_unaligned:
 ; GFX1010-PAL:       ; %bb.0: ; %bb
 ; GFX1010-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2803,6 +3393,26 @@ define amdgpu_ps void @large_offset() {
 ; GFX9-PAL-NEXT:    ;;#ASMEND
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
+; GFX940-LABEL: large_offset:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    v_mov_b32_e32 v0, 0
+; GFX940-NEXT:    v_mov_b32_e32 v1, v0
+; GFX940-NEXT:    v_mov_b32_e32 v2, v0
+; GFX940-NEXT:    v_mov_b32_e32 v3, v0
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:3024 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:3024 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    v_mov_b32_e32 v0, 16
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use v0
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    v_mov_b32_e32 v0, 0x810
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use v0
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    s_endpgm
+;
 ; GFX10-PAL-LABEL: large_offset:
 ; GFX10-PAL:       ; %bb.0: ; %bb
 ; GFX10-PAL-NEXT:    s_getpc_b64 s[2:3]

diff  --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
index b511b98ac2551..00c74ff0839da 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
@@ -282,10 +282,10 @@ define amdgpu_kernel void @private_nontemporal_load_1(
 ; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
 ; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s4, s[0:1], 0x0
 ; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x8
+; GFX940-NOTTGSPLIT-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NOTTGSPLIT-NEXT:    v_lshl_add_u32 v0, v0, 2, s4
-; GFX940-NOTTGSPLIT-NEXT:    scratch_load_dword v0, v0, off nt
+; GFX940-NOTTGSPLIT-NEXT:    scratch_load_dword v0, v0, s4 nt
 ; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v1, v0, s[2:3]
 ; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
@@ -294,10 +294,10 @@ define amdgpu_kernel void @private_nontemporal_load_1(
 ; GFX940-TGSPLIT:       ; %bb.0: ; %entry
 ; GFX940-TGSPLIT-NEXT:    s_load_dword s4, s[0:1], 0x0
 ; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x8
+; GFX940-TGSPLIT-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-TGSPLIT-NEXT:    v_lshl_add_u32 v0, v0, 2, s4
-; GFX940-TGSPLIT-NEXT:    scratch_load_dword v0, v0, off nt
+; GFX940-TGSPLIT-NEXT:    scratch_load_dword v0, v0, s4 nt
 ; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-TGSPLIT-NEXT:    global_store_dword v1, v0, s[2:3]
 ; GFX940-TGSPLIT-NEXT:    s_endpgm
@@ -579,24 +579,24 @@ define amdgpu_kernel void @private_nontemporal_store_1(
 ; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
 ; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s4, s[0:1], 0x8
+; GFX940-NOTTGSPLIT-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NOTTGSPLIT-NEXT:    v_lshl_add_u32 v0, v0, 2, s4
 ; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s0, s[2:3], 0x0
 ; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s0
-; GFX940-NOTTGSPLIT-NEXT:    scratch_store_dword v0, v1, off nt
+; GFX940-NOTTGSPLIT-NEXT:    scratch_store_dword v0, v1, s4 nt
 ; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
 ;
 ; GFX940-TGSPLIT-LABEL: private_nontemporal_store_1:
 ; GFX940-TGSPLIT:       ; %bb.0: ; %entry
 ; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX940-TGSPLIT-NEXT:    s_load_dword s4, s[0:1], 0x8
+; GFX940-TGSPLIT-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-TGSPLIT-NEXT:    v_lshl_add_u32 v0, v0, 2, s4
 ; GFX940-TGSPLIT-NEXT:    s_load_dword s0, s[2:3], 0x0
 ; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s0
-; GFX940-TGSPLIT-NEXT:    scratch_store_dword v0, v1, off nt
+; GFX940-TGSPLIT-NEXT:    scratch_store_dword v0, v1, s4 nt
 ; GFX940-TGSPLIT-NEXT:    s_endpgm
     i32 addrspace(1)* %in, i32 addrspace(5)* %out) {
 entry:

diff  --git a/llvm/test/MC/AMDGPU/flat-scratch-gfx940.s b/llvm/test/MC/AMDGPU/flat-scratch-gfx940.s
new file mode 100644
index 0000000000000..3af48bcd8ea18
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/flat-scratch-gfx940.s
@@ -0,0 +1,1057 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx940 -show-encoding %s | FileCheck -check-prefix=GFX940 %s
+
+scratch_load_dword a2, v4, s6
+// GFX940: scratch_load_dword a2, v4, s6           ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_dword a2, v4, s6 offset:16
+// GFX940: scratch_load_dword a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_dword a2, v4, off
+// GFX940: scratch_load_dword a2, v4, off          ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_dword a2, v4, off offset:16
+// GFX940: scratch_load_dword a2, v4, off offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_dword a2, off, s6
+// GFX940: scratch_load_dword a2, off, s6          ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_dword a2, off, s6 offset:16
+// GFX940: scratch_load_dword a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_dword a2, off, off
+// GFX940: scratch_load_dword a2, off, off         ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_dword a2, off, off offset:16
+// GFX940: scratch_load_dword a2, off, off offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_dword v2, v4, s6
+// GFX940: scratch_load_dword v2, v4, s6           ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_dword v2, v4, s6 offset:16
+// GFX940: scratch_load_dword v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_dword v2, v4, off
+// GFX940: scratch_load_dword v2, v4, off          ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_dword v2, v4, off offset:16
+// GFX940: scratch_load_dword v2, v4, off offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_dword v2, off, s6
+// GFX940: scratch_load_dword v2, off, s6          ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_dword v2, off, s6 offset:16
+// GFX940: scratch_load_dword v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_dword v2, off, off
+// GFX940: scratch_load_dword v2, off, off         ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_dword v2, off, off offset:16
+// GFX940: scratch_load_dword v2, off, off offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_dwordx2 a[2:3], v4, s6
+// GFX940: scratch_load_dwordx2 a[2:3], v4, s6     ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_dwordx2 a[2:3], v4, s6 offset:16
+// GFX940: scratch_load_dwordx2 a[2:3], v4, s6 offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_dwordx2 a[2:3], v4, off
+// GFX940: scratch_load_dwordx2 a[2:3], v4, off    ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_dwordx2 a[2:3], v4, off offset:16
+// GFX940: scratch_load_dwordx2 a[2:3], v4, off offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_dwordx2 a[2:3], off, s6
+// GFX940: scratch_load_dwordx2 a[2:3], off, s6    ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_dwordx2 a[2:3], off, s6 offset:16
+// GFX940: scratch_load_dwordx2 a[2:3], off, s6 offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_dwordx2 a[2:3], off, off
+// GFX940: scratch_load_dwordx2 a[2:3], off, off   ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_dwordx2 a[2:3], off, off offset:16
+// GFX940: scratch_load_dwordx2 a[2:3], off, off offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_dwordx2 v[2:3], v4, s6
+// GFX940: scratch_load_dwordx2 v[2:3], v4, s6     ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_dwordx2 v[2:3], v4, s6 offset:16
+// GFX940: scratch_load_dwordx2 v[2:3], v4, s6 offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_dwordx2 v[2:3], v4, off
+// GFX940: scratch_load_dwordx2 v[2:3], v4, off    ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_dwordx2 v[2:3], v4, off offset:16
+// GFX940: scratch_load_dwordx2 v[2:3], v4, off offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_dwordx2 v[2:3], off, s6
+// GFX940: scratch_load_dwordx2 v[2:3], off, s6    ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_dwordx2 v[2:3], off, s6 offset:16
+// GFX940: scratch_load_dwordx2 v[2:3], off, s6 offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_dwordx2 v[2:3], off, off
+// GFX940: scratch_load_dwordx2 v[2:3], off, off   ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_dwordx2 v[2:3], off, off offset:16
+// GFX940: scratch_load_dwordx2 v[2:3], off, off offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_dwordx3 a[2:4], v4, s6
+// GFX940: scratch_load_dwordx3 a[2:4], v4, s6     ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_dwordx3 a[2:4], v4, s6 offset:16
+// GFX940: scratch_load_dwordx3 a[2:4], v4, s6 offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_dwordx3 a[2:4], v4, off
+// GFX940: scratch_load_dwordx3 a[2:4], v4, off    ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_dwordx3 a[2:4], v4, off offset:16
+// GFX940: scratch_load_dwordx3 a[2:4], v4, off offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_dwordx3 a[2:4], off, s6
+// GFX940: scratch_load_dwordx3 a[2:4], off, s6    ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_dwordx3 a[2:4], off, s6 offset:16
+// GFX940: scratch_load_dwordx3 a[2:4], off, s6 offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_dwordx3 a[2:4], off, off
+// GFX940: scratch_load_dwordx3 a[2:4], off, off   ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_dwordx3 a[2:4], off, off offset:16
+// GFX940: scratch_load_dwordx3 a[2:4], off, off offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_dwordx3 v[2:4], v4, s6
+// GFX940: scratch_load_dwordx3 v[2:4], v4, s6     ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_dwordx3 v[2:4], v4, s6 offset:16
+// GFX940: scratch_load_dwordx3 v[2:4], v4, s6 offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_dwordx3 v[2:4], v4, off
+// GFX940: scratch_load_dwordx3 v[2:4], v4, off    ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_dwordx3 v[2:4], v4, off offset:16
+// GFX940: scratch_load_dwordx3 v[2:4], v4, off offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_dwordx3 v[2:4], off, s6
+// GFX940: scratch_load_dwordx3 v[2:4], off, s6    ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_dwordx3 v[2:4], off, s6 offset:16
+// GFX940: scratch_load_dwordx3 v[2:4], off, s6 offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_dwordx3 v[2:4], off, off
+// GFX940: scratch_load_dwordx3 v[2:4], off, off   ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_dwordx3 v[2:4], off, off offset:16
+// GFX940: scratch_load_dwordx3 v[2:4], off, off offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_dwordx4 a[2:5], v4, s6
+// GFX940: scratch_load_dwordx4 a[2:5], v4, s6     ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_dwordx4 a[2:5], v4, s6 offset:16
+// GFX940: scratch_load_dwordx4 a[2:5], v4, s6 offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_dwordx4 a[2:5], v4, off
+// GFX940: scratch_load_dwordx4 a[2:5], v4, off    ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_dwordx4 a[2:5], v4, off offset:16
+// GFX940: scratch_load_dwordx4 a[2:5], v4, off offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_dwordx4 a[2:5], off, s6
+// GFX940: scratch_load_dwordx4 a[2:5], off, s6    ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_dwordx4 a[2:5], off, s6 offset:16
+// GFX940: scratch_load_dwordx4 a[2:5], off, s6 offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_dwordx4 a[2:5], off, off
+// GFX940: scratch_load_dwordx4 a[2:5], off, off   ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_dwordx4 a[2:5], off, off offset:16
+// GFX940: scratch_load_dwordx4 a[2:5], off, off offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_dwordx4 v[2:5], v4, s6
+// GFX940: scratch_load_dwordx4 v[2:5], v4, s6     ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_dwordx4 v[2:5], v4, s6 offset:16
+// GFX940: scratch_load_dwordx4 v[2:5], v4, s6 offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_dwordx4 v[2:5], v4, off
+// GFX940: scratch_load_dwordx4 v[2:5], v4, off    ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_dwordx4 v[2:5], v4, off offset:16
+// GFX940: scratch_load_dwordx4 v[2:5], v4, off offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_dwordx4 v[2:5], off, s6
+// GFX940: scratch_load_dwordx4 v[2:5], off, s6    ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_dwordx4 v[2:5], off, s6 offset:16
+// GFX940: scratch_load_dwordx4 v[2:5], off, s6 offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_dwordx4 v[2:5], off, off
+// GFX940: scratch_load_dwordx4 v[2:5], off, off   ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_dwordx4 v[2:5], off, off offset:16
+// GFX940: scratch_load_dwordx4 v[2:5], off, off offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_sbyte a2, v4, s6
+// GFX940: scratch_load_sbyte a2, v4, s6           ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_sbyte a2, v4, s6 offset:16
+// GFX940: scratch_load_sbyte a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_sbyte a2, v4, off
+// GFX940: scratch_load_sbyte a2, v4, off          ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_sbyte a2, v4, off offset:16
+// GFX940: scratch_load_sbyte a2, v4, off offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_sbyte a2, off, s6
+// GFX940: scratch_load_sbyte a2, off, s6          ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_sbyte a2, off, s6 offset:16
+// GFX940: scratch_load_sbyte a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_sbyte a2, off, off
+// GFX940: scratch_load_sbyte a2, off, off         ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_sbyte a2, off, off offset:16
+// GFX940: scratch_load_sbyte a2, off, off offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_sbyte v2, v4, s6
+// GFX940: scratch_load_sbyte v2, v4, s6           ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_sbyte v2, v4, s6 offset:16
+// GFX940: scratch_load_sbyte v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_sbyte v2, v4, off
+// GFX940: scratch_load_sbyte v2, v4, off          ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_sbyte v2, v4, off offset:16
+// GFX940: scratch_load_sbyte v2, v4, off offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_sbyte v2, off, s6
+// GFX940: scratch_load_sbyte v2, off, s6          ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_sbyte v2, off, s6 offset:16
+// GFX940: scratch_load_sbyte v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_sbyte v2, off, off
+// GFX940: scratch_load_sbyte v2, off, off         ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_sbyte v2, off, off offset:16
+// GFX940: scratch_load_sbyte v2, off, off offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_sbyte_d16 a2, v4, s6
+// GFX940: scratch_load_sbyte_d16 a2, v4, s6       ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_sbyte_d16 a2, v4, s6 offset:16
+// GFX940: scratch_load_sbyte_d16 a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_sbyte_d16 a2, v4, off
+// GFX940: scratch_load_sbyte_d16 a2, v4, off      ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_sbyte_d16 a2, v4, off offset:16
+// GFX940: scratch_load_sbyte_d16 a2, v4, off offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_sbyte_d16 a2, off, s6
+// GFX940: scratch_load_sbyte_d16 a2, off, s6      ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_sbyte_d16 a2, off, s6 offset:16
+// GFX940: scratch_load_sbyte_d16 a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_sbyte_d16 a2, off, off
+// GFX940: scratch_load_sbyte_d16 a2, off, off     ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_sbyte_d16 a2, off, off offset:16
+// GFX940: scratch_load_sbyte_d16 a2, off, off offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_sbyte_d16 v2, v4, s6
+// GFX940: scratch_load_sbyte_d16 v2, v4, s6       ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_sbyte_d16 v2, v4, s6 offset:16
+// GFX940: scratch_load_sbyte_d16 v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_sbyte_d16 v2, v4, off
+// GFX940: scratch_load_sbyte_d16 v2, v4, off      ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_sbyte_d16 v2, v4, off offset:16
+// GFX940: scratch_load_sbyte_d16 v2, v4, off offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_sbyte_d16 v2, off, s6
+// GFX940: scratch_load_sbyte_d16 v2, off, s6      ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_sbyte_d16 v2, off, s6 offset:16
+// GFX940: scratch_load_sbyte_d16 v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_sbyte_d16 v2, off, off
+// GFX940: scratch_load_sbyte_d16 v2, off, off     ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_sbyte_d16 v2, off, off offset:16
+// GFX940: scratch_load_sbyte_d16 v2, off, off offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_sbyte_d16_hi a2, v4, s6
+// GFX940: scratch_load_sbyte_d16_hi a2, v4, s6    ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_sbyte_d16_hi a2, v4, s6 offset:16
+// GFX940: scratch_load_sbyte_d16_hi a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_sbyte_d16_hi a2, v4, off
+// GFX940: scratch_load_sbyte_d16_hi a2, v4, off   ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_sbyte_d16_hi a2, v4, off offset:16
+// GFX940: scratch_load_sbyte_d16_hi a2, v4, off offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_sbyte_d16_hi a2, off, s6
+// GFX940: scratch_load_sbyte_d16_hi a2, off, s6   ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_sbyte_d16_hi a2, off, s6 offset:16
+// GFX940: scratch_load_sbyte_d16_hi a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_sbyte_d16_hi a2, off, off
+// GFX940: scratch_load_sbyte_d16_hi a2, off, off  ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_sbyte_d16_hi a2, off, off offset:16
+// GFX940: scratch_load_sbyte_d16_hi a2, off, off offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_sbyte_d16_hi v2, v4, s6
+// GFX940: scratch_load_sbyte_d16_hi v2, v4, s6    ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_sbyte_d16_hi v2, v4, s6 offset:16
+// GFX940: scratch_load_sbyte_d16_hi v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_sbyte_d16_hi v2, v4, off
+// GFX940: scratch_load_sbyte_d16_hi v2, v4, off   ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_sbyte_d16_hi v2, v4, off offset:16
+// GFX940: scratch_load_sbyte_d16_hi v2, v4, off offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_sbyte_d16_hi v2, off, s6
+// GFX940: scratch_load_sbyte_d16_hi v2, off, s6   ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_sbyte_d16_hi v2, off, s6 offset:16
+// GFX940: scratch_load_sbyte_d16_hi v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_sbyte_d16_hi v2, off, off
+// GFX940: scratch_load_sbyte_d16_hi v2, off, off  ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_sbyte_d16_hi v2, off, off offset:16
+// GFX940: scratch_load_sbyte_d16_hi v2, off, off offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_short_d16 a2, v4, s6
+// GFX940: scratch_load_short_d16 a2, v4, s6       ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_short_d16 a2, v4, s6 offset:16
+// GFX940: scratch_load_short_d16 a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_short_d16 a2, v4, off
+// GFX940: scratch_load_short_d16 a2, v4, off      ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_short_d16 a2, v4, off offset:16
+// GFX940: scratch_load_short_d16 a2, v4, off offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_short_d16 a2, off, s6
+// GFX940: scratch_load_short_d16 a2, off, s6      ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_short_d16 a2, off, s6 offset:16
+// GFX940: scratch_load_short_d16 a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_short_d16 a2, off, off
+// GFX940: scratch_load_short_d16 a2, off, off     ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_short_d16 a2, off, off offset:16
+// GFX940: scratch_load_short_d16 a2, off, off offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_short_d16 v2, v4, s6
+// GFX940: scratch_load_short_d16 v2, v4, s6       ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_short_d16 v2, v4, s6 offset:16
+// GFX940: scratch_load_short_d16 v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_short_d16 v2, v4, off
+// GFX940: scratch_load_short_d16 v2, v4, off      ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_short_d16 v2, v4, off offset:16
+// GFX940: scratch_load_short_d16 v2, v4, off offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_short_d16 v2, off, s6
+// GFX940: scratch_load_short_d16 v2, off, s6      ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_short_d16 v2, off, s6 offset:16
+// GFX940: scratch_load_short_d16 v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_short_d16 v2, off, off
+// GFX940: scratch_load_short_d16 v2, off, off     ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_short_d16 v2, off, off offset:16
+// GFX940: scratch_load_short_d16 v2, off, off offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_short_d16_hi a2, v4, s6
+// GFX940: scratch_load_short_d16_hi a2, v4, s6    ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_short_d16_hi a2, v4, s6 offset:16
+// GFX940: scratch_load_short_d16_hi a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_short_d16_hi a2, v4, off
+// GFX940: scratch_load_short_d16_hi a2, v4, off   ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_short_d16_hi a2, v4, off offset:16
+// GFX940: scratch_load_short_d16_hi a2, v4, off offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_short_d16_hi a2, off, s6
+// GFX940: scratch_load_short_d16_hi a2, off, s6   ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_short_d16_hi a2, off, s6 offset:16
+// GFX940: scratch_load_short_d16_hi a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_short_d16_hi a2, off, off
+// GFX940: scratch_load_short_d16_hi a2, off, off  ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_short_d16_hi a2, off, off offset:16
+// GFX940: scratch_load_short_d16_hi a2, off, off offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_short_d16_hi v2, v4, s6
+// GFX940: scratch_load_short_d16_hi v2, v4, s6    ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_short_d16_hi v2, v4, s6 offset:16
+// GFX940: scratch_load_short_d16_hi v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_short_d16_hi v2, v4, off
+// GFX940: scratch_load_short_d16_hi v2, v4, off   ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_short_d16_hi v2, v4, off offset:16
+// GFX940: scratch_load_short_d16_hi v2, v4, off offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_short_d16_hi v2, off, s6
+// GFX940: scratch_load_short_d16_hi v2, off, s6   ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_short_d16_hi v2, off, s6 offset:16
+// GFX940: scratch_load_short_d16_hi v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_short_d16_hi v2, off, off
+// GFX940: scratch_load_short_d16_hi v2, off, off  ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_short_d16_hi v2, off, off offset:16
+// GFX940: scratch_load_short_d16_hi v2, off, off offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_sshort a2, v4, s6
+// GFX940: scratch_load_sshort a2, v4, s6          ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_sshort a2, v4, s6 offset:16
+// GFX940: scratch_load_sshort a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_sshort a2, v4, off
+// GFX940: scratch_load_sshort a2, v4, off         ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_sshort a2, v4, off offset:16
+// GFX940: scratch_load_sshort a2, v4, off offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_sshort a2, off, s6
+// GFX940: scratch_load_sshort a2, off, s6         ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_sshort a2, off, s6 offset:16
+// GFX940: scratch_load_sshort a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_sshort a2, off, off
+// GFX940: scratch_load_sshort a2, off, off        ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_sshort a2, off, off offset:16
+// GFX940: scratch_load_sshort a2, off, off offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_sshort v2, v4, s6
+// GFX940: scratch_load_sshort v2, v4, s6          ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_sshort v2, v4, s6 offset:16
+// GFX940: scratch_load_sshort v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_sshort v2, v4, off
+// GFX940: scratch_load_sshort v2, v4, off         ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_sshort v2, v4, off offset:16
+// GFX940: scratch_load_sshort v2, v4, off offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_sshort v2, off, s6
+// GFX940: scratch_load_sshort v2, off, s6         ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_sshort v2, off, s6 offset:16
+// GFX940: scratch_load_sshort v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_sshort v2, off, off
+// GFX940: scratch_load_sshort v2, off, off        ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_sshort v2, off, off offset:16
+// GFX940: scratch_load_sshort v2, off, off offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_ubyte a2, v4, s6
+// GFX940: scratch_load_ubyte a2, v4, s6           ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_ubyte a2, v4, s6 offset:16
+// GFX940: scratch_load_ubyte a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_ubyte a2, v4, off
+// GFX940: scratch_load_ubyte a2, v4, off          ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_ubyte a2, v4, off offset:16
+// GFX940: scratch_load_ubyte a2, v4, off offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_ubyte a2, off, s6
+// GFX940: scratch_load_ubyte a2, off, s6          ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_ubyte a2, off, s6 offset:16
+// GFX940: scratch_load_ubyte a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_ubyte a2, off, off
+// GFX940: scratch_load_ubyte a2, off, off         ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_ubyte a2, off, off offset:16
+// GFX940: scratch_load_ubyte a2, off, off offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_ubyte v2, v4, s6
+// GFX940: scratch_load_ubyte v2, v4, s6           ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_ubyte v2, v4, s6 offset:16
+// GFX940: scratch_load_ubyte v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_ubyte v2, v4, off
+// GFX940: scratch_load_ubyte v2, v4, off          ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_ubyte v2, v4, off offset:16
+// GFX940: scratch_load_ubyte v2, v4, off offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_ubyte v2, off, s6
+// GFX940: scratch_load_ubyte v2, off, s6          ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_ubyte v2, off, s6 offset:16
+// GFX940: scratch_load_ubyte v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_ubyte v2, off, off
+// GFX940: scratch_load_ubyte v2, off, off         ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_ubyte v2, off, off offset:16
+// GFX940: scratch_load_ubyte v2, off, off offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_ubyte_d16 a2, v4, s6
+// GFX940: scratch_load_ubyte_d16 a2, v4, s6       ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_ubyte_d16 a2, v4, s6 offset:16
+// GFX940: scratch_load_ubyte_d16 a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_ubyte_d16 a2, v4, off
+// GFX940: scratch_load_ubyte_d16 a2, v4, off      ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_ubyte_d16 a2, v4, off offset:16
+// GFX940: scratch_load_ubyte_d16 a2, v4, off offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_ubyte_d16 a2, off, s6
+// GFX940: scratch_load_ubyte_d16 a2, off, s6      ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_ubyte_d16 a2, off, s6 offset:16
+// GFX940: scratch_load_ubyte_d16 a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_ubyte_d16 a2, off, off
+// GFX940: scratch_load_ubyte_d16 a2, off, off     ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_ubyte_d16 a2, off, off offset:16
+// GFX940: scratch_load_ubyte_d16 a2, off, off offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_ubyte_d16 v2, v4, s6
+// GFX940: scratch_load_ubyte_d16 v2, v4, s6       ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_ubyte_d16 v2, v4, s6 offset:16
+// GFX940: scratch_load_ubyte_d16 v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_ubyte_d16 v2, v4, off
+// GFX940: scratch_load_ubyte_d16 v2, v4, off      ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_ubyte_d16 v2, v4, off offset:16
+// GFX940: scratch_load_ubyte_d16 v2, v4, off offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_ubyte_d16 v2, off, s6
+// GFX940: scratch_load_ubyte_d16 v2, off, s6      ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_ubyte_d16 v2, off, s6 offset:16
+// GFX940: scratch_load_ubyte_d16 v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_ubyte_d16 v2, off, off
+// GFX940: scratch_load_ubyte_d16 v2, off, off     ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_ubyte_d16 v2, off, off offset:16
+// GFX940: scratch_load_ubyte_d16 v2, off, off offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_ubyte_d16_hi a2, v4, s6
+// GFX940: scratch_load_ubyte_d16_hi a2, v4, s6    ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_ubyte_d16_hi a2, v4, s6 offset:16
+// GFX940: scratch_load_ubyte_d16_hi a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_ubyte_d16_hi a2, v4, off
+// GFX940: scratch_load_ubyte_d16_hi a2, v4, off   ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_ubyte_d16_hi a2, v4, off offset:16
+// GFX940: scratch_load_ubyte_d16_hi a2, v4, off offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_ubyte_d16_hi a2, off, s6
+// GFX940: scratch_load_ubyte_d16_hi a2, off, s6   ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_ubyte_d16_hi a2, off, s6 offset:16
+// GFX940: scratch_load_ubyte_d16_hi a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_ubyte_d16_hi a2, off, off
+// GFX940: scratch_load_ubyte_d16_hi a2, off, off  ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_ubyte_d16_hi a2, off, off offset:16
+// GFX940: scratch_load_ubyte_d16_hi a2, off, off offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_ubyte_d16_hi v2, v4, s6
+// GFX940: scratch_load_ubyte_d16_hi v2, v4, s6    ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_ubyte_d16_hi v2, v4, s6 offset:16
+// GFX940: scratch_load_ubyte_d16_hi v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_ubyte_d16_hi v2, v4, off
+// GFX940: scratch_load_ubyte_d16_hi v2, v4, off   ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_ubyte_d16_hi v2, v4, off offset:16
+// GFX940: scratch_load_ubyte_d16_hi v2, v4, off offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_ubyte_d16_hi v2, off, s6
+// GFX940: scratch_load_ubyte_d16_hi v2, off, s6   ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_ubyte_d16_hi v2, off, s6 offset:16
+// GFX940: scratch_load_ubyte_d16_hi v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_ubyte_d16_hi v2, off, off
+// GFX940: scratch_load_ubyte_d16_hi v2, off, off  ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_ubyte_d16_hi v2, off, off offset:16
+// GFX940: scratch_load_ubyte_d16_hi v2, off, off offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_ushort a2, v4, s6
+// GFX940: scratch_load_ushort a2, v4, s6          ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_ushort a2, v4, s6 offset:16
+// GFX940: scratch_load_ushort a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_ushort a2, v4, off
+// GFX940: scratch_load_ushort a2, v4, off         ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_ushort a2, v4, off offset:16
+// GFX940: scratch_load_ushort a2, v4, off offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_ushort a2, off, s6
+// GFX940: scratch_load_ushort a2, off, s6         ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_ushort a2, off, s6 offset:16
+// GFX940: scratch_load_ushort a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_ushort a2, off, off
+// GFX940: scratch_load_ushort a2, off, off        ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_ushort a2, off, off offset:16
+// GFX940: scratch_load_ushort a2, off, off offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_ushort v2, v4, s6
+// GFX940: scratch_load_ushort v2, v4, s6          ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_ushort v2, v4, s6 offset:16
+// GFX940: scratch_load_ushort v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_ushort v2, v4, off
+// GFX940: scratch_load_ushort v2, v4, off         ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_ushort v2, v4, off offset:16
+// GFX940: scratch_load_ushort v2, v4, off offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_ushort v2, off, s6
+// GFX940: scratch_load_ushort v2, off, s6         ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_ushort v2, off, s6 offset:16
+// GFX940: scratch_load_ushort v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_ushort v2, off, off
+// GFX940: scratch_load_ushort v2, off, off        ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_ushort v2, off, off offset:16
+// GFX940: scratch_load_ushort v2, off, off offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_store_byte v4, a2, s6
+// GFX940: scratch_store_byte v4, a2, s6           ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_byte v4, a2, s6 offset:16
+// GFX940: scratch_store_byte v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_byte v4, a2, off
+// GFX940: scratch_store_byte v4, a2, off          ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_byte v4, a2, off offset:16
+// GFX940: scratch_store_byte v4, a2, off offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_byte off, a2, s6
+// GFX940: scratch_store_byte off, a2, s6          ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_byte off, a2, s6 offset:16
+// GFX940: scratch_store_byte off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_byte off, a2, off
+// GFX940: scratch_store_byte off, a2, off         ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_byte off, a2, off offset:16
+// GFX940: scratch_store_byte off, a2, off offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_byte v4, v2, s6
+// GFX940: scratch_store_byte v4, v2, s6           ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_byte v4, v2, s6 offset:16
+// GFX940: scratch_store_byte v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_byte v4, v2, off
+// GFX940: scratch_store_byte v4, v2, off          ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_byte v4, v2, off offset:16
+// GFX940: scratch_store_byte v4, v2, off offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_byte off, v2, s6
+// GFX940: scratch_store_byte off, v2, s6          ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_byte off, v2, s6 offset:16
+// GFX940: scratch_store_byte off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_byte off, v2, off
+// GFX940: scratch_store_byte off, v2, off         ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_byte off, v2, off offset:16
+// GFX940: scratch_store_byte off, v2, off offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_byte_d16_hi v4, a2, s6
+// GFX940: scratch_store_byte_d16_hi v4, a2, s6    ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_byte_d16_hi v4, a2, s6 offset:16
+// GFX940: scratch_store_byte_d16_hi v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_byte_d16_hi v4, a2, off
+// GFX940: scratch_store_byte_d16_hi v4, a2, off   ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_byte_d16_hi v4, a2, off offset:16
+// GFX940: scratch_store_byte_d16_hi v4, a2, off offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_byte_d16_hi off, a2, s6
+// GFX940: scratch_store_byte_d16_hi off, a2, s6   ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_byte_d16_hi off, a2, s6 offset:16
+// GFX940: scratch_store_byte_d16_hi off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_byte_d16_hi off, a2, off
+// GFX940: scratch_store_byte_d16_hi off, a2, off  ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_byte_d16_hi off, a2, off offset:16
+// GFX940: scratch_store_byte_d16_hi off, a2, off offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_byte_d16_hi v4, v2, s6
+// GFX940: scratch_store_byte_d16_hi v4, v2, s6    ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_byte_d16_hi v4, v2, s6 offset:16
+// GFX940: scratch_store_byte_d16_hi v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_byte_d16_hi v4, v2, off
+// GFX940: scratch_store_byte_d16_hi v4, v2, off   ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_byte_d16_hi v4, v2, off offset:16
+// GFX940: scratch_store_byte_d16_hi v4, v2, off offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_byte_d16_hi off, v2, s6
+// GFX940: scratch_store_byte_d16_hi off, v2, s6   ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_byte_d16_hi off, v2, s6 offset:16
+// GFX940: scratch_store_byte_d16_hi off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_byte_d16_hi off, v2, off
+// GFX940: scratch_store_byte_d16_hi off, v2, off  ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_byte_d16_hi off, v2, off offset:16
+// GFX940: scratch_store_byte_d16_hi off, v2, off offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_dword v4, a2, s6
+// GFX940: scratch_store_dword v4, a2, s6          ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_dword v4, a2, s6 offset:16
+// GFX940: scratch_store_dword v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_dword v4, a2, off
+// GFX940: scratch_store_dword v4, a2, off         ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_dword v4, a2, off offset:16
+// GFX940: scratch_store_dword v4, a2, off offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_dword off, a2, s6
+// GFX940: scratch_store_dword off, a2, s6         ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_dword off, a2, s6 offset:16
+// GFX940: scratch_store_dword off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_dword off, a2, off
+// GFX940: scratch_store_dword off, a2, off        ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_dword off, a2, off offset:16
+// GFX940: scratch_store_dword off, a2, off offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_dword v4, v2, s6
+// GFX940: scratch_store_dword v4, v2, s6          ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_dword v4, v2, s6 offset:16
+// GFX940: scratch_store_dword v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_dword v4, v2, off
+// GFX940: scratch_store_dword v4, v2, off         ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_dword v4, v2, off offset:16
+// GFX940: scratch_store_dword v4, v2, off offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_dword off, v2, s6
+// GFX940: scratch_store_dword off, v2, s6         ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_dword off, v2, s6 offset:16
+// GFX940: scratch_store_dword off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_dword off, v2, off
+// GFX940: scratch_store_dword off, v2, off        ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_dword off, v2, off offset:16
+// GFX940: scratch_store_dword off, v2, off offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_dwordx2 v4, a[2:3], s6
+// GFX940: scratch_store_dwordx2 v4, a[2:3], s6    ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_dwordx2 v4, a[2:3], s6 offset:16
+// GFX940: scratch_store_dwordx2 v4, a[2:3], s6 offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_dwordx2 v4, a[2:3], off
+// GFX940: scratch_store_dwordx2 v4, a[2:3], off   ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_dwordx2 v4, a[2:3], off offset:16
+// GFX940: scratch_store_dwordx2 v4, a[2:3], off offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_dwordx2 off, a[2:3], s6
+// GFX940: scratch_store_dwordx2 off, a[2:3], s6   ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_dwordx2 off, a[2:3], s6 offset:16
+// GFX940: scratch_store_dwordx2 off, a[2:3], s6 offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_dwordx2 off, a[2:3], off
+// GFX940: scratch_store_dwordx2 off, a[2:3], off  ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_dwordx2 off, a[2:3], off offset:16
+// GFX940: scratch_store_dwordx2 off, a[2:3], off offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_dwordx2 v4, v[2:3], s6
+// GFX940: scratch_store_dwordx2 v4, v[2:3], s6    ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_dwordx2 v4, v[2:3], s6 offset:16
+// GFX940: scratch_store_dwordx2 v4, v[2:3], s6 offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_dwordx2 v4, v[2:3], off
+// GFX940: scratch_store_dwordx2 v4, v[2:3], off   ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_dwordx2 v4, v[2:3], off offset:16
+// GFX940: scratch_store_dwordx2 v4, v[2:3], off offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_dwordx2 off, v[2:3], s6
+// GFX940: scratch_store_dwordx2 off, v[2:3], s6   ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_dwordx2 off, v[2:3], s6 offset:16
+// GFX940: scratch_store_dwordx2 off, v[2:3], s6 offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_dwordx2 off, v[2:3], off
+// GFX940: scratch_store_dwordx2 off, v[2:3], off  ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_dwordx2 off, v[2:3], off offset:16
+// GFX940: scratch_store_dwordx2 off, v[2:3], off offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_dwordx3 v4, a[2:4], s6
+// GFX940: scratch_store_dwordx3 v4, a[2:4], s6    ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_dwordx3 v4, a[2:4], s6 offset:16
+// GFX940: scratch_store_dwordx3 v4, a[2:4], s6 offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_dwordx3 v4, a[2:4], off
+// GFX940: scratch_store_dwordx3 v4, a[2:4], off   ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_dwordx3 v4, a[2:4], off offset:16
+// GFX940: scratch_store_dwordx3 v4, a[2:4], off offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_dwordx3 off, a[2:4], s6
+// GFX940: scratch_store_dwordx3 off, a[2:4], s6   ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_dwordx3 off, a[2:4], s6 offset:16
+// GFX940: scratch_store_dwordx3 off, a[2:4], s6 offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_dwordx3 off, a[2:4], off
+// GFX940: scratch_store_dwordx3 off, a[2:4], off  ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_dwordx3 off, a[2:4], off offset:16
+// GFX940: scratch_store_dwordx3 off, a[2:4], off offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_dwordx3 v4, v[2:4], s6
+// GFX940: scratch_store_dwordx3 v4, v[2:4], s6    ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_dwordx3 v4, v[2:4], s6 offset:16
+// GFX940: scratch_store_dwordx3 v4, v[2:4], s6 offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_dwordx3 v4, v[2:4], off
+// GFX940: scratch_store_dwordx3 v4, v[2:4], off   ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_dwordx3 v4, v[2:4], off offset:16
+// GFX940: scratch_store_dwordx3 v4, v[2:4], off offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_dwordx3 off, v[2:4], s6
+// GFX940: scratch_store_dwordx3 off, v[2:4], s6   ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_dwordx3 off, v[2:4], s6 offset:16
+// GFX940: scratch_store_dwordx3 off, v[2:4], s6 offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_dwordx3 off, v[2:4], off
+// GFX940: scratch_store_dwordx3 off, v[2:4], off  ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_dwordx3 off, v[2:4], off offset:16
+// GFX940: scratch_store_dwordx3 off, v[2:4], off offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_dwordx4 v4, a[2:5], s6
+// GFX940: scratch_store_dwordx4 v4, a[2:5], s6    ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_dwordx4 v4, a[2:5], s6 offset:16
+// GFX940: scratch_store_dwordx4 v4, a[2:5], s6 offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_dwordx4 v4, a[2:5], off
+// GFX940: scratch_store_dwordx4 v4, a[2:5], off   ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_dwordx4 v4, a[2:5], off offset:16
+// GFX940: scratch_store_dwordx4 v4, a[2:5], off offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_dwordx4 off, a[2:5], s6
+// GFX940: scratch_store_dwordx4 off, a[2:5], s6   ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_dwordx4 off, a[2:5], s6 offset:16
+// GFX940: scratch_store_dwordx4 off, a[2:5], s6 offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_dwordx4 off, a[2:5], off
+// GFX940: scratch_store_dwordx4 off, a[2:5], off  ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_dwordx4 off, a[2:5], off offset:16
+// GFX940: scratch_store_dwordx4 off, a[2:5], off offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_dwordx4 v4, v[2:5], s6
+// GFX940: scratch_store_dwordx4 v4, v[2:5], s6    ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_dwordx4 v4, v[2:5], s6 offset:16
+// GFX940: scratch_store_dwordx4 v4, v[2:5], s6 offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_dwordx4 v4, v[2:5], off
+// GFX940: scratch_store_dwordx4 v4, v[2:5], off   ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_dwordx4 v4, v[2:5], off offset:16
+// GFX940: scratch_store_dwordx4 v4, v[2:5], off offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_dwordx4 off, v[2:5], s6
+// GFX940: scratch_store_dwordx4 off, v[2:5], s6   ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_dwordx4 off, v[2:5], s6 offset:16
+// GFX940: scratch_store_dwordx4 off, v[2:5], s6 offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_dwordx4 off, v[2:5], off
+// GFX940: scratch_store_dwordx4 off, v[2:5], off  ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_dwordx4 off, v[2:5], off offset:16
+// GFX940: scratch_store_dwordx4 off, v[2:5], off offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_short v4, a2, s6
+// GFX940: scratch_store_short v4, a2, s6          ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_short v4, a2, s6 offset:16
+// GFX940: scratch_store_short v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_short v4, a2, off
+// GFX940: scratch_store_short v4, a2, off         ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_short v4, a2, off offset:16
+// GFX940: scratch_store_short v4, a2, off offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_short off, a2, s6
+// GFX940: scratch_store_short off, a2, s6         ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_short off, a2, s6 offset:16
+// GFX940: scratch_store_short off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_short off, a2, off
+// GFX940: scratch_store_short off, a2, off        ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_short off, a2, off offset:16
+// GFX940: scratch_store_short off, a2, off offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_short v4, v2, s6
+// GFX940: scratch_store_short v4, v2, s6          ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_short v4, v2, s6 offset:16
+// GFX940: scratch_store_short v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_short v4, v2, off
+// GFX940: scratch_store_short v4, v2, off         ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_short v4, v2, off offset:16
+// GFX940: scratch_store_short v4, v2, off offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_short off, v2, s6
+// GFX940: scratch_store_short off, v2, s6         ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_short off, v2, s6 offset:16
+// GFX940: scratch_store_short off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_short off, v2, off
+// GFX940: scratch_store_short off, v2, off        ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_short off, v2, off offset:16
+// GFX940: scratch_store_short off, v2, off offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_short_d16_hi v4, a2, s6
+// GFX940: scratch_store_short_d16_hi v4, a2, s6   ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_short_d16_hi v4, a2, s6 offset:16
+// GFX940: scratch_store_short_d16_hi v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_short_d16_hi v4, a2, off
+// GFX940: scratch_store_short_d16_hi v4, a2, off  ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_short_d16_hi v4, a2, off offset:16
+// GFX940: scratch_store_short_d16_hi v4, a2, off offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_short_d16_hi off, a2, s6
+// GFX940: scratch_store_short_d16_hi off, a2, s6  ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_short_d16_hi off, a2, s6 offset:16
+// GFX940: scratch_store_short_d16_hi off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_short_d16_hi off, a2, off
+// GFX940: scratch_store_short_d16_hi off, a2, off ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_short_d16_hi off, a2, off offset:16
+// GFX940: scratch_store_short_d16_hi off, a2, off offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_short_d16_hi v4, v2, s6
+// GFX940: scratch_store_short_d16_hi v4, v2, s6   ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_short_d16_hi v4, v2, s6 offset:16
+// GFX940: scratch_store_short_d16_hi v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_short_d16_hi v4, v2, off
+// GFX940: scratch_store_short_d16_hi v4, v2, off  ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_short_d16_hi v4, v2, off offset:16
+// GFX940: scratch_store_short_d16_hi v4, v2, off offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_short_d16_hi off, v2, s6
+// GFX940: scratch_store_short_d16_hi off, v2, s6  ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_short_d16_hi off, v2, s6 offset:16
+// GFX940: scratch_store_short_d16_hi off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_short_d16_hi off, v2, off
+// GFX940: scratch_store_short_d16_hi off, v2, off ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_short_d16_hi off, v2, off offset:16
+// GFX940: scratch_store_short_d16_hi off, v2, off offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0x7f,0x00]

diff  --git a/llvm/test/MC/AMDGPU/flat-scratch-instructions.s b/llvm/test/MC/AMDGPU/flat-scratch-instructions.s
index 0f4fae7546b7f..b0d5d2d390cec 100644
--- a/llvm/test/MC/AMDGPU/flat-scratch-instructions.s
+++ b/llvm/test/MC/AMDGPU/flat-scratch-instructions.s
@@ -222,23 +222,23 @@ scratch_store_dword off, v2, s1 offset:12
 
 // FIXME: Should error about multiple offsets
 scratch_load_dword v1, v2, s1
-// GFX10-ERR: error: invalid operand for instruction
-// GFX9-ERR: error: invalid operand for instruction
+// GFX10-ERR: error: operands are not valid for this GPU or mode
+// GFX9-ERR: error: operands are not valid for this GPU or mode
 // VI-ERR: error: instruction not supported on this GPU
 
 scratch_load_dword v1, v2, s1 offset:32
-// GFX10-ERR: error: invalid operand for instruction
-// GFX9-ERR: error: invalid operand for instruction
+// GFX10-ERR: error: operands are not valid for this GPU or mode
+// GFX9-ERR: error: operands are not valid for this GPU or mode
 // VI-ERR: error: instruction not supported on this GPU
 
 scratch_store_dword v1, v2, s1
-// GFX10-ERR: error: invalid operand for instruction
-// GFX9-ERR: error: invalid operand for instruction
+// GFX10-ERR: error: operands are not valid for this GPU or mode
+// GFX9-ERR: error: operands are not valid for this GPU or mode
 // VI-ERR: error: instruction not supported on this GPU
 
 scratch_store_dword v1, v2, s1 offset:32
-// GFX10-ERR: error: invalid operand for instruction
-// GFX9-ERR: error: invalid operand for instruction
+// GFX10-ERR: error: operands are not valid for this GPU or mode
+// GFX9-ERR: error: operands are not valid for this GPU or mode
 // VI-ERR: error: instruction not supported on this GPU
 
 scratch_load_dword v1, off, exec_hi

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/flat-scratch-gfx940.txt b/llvm/test/MC/Disassembler/AMDGPU/flat-scratch-gfx940.txt
new file mode 100644
index 0000000000000..b3182c98cb1b7
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/flat-scratch-gfx940.txt
@@ -0,0 +1,1057 @@
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx940 -show-encoding -disassemble %s | FileCheck -check-prefix=GFX940 %s
+
+# GFX940: scratch_load_dword a2, v4, s6 ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x50,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_dword a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x50,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_dword a2, v4, off ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x50,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_dword a2, v4, off offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x50,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_dword a2, off, s6 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x50,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_dword a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x50,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_dword a2, off, off ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x50,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_dword a2, off, off offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x50,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_dword v2, v4, s6 ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x50,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_dword v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x50,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_dword v2, v4, off ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x50,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dword v2, v4, off offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x50,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dword v2, off, s6 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x50,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_dword v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x50,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_dword v2, off, off ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x50,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dword v2, off, off offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x50,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dwordx2 a[2:3], v4, s6 ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x54,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_dwordx2 a[2:3], v4, s6 offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x54,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_dwordx2 a[2:3], v4, off ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x54,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_dwordx2 a[2:3], v4, off offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x54,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_dwordx2 a[2:3], off, s6 ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x54,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_dwordx2 a[2:3], off, s6 offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x54,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_dwordx2 a[2:3], off, off ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x54,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_dwordx2 a[2:3], off, off offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x54,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_dwordx2 v[2:3], v4, s6 ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x54,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_dwordx2 v[2:3], v4, s6 offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x54,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_dwordx2 v[2:3], v4, off ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x54,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dwordx2 v[2:3], v4, off offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x54,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dwordx2 v[2:3], off, s6 ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x54,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_dwordx2 v[2:3], off, s6 offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x54,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_dwordx2 v[2:3], off, off ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x54,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dwordx2 v[2:3], off, off offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x54,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dwordx3 a[2:4], v4, s6 ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x58,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_dwordx3 a[2:4], v4, s6 offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x58,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_dwordx3 a[2:4], v4, off ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x58,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_dwordx3 a[2:4], v4, off offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x58,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_dwordx3 a[2:4], off, s6 ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x58,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_dwordx3 a[2:4], off, s6 offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x58,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_dwordx3 a[2:4], off, off ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x58,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_dwordx3 a[2:4], off, off offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x58,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_dwordx3 v[2:4], v4, s6 ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x58,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_dwordx3 v[2:4], v4, s6 offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x58,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_dwordx3 v[2:4], v4, off ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x58,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dwordx3 v[2:4], v4, off offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x58,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dwordx3 v[2:4], off, s6 ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x58,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_dwordx3 v[2:4], off, s6 offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x58,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_dwordx3 v[2:4], off, off ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x58,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dwordx3 v[2:4], off, off offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x58,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dwordx4 a[2:5], v4, s6 ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x5c,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_dwordx4 a[2:5], v4, s6 offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x5c,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_dwordx4 a[2:5], v4, off ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x5c,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_dwordx4 a[2:5], v4, off offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x5c,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_dwordx4 a[2:5], off, s6 ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x5c,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_dwordx4 a[2:5], off, s6 offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x5c,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_dwordx4 a[2:5], off, off ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x5c,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_dwordx4 a[2:5], off, off offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x5c,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_dwordx4 v[2:5], v4, s6 ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x5c,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_dwordx4 v[2:5], v4, s6 offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x5c,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_dwordx4 v[2:5], v4, off ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x5c,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dwordx4 v[2:5], v4, off offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x5c,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dwordx4 v[2:5], off, s6 ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x5c,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_dwordx4 v[2:5], off, s6 offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x5c,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_dwordx4 v[2:5], off, off ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x5c,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dwordx4 v[2:5], off, off offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x5c,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sbyte a2, v4, s6 ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x44,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_sbyte a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x44,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_sbyte a2, v4, off ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x44,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_sbyte a2, v4, off offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x44,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_sbyte a2, off, s6 ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x44,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_sbyte a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x44,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_sbyte a2, off, off ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x44,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_sbyte a2, off, off offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x44,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_sbyte v2, v4, s6 ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x44,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_sbyte v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x44,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_sbyte v2, v4, off ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x44,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sbyte v2, v4, off offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x44,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sbyte v2, off, s6 ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x44,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_sbyte v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x44,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_sbyte v2, off, off ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x44,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sbyte v2, off, off offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x44,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sbyte_d16 a2, v4, s6 ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x88,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_sbyte_d16 a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x88,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_sbyte_d16 a2, v4, off ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x88,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_sbyte_d16 a2, v4, off offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x88,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_sbyte_d16 a2, off, s6 ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x88,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_sbyte_d16 a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x88,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_sbyte_d16 a2, off, off ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x88,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_sbyte_d16 a2, off, off offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x88,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_sbyte_d16 v2, v4, s6 ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x88,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_sbyte_d16 v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x88,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_sbyte_d16 v2, v4, off ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x88,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sbyte_d16 v2, v4, off offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x88,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sbyte_d16 v2, off, s6 ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x88,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_sbyte_d16 v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x88,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_sbyte_d16 v2, off, off ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x88,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sbyte_d16 v2, off, off offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x88,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi a2, v4, s6 ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x8c,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x8c,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi a2, v4, off ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x8c,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi a2, v4, off offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x8c,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi a2, off, s6 ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x8c,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x8c,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi a2, off, off ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x8c,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi a2, off, off offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x8c,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi v2, v4, s6 ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x8c,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x8c,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi v2, v4, off ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x8c,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi v2, v4, off offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x8c,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi v2, off, s6 ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x8c,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x8c,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi v2, off, off ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x8c,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi v2, off, off offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x8c,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_short_d16 a2, v4, s6 ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x90,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_short_d16 a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x90,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_short_d16 a2, v4, off ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x90,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_short_d16 a2, v4, off offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x90,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_short_d16 a2, off, s6 ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x90,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_short_d16 a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x90,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_short_d16 a2, off, off ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x90,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_short_d16 a2, off, off offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x90,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_short_d16 v2, v4, s6 ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x90,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_short_d16 v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x90,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_short_d16 v2, v4, off ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x90,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_short_d16 v2, v4, off offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x90,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_short_d16 v2, off, s6 ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x90,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_short_d16 v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x90,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_short_d16 v2, off, off ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x90,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_short_d16 v2, off, off offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x90,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_short_d16_hi a2, v4, s6 ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x94,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_short_d16_hi a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x94,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_short_d16_hi a2, v4, off ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x94,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_short_d16_hi a2, v4, off offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x94,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_short_d16_hi a2, off, s6 ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x94,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_short_d16_hi a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x94,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_short_d16_hi a2, off, off ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x94,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_short_d16_hi a2, off, off offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x94,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_short_d16_hi v2, v4, s6 ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x94,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_short_d16_hi v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x94,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_short_d16_hi v2, v4, off ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x94,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_short_d16_hi v2, v4, off offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x94,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_short_d16_hi v2, off, s6 ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x94,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_short_d16_hi v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x94,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_short_d16_hi v2, off, off ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x94,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_short_d16_hi v2, off, off offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x94,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sshort a2, v4, s6 ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x4c,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_sshort a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x4c,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_sshort a2, v4, off ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x4c,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_sshort a2, v4, off offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x4c,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_sshort a2, off, s6 ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x4c,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_sshort a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x4c,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_sshort a2, off, off ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x4c,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_sshort a2, off, off offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x4c,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_sshort v2, v4, s6 ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x4c,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_sshort v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x4c,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_sshort v2, v4, off ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x4c,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sshort v2, v4, off offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x4c,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sshort v2, off, s6 ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x4c,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_sshort v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x4c,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_sshort v2, off, off ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x4c,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sshort v2, off, off offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x4c,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ubyte a2, v4, s6 ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x40,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_ubyte a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x40,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_ubyte a2, v4, off ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x40,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_ubyte a2, v4, off offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x40,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_ubyte a2, off, s6 ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x40,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_ubyte a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x40,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_ubyte a2, off, off ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x40,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_ubyte a2, off, off offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x40,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_ubyte v2, v4, s6 ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x40,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_ubyte v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x40,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_ubyte v2, v4, off ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x40,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ubyte v2, v4, off offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x40,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ubyte v2, off, s6 ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x40,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_ubyte v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x40,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_ubyte v2, off, off ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x40,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ubyte v2, off, off offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x40,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ubyte_d16 a2, v4, s6 ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x80,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_ubyte_d16 a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x80,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_ubyte_d16 a2, v4, off ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x80,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_ubyte_d16 a2, v4, off offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x80,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_ubyte_d16 a2, off, s6 ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x80,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_ubyte_d16 a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x80,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_ubyte_d16 a2, off, off ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x80,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_ubyte_d16 a2, off, off offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x80,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_ubyte_d16 v2, v4, s6 ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x80,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_ubyte_d16 v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x80,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_ubyte_d16 v2, v4, off ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x80,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ubyte_d16 v2, v4, off offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x80,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ubyte_d16 v2, off, s6 ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x80,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_ubyte_d16 v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x80,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_ubyte_d16 v2, off, off ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x80,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ubyte_d16 v2, off, off offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x80,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi a2, v4, s6 ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x84,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x84,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi a2, v4, off ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x84,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi a2, v4, off offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x84,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi a2, off, s6 ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x84,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x84,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi a2, off, off ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x84,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi a2, off, off offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x84,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi v2, v4, s6 ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x84,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x84,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi v2, v4, off ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x84,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi v2, v4, off offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x84,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi v2, off, s6 ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x84,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x84,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi v2, off, off ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x84,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi v2, off, off offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x84,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ushort a2, v4, s6 ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x48,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_ushort a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x48,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_ushort a2, v4, off ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x48,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_ushort a2, v4, off offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x48,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_ushort a2, off, s6 ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x48,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_ushort a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x48,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_ushort a2, off, off ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x48,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_ushort a2, off, off offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x48,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_ushort v2, v4, s6 ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x48,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_ushort v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x48,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_ushort v2, v4, off ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x48,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ushort v2, v4, off offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x48,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ushort v2, off, s6 ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x48,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_ushort v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x48,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_ushort v2, off, off ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x48,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ushort v2, off, off offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x48,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_store_byte v4, a2, s6 ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0x86,0x00]
+0x00,0x60,0x60,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_byte v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0x86,0x00]
+0x10,0x60,0x60,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_byte v4, a2, off ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0xff,0x00]
+0x00,0x60,0x60,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_byte v4, a2, off offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0xff,0x00]
+0x10,0x60,0x60,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_byte off, a2, s6 ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x86,0x00]
+0x00,0x40,0x60,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_byte off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0x86,0x00]
+0x10,0x40,0x60,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_byte off, a2, off ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0xff,0x00]
+0x00,0x40,0x60,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_byte off, a2, off offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0xff,0x00]
+0x10,0x40,0x60,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_byte v4, v2, s6 ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0x06,0x00]
+0x00,0x60,0x60,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_byte v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0x06,0x00]
+0x10,0x60,0x60,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_byte v4, v2, off ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0x7f,0x00]
+0x00,0x60,0x60,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_byte v4, v2, off offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0x7f,0x00]
+0x10,0x60,0x60,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_byte off, v2, s6 ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x06,0x00]
+0x00,0x40,0x60,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_byte off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0x06,0x00]
+0x10,0x40,0x60,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_byte off, v2, off ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x7f,0x00]
+0x00,0x40,0x60,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_byte off, v2, off offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0x7f,0x00]
+0x10,0x40,0x60,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_byte_d16_hi v4, a2, s6 ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0x86,0x00]
+0x00,0x60,0x64,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_byte_d16_hi v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0x86,0x00]
+0x10,0x60,0x64,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_byte_d16_hi v4, a2, off ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0xff,0x00]
+0x00,0x60,0x64,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_byte_d16_hi v4, a2, off offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0xff,0x00]
+0x10,0x60,0x64,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_byte_d16_hi off, a2, s6 ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x86,0x00]
+0x00,0x40,0x64,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_byte_d16_hi off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0x86,0x00]
+0x10,0x40,0x64,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_byte_d16_hi off, a2, off ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0xff,0x00]
+0x00,0x40,0x64,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_byte_d16_hi off, a2, off offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0xff,0x00]
+0x10,0x40,0x64,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_byte_d16_hi v4, v2, s6 ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0x06,0x00]
+0x00,0x60,0x64,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_byte_d16_hi v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0x06,0x00]
+0x10,0x60,0x64,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_byte_d16_hi v4, v2, off ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0x7f,0x00]
+0x00,0x60,0x64,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_byte_d16_hi v4, v2, off offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0x7f,0x00]
+0x10,0x60,0x64,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_byte_d16_hi off, v2, s6 ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x06,0x00]
+0x00,0x40,0x64,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_byte_d16_hi off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0x06,0x00]
+0x10,0x40,0x64,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_byte_d16_hi off, v2, off ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x7f,0x00]
+0x00,0x40,0x64,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_byte_d16_hi off, v2, off offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0x7f,0x00]
+0x10,0x40,0x64,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dword v4, a2, s6 ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0x86,0x00]
+0x00,0x60,0x70,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_dword v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0x86,0x00]
+0x10,0x60,0x70,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_dword v4, a2, off ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0xff,0x00]
+0x00,0x60,0x70,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_dword v4, a2, off offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0xff,0x00]
+0x10,0x60,0x70,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_dword off, a2, s6 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x86,0x00]
+0x00,0x40,0x70,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_dword off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0x86,0x00]
+0x10,0x40,0x70,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_dword off, a2, off ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0xff,0x00]
+0x00,0x40,0x70,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_dword off, a2, off offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0xff,0x00]
+0x10,0x40,0x70,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_dword v4, v2, s6 ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0x06,0x00]
+0x00,0x60,0x70,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_dword v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0x06,0x00]
+0x10,0x60,0x70,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_dword v4, v2, off ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0x7f,0x00]
+0x00,0x60,0x70,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dword v4, v2, off offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0x7f,0x00]
+0x10,0x60,0x70,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dword off, v2, s6 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x06,0x00]
+0x00,0x40,0x70,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_dword off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0x06,0x00]
+0x10,0x40,0x70,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_dword off, v2, off ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7f,0x00]
+0x00,0x40,0x70,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dword off, v2, off offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0x7f,0x00]
+0x10,0x40,0x70,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dwordx2 v4, a[2:3], s6 ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0x86,0x00]
+0x00,0x60,0x74,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_dwordx2 v4, a[2:3], s6 offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0x86,0x00]
+0x10,0x60,0x74,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_dwordx2 v4, a[2:3], off ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0xff,0x00]
+0x00,0x60,0x74,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_dwordx2 v4, a[2:3], off offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0xff,0x00]
+0x10,0x60,0x74,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_dwordx2 off, a[2:3], s6 ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x86,0x00]
+0x00,0x40,0x74,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_dwordx2 off, a[2:3], s6 offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0x86,0x00]
+0x10,0x40,0x74,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_dwordx2 off, a[2:3], off ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0xff,0x00]
+0x00,0x40,0x74,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_dwordx2 off, a[2:3], off offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0xff,0x00]
+0x10,0x40,0x74,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_dwordx2 v4, v[2:3], s6 ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0x06,0x00]
+0x00,0x60,0x74,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_dwordx2 v4, v[2:3], s6 offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0x06,0x00]
+0x10,0x60,0x74,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_dwordx2 v4, v[2:3], off ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0x7f,0x00]
+0x00,0x60,0x74,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dwordx2 v4, v[2:3], off offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0x7f,0x00]
+0x10,0x60,0x74,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dwordx2 off, v[2:3], s6 ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x06,0x00]
+0x00,0x40,0x74,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_dwordx2 off, v[2:3], s6 offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0x06,0x00]
+0x10,0x40,0x74,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_dwordx2 off, v[2:3], off ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x7f,0x00]
+0x00,0x40,0x74,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dwordx2 off, v[2:3], off offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0x7f,0x00]
+0x10,0x40,0x74,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dwordx3 v4, a[2:4], s6 ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0x86,0x00]
+0x00,0x60,0x78,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_dwordx3 v4, a[2:4], s6 offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0x86,0x00]
+0x10,0x60,0x78,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_dwordx3 v4, a[2:4], off ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0xff,0x00]
+0x00,0x60,0x78,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_dwordx3 v4, a[2:4], off offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0xff,0x00]
+0x10,0x60,0x78,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_dwordx3 off, a[2:4], s6 ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x86,0x00]
+0x00,0x40,0x78,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_dwordx3 off, a[2:4], s6 offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0x86,0x00]
+0x10,0x40,0x78,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_dwordx3 off, a[2:4], off ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0xff,0x00]
+0x00,0x40,0x78,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_dwordx3 off, a[2:4], off offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0xff,0x00]
+0x10,0x40,0x78,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_dwordx3 v4, v[2:4], s6 ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0x06,0x00]
+0x00,0x60,0x78,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_dwordx3 v4, v[2:4], s6 offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0x06,0x00]
+0x10,0x60,0x78,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_dwordx3 v4, v[2:4], off ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0x7f,0x00]
+0x00,0x60,0x78,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dwordx3 v4, v[2:4], off offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0x7f,0x00]
+0x10,0x60,0x78,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dwordx3 off, v[2:4], s6 ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x06,0x00]
+0x00,0x40,0x78,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_dwordx3 off, v[2:4], s6 offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0x06,0x00]
+0x10,0x40,0x78,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_dwordx3 off, v[2:4], off ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x7f,0x00]
+0x00,0x40,0x78,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dwordx3 off, v[2:4], off offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0x7f,0x00]
+0x10,0x40,0x78,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dwordx4 v4, a[2:5], s6 ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0x86,0x00]
+0x00,0x60,0x7c,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_dwordx4 v4, a[2:5], s6 offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0x86,0x00]
+0x10,0x60,0x7c,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_dwordx4 v4, a[2:5], off ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0xff,0x00]
+0x00,0x60,0x7c,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_dwordx4 v4, a[2:5], off offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0xff,0x00]
+0x10,0x60,0x7c,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_dwordx4 off, a[2:5], s6 ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x86,0x00]
+0x00,0x40,0x7c,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_dwordx4 off, a[2:5], s6 offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0x86,0x00]
+0x10,0x40,0x7c,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_dwordx4 off, a[2:5], off ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0xff,0x00]
+0x00,0x40,0x7c,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_dwordx4 off, a[2:5], off offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0xff,0x00]
+0x10,0x40,0x7c,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_dwordx4 v4, v[2:5], s6 ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0x06,0x00]
+0x00,0x60,0x7c,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_dwordx4 v4, v[2:5], s6 offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0x06,0x00]
+0x10,0x60,0x7c,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_dwordx4 v4, v[2:5], off ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0x7f,0x00]
+0x00,0x60,0x7c,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dwordx4 v4, v[2:5], off offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0x7f,0x00]
+0x10,0x60,0x7c,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dwordx4 off, v[2:5], s6 ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x06,0x00]
+0x00,0x40,0x7c,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_dwordx4 off, v[2:5], s6 offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0x06,0x00]
+0x10,0x40,0x7c,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_dwordx4 off, v[2:5], off ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x7f,0x00]
+0x00,0x40,0x7c,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dwordx4 off, v[2:5], off offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0x7f,0x00]
+0x10,0x40,0x7c,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_short v4, a2, s6 ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0x86,0x00]
+0x00,0x60,0x68,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_short v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0x86,0x00]
+0x10,0x60,0x68,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_short v4, a2, off ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0xff,0x00]
+0x00,0x60,0x68,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_short v4, a2, off offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0xff,0x00]
+0x10,0x60,0x68,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_short off, a2, s6 ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x86,0x00]
+0x00,0x40,0x68,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_short off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0x86,0x00]
+0x10,0x40,0x68,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_short off, a2, off ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0xff,0x00]
+0x00,0x40,0x68,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_short off, a2, off offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0xff,0x00]
+0x10,0x40,0x68,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_short v4, v2, s6 ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0x06,0x00]
+0x00,0x60,0x68,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_short v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0x06,0x00]
+0x10,0x60,0x68,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_short v4, v2, off ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0x7f,0x00]
+0x00,0x60,0x68,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_short v4, v2, off offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0x7f,0x00]
+0x10,0x60,0x68,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_short off, v2, s6 ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x06,0x00]
+0x00,0x40,0x68,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_short off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0x06,0x00]
+0x10,0x40,0x68,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_short off, v2, off ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x7f,0x00]
+0x00,0x40,0x68,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_short off, v2, off offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0x7f,0x00]
+0x10,0x40,0x68,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_short_d16_hi v4, a2, s6 ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0x86,0x00]
+0x00,0x60,0x6c,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_short_d16_hi v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0x86,0x00]
+0x10,0x60,0x6c,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_short_d16_hi v4, a2, off ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0xff,0x00]
+0x00,0x60,0x6c,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_short_d16_hi v4, a2, off offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0xff,0x00]
+0x10,0x60,0x6c,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_short_d16_hi off, a2, s6 ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x86,0x00]
+0x00,0x40,0x6c,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_short_d16_hi off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0x86,0x00]
+0x10,0x40,0x6c,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_short_d16_hi off, a2, off ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0xff,0x00]
+0x00,0x40,0x6c,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_short_d16_hi off, a2, off offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0xff,0x00]
+0x10,0x40,0x6c,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_short_d16_hi v4, v2, s6 ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0x06,0x00]
+0x00,0x60,0x6c,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_short_d16_hi v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0x06,0x00]
+0x10,0x60,0x6c,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_short_d16_hi v4, v2, off ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0x7f,0x00]
+0x00,0x60,0x6c,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_short_d16_hi v4, v2, off offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0x7f,0x00]
+0x10,0x60,0x6c,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_short_d16_hi off, v2, s6 ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x06,0x00]
+0x00,0x40,0x6c,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_short_d16_hi off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0x06,0x00]
+0x10,0x40,0x6c,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_short_d16_hi off, v2, off ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x7f,0x00]
+0x00,0x40,0x6c,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_short_d16_hi off, v2, off offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0x7f,0x00]
+0x10,0x40,0x6c,0xdc,0x00,0x02,0x7f,0x00


        


More information about the llvm-commits mailing list