[llvm] 36fe3f1 - [AMDGPU] flat scratch SVS addressing mode for gfx940
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 14 15:23:49 PDT 2022
Author: Stanislav Mekhanoshin
Date: 2022-03-14T15:23:36-07:00
New Revision: 36fe3f13a997cc45cbffe8a4631db4b1dade6eb8
URL: https://github.com/llvm/llvm-project/commit/36fe3f13a997cc45cbffe8a4631db4b1dade6eb8
DIFF: https://github.com/llvm/llvm-project/commit/36fe3f13a997cc45cbffe8a4631db4b1dade6eb8.diff
LOG: [AMDGPU] flat scratch SVS addressing mode for gfx940
Both VADDR and SADDR are used in SVS mode.
Differential Revision: https://reviews.llvm.org/D121254
Added:
llvm/test/MC/AMDGPU/flat-scratch-gfx940.s
llvm/test/MC/Disassembler/AMDGPU/flat-scratch-gfx940.txt
Modified:
llvm/lib/Target/AMDGPU/AMDGPUGISel.td
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
llvm/lib/Target/AMDGPU/FLATInstructions.td
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.h
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
llvm/test/CodeGen/AMDGPU/flat-scratch.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
llvm/test/MC/AMDGPU/flat-scratch-instructions.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index f2b39d68b8572..fd409d6270cce 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -93,6 +93,10 @@ def gi_flat_scratch_saddr :
GIComplexOperandMatcher<s32, "selectScratchSAddr">,
GIComplexPatternEquiv<ScratchSAddr>;
+def gi_flat_scratch_svaddr :
+ GIComplexOperandMatcher<s32, "selectScratchSVAddr">,
+ GIComplexPatternEquiv<ScratchSVAddr>;
+
def gi_ds_1addr_1offset :
GIComplexOperandMatcher<s32, "selectDS1Addr1Offset">,
GIComplexPatternEquiv<DS1Addr1Offset>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index dc105dad27ce8..f5b51abd58eaf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1798,6 +1798,60 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
+ SDValue &VAddr, SDValue &SAddr,
+ SDValue &Offset) const {
+ int64_t ImmOffset = 0;
+
+ SDValue LHS, RHS;
+ if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
+ int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
+
+ if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true)) {
+ Addr = LHS;
+ ImmOffset = COffsetVal;
+ } else if (!LHS->isDivergent() && COffsetVal > 0) {
+ SDLoc SL(N);
+ // saddr + large_offset -> saddr + (vaddr = large_offset & ~MaxOffset) +
+ // (large_offset & MaxOffset);
+ int64_t SplitImmOffset, RemainderOffset;
+ std::tie(SplitImmOffset, RemainderOffset)
+ = TII->splitFlatOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true);
+
+ if (isUInt<32>(RemainderOffset)) {
+ SDNode *VMov = CurDAG->getMachineNode(
+ AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
+ CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
+ VAddr = SDValue(VMov, 0);
+ SAddr = LHS;
+ Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
+ return true;
+ }
+ }
+ }
+
+ if (Addr.getOpcode() != ISD::ADD)
+ return false;
+
+ LHS = Addr.getOperand(0);
+ RHS = Addr.getOperand(1);
+
+ if (!LHS->isDivergent() && RHS->isDivergent()) {
+ SAddr = LHS;
+ VAddr = RHS;
+ } else if (!RHS->isDivergent() && LHS->isDivergent()) {
+ SAddr = RHS;
+ VAddr = LHS;
+ } else {
+ return false;
+ }
+
+ SAddr = SelectSAddrFI(CurDAG, SAddr);
+ Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
+ return true;
+}
+
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
SDValue &Offset, bool &Imm) const {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index d638d9877a9b3..acf82d6d15e4c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -188,6 +188,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
SDValue &VOffset, SDValue &Offset) const;
bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
SDValue &Offset) const;
+ bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr,
+ SDValue &SAddr, SDValue &Offset) const;
bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
bool &Imm) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 533b32e94dcf8..bf0f9fa976d46 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3792,6 +3792,56 @@ AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
}};
}
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
+ Register Addr = Root.getReg();
+ Register PtrBase;
+ int64_t ConstOffset;
+ int64_t ImmOffset = 0;
+
+ // Match the immediate offset first, which canonically is moved as low as
+ // possible.
+ std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);
+
+ if (ConstOffset != 0 &&
+ TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS, true)) {
+ Addr = PtrBase;
+ ImmOffset = ConstOffset;
+ }
+
+ auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
+ if (!AddrDef)
+ return None;
+
+ if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
+ return None;
+
+ Register RHS = AddrDef->MI->getOperand(2).getReg();
+ if (RBI.getRegBank(RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
+ return None;
+
+ Register LHS = AddrDef->MI->getOperand(1).getReg();
+ auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI);
+
+ if (LHSDef && LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
+ int FI = LHSDef->MI->getOperand(1).getIndex();
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr
+ [=](MachineInstrBuilder &MIB) { MIB.addFrameIndex(FI); }, // saddr
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
+ }};
+ }
+
+ if (!isSGPR(LHS))
+ return None;
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(LHS); }, // saddr
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
+ }};
+}
+
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
MachineInstr *MI = Root.getParent();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 42095332d11ac..a879b9a733097 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -203,6 +203,8 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
InstructionSelector::ComplexRendererFns
selectScratchSAddr(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
+ selectScratchSVAddr(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectMUBUFScratchOffen(MachineOperand &Root) const;
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 58a7980b50458..9f086a29d16f5 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -12,6 +12,7 @@ def ScratchOffset : ComplexPattern<iPTR, 2, "SelectScratchOffset", [], [SDNPWant
def GlobalSAddr : ComplexPattern<iPTR, 3, "SelectGlobalSAddr", [], [SDNPWantRoot], -10>;
def ScratchSAddr : ComplexPattern<iPTR, 2, "SelectScratchSAddr", [], [SDNPWantRoot], -10>;
+def ScratchSVAddr : ComplexPattern<iPTR, 3, "SelectScratchSVAddr", [], [SDNPWantRoot], -10>;
//===----------------------------------------------------------------------===//
// FLAT classes
@@ -56,6 +57,8 @@ class FLAT_Pseudo<string opName, dag outs, dag ins,
bits<1> dlcValue = 0;
bits<1> has_sccb = 1;
bits<1> sccbValue = 0;
+ bits<1> has_sve = 0; // Scratch VGPR Enable
+ bits<1> sve = 0;
let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts,
!if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));
@@ -123,7 +126,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
// Only valid on GFX9+
let Inst{12-0} = offset;
- let Inst{13} = lds;
+ let Inst{13} = !if(ps.has_sve, ps.sve, lds);
let Inst{15-14} = seg;
let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue);
@@ -273,16 +276,19 @@ class FlatScratchInst <string sv_op, string mode> {
class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
bit HasTiedOutput = 0,
bit EnableSaddr = 0,
- bit EnableVaddr = !not(EnableSaddr)>
+ bit EnableSVE = 0,
+ bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))>
: FLAT_Pseudo<
opName,
(outs getLdStRegisterOperand<regClass>.ret:$vdst),
!con(
- !if(EnableSaddr,
- (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
- !if(EnableVaddr,
- (ins VGPR_32:$vaddr, flat_offset:$offset),
- (ins flat_offset:$offset))),
+ !if(EnableSVE,
+ (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
+ !if(EnableSaddr,
+ (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
+ !if(EnableVaddr,
+ (ins VGPR_32:$vaddr, flat_offset:$offset),
+ (ins flat_offset:$offset)))),
!if(HasTiedOutput, (ins CPol:$cpol, getLdStRegisterOperand<regClass>.ret:$vdst_in),
(ins CPol_0:$cpol))),
" $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
@@ -291,7 +297,9 @@ class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
let has_saddr = 1;
let enabled_saddr = EnableSaddr;
let has_vaddr = EnableVaddr;
- let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"));
+ let has_sve = EnableSVE;
+ let sve = EnableVaddr;
+ let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")));
let maybeAtomic = 1;
let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
@@ -299,15 +307,18 @@ class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
}
class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0,
- bit EnableVaddr = !not(EnableSaddr),
+ bit EnableSVE = 0,
+ bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr)),
RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret> : FLAT_Pseudo<
opName,
(outs),
- !if(EnableSaddr,
- (ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol),
- !if(EnableVaddr,
- (ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol),
- (ins vdata_op:$vdata, flat_offset:$offset, CPol_0:$cpol))),
+ !if(EnableSVE,
+ (ins vdata_op:$vdata, VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol),
+ !if(EnableSaddr,
+ (ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol),
+ !if(EnableVaddr,
+ (ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol),
+ (ins vdata_op:$vdata, flat_offset:$offset, CPol_0:$cpol)))),
" "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
let mayLoad = 0;
let mayStore = 1;
@@ -315,7 +326,9 @@ class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit En
let has_saddr = 1;
let enabled_saddr = EnableSaddr;
let has_vaddr = EnableVaddr;
- let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"));
+ let has_sve = EnableSVE;
+ let sve = EnableVaddr;
+ let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")));
let maybeAtomic = 1;
}
@@ -326,8 +339,12 @@ multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass, bit H
def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1>,
FlatScratchInst<opName, "SS">;
+ let SubtargetPredicate = isGFX940Plus in
+ def _SVS : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1, 1>,
+ FlatScratchInst<opName, "SVS">;
+
let SubtargetPredicate = HasFlatScratchSTMode in
- def _ST : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 0, 0>,
+ def _ST : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 0, 0, 0>,
FlatScratchInst<opName, "ST">;
}
}
@@ -339,8 +356,12 @@ multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>,
FlatScratchInst<opName, "SS">;
+ let SubtargetPredicate = isGFX940Plus in
+ def _SVS : FLAT_Scratch_Store_Pseudo<opName, regClass, 1, 1>,
+ FlatScratchInst<opName, "SVS">;
+
let SubtargetPredicate = HasFlatScratchSTMode in
- def _ST : FLAT_Scratch_Store_Pseudo<opName, regClass, 0, 0>,
+ def _ST : FLAT_Scratch_Store_Pseudo<opName, regClass, 0, 0, 0>,
FlatScratchInst<opName, "ST">;
}
}
@@ -962,6 +983,22 @@ class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
(inst getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
>;
+class ScratchLoadSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i16:$offset))),
+ (inst $vaddr, $saddr, $offset, 0)
+>;
+
+class ScratchStoreSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
+ ValueType vt> : GCNPat <
+ (node vt:$data, (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i16:$offset)),
+ (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $saddr, $offset)
+>;
+
+class ScratchLoadSVaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i16:$offset), vt:$in)),
+ (inst $vaddr, $saddr, $offset, 0, $in)
+>;
+
let OtherPredicates = [HasFlatAddressSpace] in {
def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
@@ -1145,6 +1182,11 @@ multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueTy
def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
let AddedComplexity = 26;
}
+
+ def : ScratchLoadSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> {
+ let SubtargetPredicate = isGFX940Plus;
+ let AddedComplexity = 27;
+ }
}
multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
@@ -1156,6 +1198,11 @@ multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
def : ScratchStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
let AddedComplexity = 26;
}
+
+ def : ScratchStoreSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> {
+ let SubtargetPredicate = isGFX940Plus;
+ let AddedComplexity = 27;
+ }
}
multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
@@ -1166,6 +1213,11 @@ multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, Val
def : ScratchLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
let AddedComplexity = 26;
}
+
+ def : ScratchLoadSVaddrPat_D16 <!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> {
+ let SubtargetPredicate = isGFX940Plus;
+ let AddedComplexity = 27;
+ }
}
let OtherPredicates = [HasFlatGlobalInsts] in {
@@ -1451,9 +1503,25 @@ class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> :
SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> {
let AssemblerPredicate = isGFX940Plus;
let DecoderNamespace = "GFX9";
+ let Inst{13} = ps.sve;
let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
}
+multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> {
+ def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> {
+ let AssemblerPredicate = isGFX8GFX9NotGFX940;
+ let OtherPredicates = [isGFX8GFX9NotGFX940];
+ }
+ def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> {
+ let DecoderNamespace = "GFX9";
+ }
+ let AssemblerPredicate = isGFX940Plus, SubtargetPredicate = isGFX940Plus in {
+ def _VE_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
+ def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
+ def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
+ }
+}
+
def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
@@ -1573,28 +1641,28 @@ defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Real_Atomics_vi <0x6a>;
defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Real_Atomics_vi <0x6b>;
defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Real_Atomics_vi <0x6c>;
-defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>;
-defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>;
-defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_vi <0x12>;
-defm SCRATCH_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>;
-defm SCRATCH_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>;
-defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>;
-defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>;
-defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>;
-defm SCRATCH_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>;
-defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_vi <0x19>;
-defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_vi <0x20>;
-defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>;
-defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_vi <0x22>;
-defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>;
-defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_AllAddr_vi <0x24>;
-defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>;
-defm SCRATCH_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>;
-defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
-defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>;
-defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>;
-defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
-defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
+defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_SVE_vi <0x10>;
+defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_SVE_vi <0x11>;
+defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_SVE_vi <0x12>;
+defm SCRATCH_LOAD_SSHORT : FLAT_Real_AllAddr_SVE_vi <0x13>;
+defm SCRATCH_LOAD_DWORD : FLAT_Real_AllAddr_SVE_vi <0x14>;
+defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_SVE_vi <0x15>;
+defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_SVE_vi <0x16>;
+defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_SVE_vi <0x17>;
+defm SCRATCH_STORE_BYTE : FLAT_Real_AllAddr_SVE_vi <0x18>;
+defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x19>;
+defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_SVE_vi <0x20>;
+defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x21>;
+defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_SVE_vi <0x22>;
+defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x23>;
+defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_AllAddr_SVE_vi <0x24>;
+defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x25>;
+defm SCRATCH_STORE_SHORT : FLAT_Real_AllAddr_SVE_vi <0x1a>;
+defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x1b>;
+defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_SVE_vi <0x1c>;
+defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_SVE_vi <0x1d>;
+defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_SVE_vi <0x1e>;
+defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_SVE_vi <0x1f>;
let SubtargetPredicate = isGFX8GFX9NotGFX940 in {
// These instructions are encoded differently on gfx90* and gfx940.
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 9b95ade37dfb3..a6470f85a313f 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -677,7 +677,9 @@ void SIFoldOperands::foldOperand(
if (TII->isFLATScratch(*UseMI) &&
AMDGPU::getNamedOperandIdx(UseMI->getOpcode(),
- AMDGPU::OpName::vaddr) != -1) {
+ AMDGPU::OpName::vaddr) != -1 &&
+ AMDGPU::getNamedOperandIdx(UseMI->getOpcode(),
+ AMDGPU::OpName::saddr) == -1) {
unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(UseMI->getOpcode());
UseMI->setDesc(TII->get(NewOpc));
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 239980e1e9f78..25d3f4a765e6b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1244,6 +1244,11 @@ namespace AMDGPU {
LLVM_READONLY
int getFlatScratchInstSTfromSS(uint16_t Opcode);
+ /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
+ /// of an SVS (SADDR + VADDR) form.
+ LLVM_READONLY
+ int getFlatScratchInstSVfromSVS(uint16_t Opcode);
+
/// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SV (VADDR) form.
LLVM_READONLY
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index cd2176802ca4e..33b7bc7008f50 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2643,6 +2643,14 @@ def getFlatScratchInstSSfromSV : InstrMapping {
let ValueCols = [["SS"]];
}
+def getFlatScratchInstSVfromSVS : InstrMapping {
+ let FilterClass = "FlatScratchInst";
+ let RowFields = ["SVOp"];
+ let ColFields = ["Mode"];
+ let KeyCol = ["SVS"];
+ let ValueCols = [["SV"]];
+}
+
def getFlatScratchInstSVfromSS : InstrMapping {
let FilterClass = "FlatScratchInst";
let RowFields = ["SVOp"];
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 120b601f9a042..776cb70f9f7d5 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2143,18 +2143,23 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
Offset = 0;
}
- assert(!TII->getNamedOperand(*MI, AMDGPU::OpName::vaddr) &&
- "Unexpected vaddr for flat scratch with a FI operand");
-
- // On GFX10 we have ST mode to use no registers for an address.
- // Otherwise we need to materialize 0 into an SGPR.
- if (!Offset && ST.hasFlatScratchSTMode()) {
+ if (!Offset) {
unsigned Opc = MI->getOpcode();
- unsigned NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
- MI->RemoveOperand(
- AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
- MI->setDesc(TII->get(NewOpc));
- return;
+ int NewOpc = -1;
+ if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) != -1) {
+ NewOpc = AMDGPU::getFlatScratchInstSVfromSVS(Opc);
+ } else if (ST.hasFlatScratchSTMode()) {
+ // On GFX10 we have ST mode to use no registers for an address.
+ // Otherwise we need to materialize 0 into an SGPR.
+ NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
+ }
+
+ if (NewOpc != -1) {
+ MI->RemoveOperand(
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
+ MI->setDesc(TII->get(NewOpc));
+ return;
+ }
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll
index 35c4d56bd901d..5914521cf4189 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll
@@ -1,5 +1,6 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,RW-FLAT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch < %s | FileCheck -check-prefixes=GCN,RO-FLAT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GCN,RO-FLAT %s
; Make sure flat_scratch_init is set
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
index a38017a709548..1c3c79f8b867d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx900 -global-isel -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1030 -global-isel -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx940 -global-isel -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX9-LABEL: store_load_sindex_kernel:
@@ -40,6 +41,22 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_endpgm
+;
+; GFX940-LABEL: store_load_sindex_kernel:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT: v_mov_b32_e32 v0, 15
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_lshl_b32 s1, s0, 2
+; GFX940-NEXT: s_and_b32 s0, s0, 15
+; GFX940-NEXT: v_mov_b32_e32 v1, s1
+; GFX940-NEXT: s_lshl_b32 s0, s0, 2
+; GFX940-NEXT: scratch_store_dword v1, v0, off offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v0, s0
+; GFX940-NEXT: scratch_load_dword v0, v0, off offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
bb:
%i = alloca [32 x float], align 4, addrspace(5)
%i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)*
@@ -89,6 +106,18 @@ define amdgpu_kernel void @store_load_vindex_kernel() {
; GFX10-NEXT: scratch_load_dword v0, v1, off offset:124 glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_endpgm
+;
+; GFX940-LABEL: store_load_vindex_kernel:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 15
+; GFX940-NEXT: v_sub_u32_e32 v0, 0, v0
+; GFX940-NEXT: scratch_store_dword v1, v2, off offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX940-NEXT: scratch_load_dword v0, v0, off offset:128 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
bb:
%i = alloca [32 x float], align 4, addrspace(5)
%i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)*
@@ -137,6 +166,19 @@ define void @store_load_vindex_foo(i32 %idx) {
; GFX10-NEXT: scratch_load_dword v0, v1, off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: store_load_vindex_foo:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 15
+; GFX940-NEXT: v_and_b32_e32 v0, 15, v0
+; GFX940-NEXT: scratch_store_dword v1, v2, s32 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX940-NEXT: scratch_load_dword v0, v0, s32 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
bb:
%i = alloca [32 x float], align 4, addrspace(5)
%i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)*
@@ -167,6 +209,14 @@ define void @private_ptr_foo(float addrspace(5)* nocapture %arg) {
; GFX10-NEXT: scratch_store_dword v0, v1, off offset:4
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: private_ptr_foo:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v1, 0x41200000
+; GFX940-NEXT: scratch_store_dword v0, v1, off offset:4
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr inbounds float, float addrspace(5)* %arg, i32 1
store float 1.000000e+01, float addrspace(5)* %gep, align 4
ret void
@@ -214,6 +264,24 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_endpgm
+;
+; GFX940-LABEL: store_load_sindex_small_offset_kernel:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v0, 15
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_lshl_b32 s1, s0, 2
+; GFX940-NEXT: s_and_b32 s0, s0, 15
+; GFX940-NEXT: v_mov_b32_e32 v1, s1
+; GFX940-NEXT: s_lshl_b32 s0, s0, 2
+; GFX940-NEXT: scratch_store_dword v1, v0, off offset:260 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v0, s0
+; GFX940-NEXT: scratch_load_dword v0, v0, off offset:260 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
bb:
%padding = alloca [64 x i32], align 4, addrspace(5)
%i = alloca [32 x float], align 4, addrspace(5)
@@ -271,6 +339,20 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() {
; GFX10-NEXT: scratch_load_dword v0, v1, off offset:124 glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_endpgm
+;
+; GFX940-LABEL: store_load_vindex_small_offset_kernel:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: scratch_load_dword v1, off, off offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 15
+; GFX940-NEXT: v_sub_u32_e32 v0, 0, v0
+; GFX940-NEXT: scratch_store_dword v1, v2, off offset:260 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX940-NEXT: scratch_load_dword v0, v0, off offset:384 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
bb:
%padding = alloca [64 x i32], align 4, addrspace(5)
%i = alloca [32 x float], align 4, addrspace(5)
@@ -328,6 +410,21 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
; GFX10-NEXT: scratch_load_dword v0, v1, off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: store_load_vindex_small_offset_foo:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: scratch_load_dword v1, off, s32 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 15
+; GFX940-NEXT: v_and_b32_e32 v0, 15, v0
+; GFX940-NEXT: scratch_store_dword v1, v2, s32 offset:256 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX940-NEXT: scratch_load_dword v0, v0, s32 offset:256 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
bb:
%padding = alloca [64 x i32], align 4, addrspace(5)
%i = alloca [32 x float], align 4, addrspace(5)
@@ -386,6 +483,26 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_endpgm
+;
+; GFX940-LABEL: store_load_sindex_large_offset_kernel:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v0, 15
+; GFX940-NEXT: s_movk_i32 vcc_hi, 0x4004
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_lshl_b32 s1, s0, 2
+; GFX940-NEXT: s_and_b32 s0, s0, 15
+; GFX940-NEXT: v_mov_b32_e32 v1, s1
+; GFX940-NEXT: s_lshl_b32 s0, s0, 2
+; GFX940-NEXT: scratch_store_dword v1, v0, vcc_hi sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v0, s0
+; GFX940-NEXT: s_movk_i32 vcc_hi, 0x4004
+; GFX940-NEXT: scratch_load_dword v0, v0, vcc_hi sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
bb:
%padding = alloca [4096 x i32], align 4, addrspace(5)
%i = alloca [32 x float], align 4, addrspace(5)
@@ -443,6 +560,22 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
; GFX10-NEXT: scratch_load_dword v0, v1, off offset:124 glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_endpgm
+;
+; GFX940-LABEL: store_load_vindex_large_offset_kernel:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: scratch_load_dword v1, off, off offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 15
+; GFX940-NEXT: s_movk_i32 vcc_hi, 0x4004
+; GFX940-NEXT: v_sub_u32_e32 v0, 0, v0
+; GFX940-NEXT: scratch_store_dword v1, v2, vcc_hi sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX940-NEXT: s_movk_i32 vcc_hi, 0x4004
+; GFX940-NEXT: scratch_load_dword v0, v0, vcc_hi offset:124 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
bb:
%padding = alloca [4096 x i32], align 4, addrspace(5)
%i = alloca [32 x float], align 4, addrspace(5)
@@ -500,6 +633,23 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
; GFX10-NEXT: scratch_load_dword v0, v1, off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: store_load_vindex_large_offset_foo:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: scratch_load_dword v1, off, s32 offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 15
+; GFX940-NEXT: s_add_i32 vcc_hi, s32, 0x4004
+; GFX940-NEXT: v_and_b32_e32 v0, 15, v0
+; GFX940-NEXT: scratch_store_dword v1, v2, vcc_hi sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX940-NEXT: s_add_i32 vcc_hi, s32, 0x4004
+; GFX940-NEXT: scratch_load_dword v0, v0, vcc_hi sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
bb:
%padding = alloca [4096 x i32], align 4, addrspace(5)
%i = alloca [32 x float], align 4, addrspace(5)
@@ -551,6 +701,19 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
; GFX10-NEXT: scratch_load_dword v0, off, s0 glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_endpgm
+;
+; GFX940-LABEL: store_load_large_imm_offset_kernel:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: v_mov_b32_e32 v0, 13
+; GFX940-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v0, 0x3e80
+; GFX940-NEXT: v_mov_b32_e32 v1, 15
+; GFX940-NEXT: scratch_store_dword v0, v1, off offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_load_dword v0, v0, off offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
bb:
%i = alloca [4096 x i32], align 4, addrspace(5)
%i1 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 undef
@@ -595,6 +758,20 @@ define void @store_load_large_imm_offset_foo() {
; GFX10-NEXT: scratch_load_dword v0, off, s0 glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: store_load_large_imm_offset_foo:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v0, 13
+; GFX940-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v0, 0x3e80
+; GFX940-NEXT: v_mov_b32_e32 v1, 15
+; GFX940-NEXT: scratch_store_dword v0, v1, s32 offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_load_dword v0, v0, s32 offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
bb:
%i = alloca [4096 x i32], align 4, addrspace(5)
%i1 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 undef
@@ -638,6 +815,18 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) {
; GFX10-NEXT: scratch_load_dword v0, v0, off offset:1024 glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_endpgm
+;
+; GFX940-LABEL: store_load_vidx_sidx_offset:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT: v_mov_b32_e32 v1, 15
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_add_lshl_u32 v0, s0, v0, 2
+; GFX940-NEXT: scratch_store_dword v0, v1, off offset:1028 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_load_dword v0, v0, off offset:1028 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
bb:
%alloca = alloca [32 x i32], align 4, addrspace(5)
%vidx = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -672,6 +861,16 @@ define void @store_load_i64_aligned(i64 addrspace(5)* nocapture %arg) {
; GFX10-NEXT: scratch_load_dwordx2 v[0:1], v0, off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: store_load_i64_aligned:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[2:3], 15
+; GFX940-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
bb:
store volatile i64 15, i64 addrspace(5)* %arg, align 8
%load = load volatile i64, i64 addrspace(5)* %arg, align 8
@@ -701,6 +900,16 @@ define void @store_load_i64_unaligned(i64 addrspace(5)* nocapture %arg) {
; GFX10-NEXT: scratch_load_dwordx2 v[0:1], v0, off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: store_load_i64_unaligned:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[2:3], 15
+; GFX940-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
bb:
store volatile i64 15, i64 addrspace(5)* %arg, align 1
%load = load volatile i64, i64 addrspace(5)* %arg, align 1
@@ -738,6 +947,21 @@ define void @store_load_v3i32_unaligned(<3 x i32> addrspace(5)* nocapture %arg)
; GFX10-NEXT: scratch_load_dwordx3 v[0:2], v0, off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: store_load_v3i32_unaligned:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: s_mov_b32 s2, 3
+; GFX940-NEXT: s_mov_b32 s1, 2
+; GFX940-NEXT: s_mov_b32 s0, 1
+; GFX940-NEXT: v_mov_b32_e32 v4, s2
+; GFX940-NEXT: v_mov_b32_e32 v3, s1
+; GFX940-NEXT: v_mov_b32_e32 v2, s0
+; GFX940-NEXT: scratch_store_dwordx3 v0, v[2:4], off sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_load_dwordx3 v[0:2], v0, off sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
bb:
store volatile <3 x i32> <i32 1, i32 2, i32 3>, <3 x i32> addrspace(5)* %arg, align 1
%load = load volatile <3 x i32>, <3 x i32> addrspace(5)* %arg, align 1
@@ -779,6 +1003,21 @@ define void @store_load_v4i32_unaligned(<4 x i32> addrspace(5)* nocapture %arg)
; GFX10-NEXT: scratch_load_dwordx4 v[0:3], v0, off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: store_load_v4i32_unaligned:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: s_mov_b32 s3, 4
+; GFX940-NEXT: s_mov_b32 s2, 3
+; GFX940-NEXT: s_mov_b32 s1, 2
+; GFX940-NEXT: s_mov_b32 s0, 1
+; GFX940-NEXT: v_mov_b64_e32 v[4:5], s[2:3]
+; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX940-NEXT: scratch_store_dwordx4 v0, v[2:5], off sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_load_dwordx4 v[0:3], v0, off sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
bb:
store volatile <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> addrspace(5)* %arg, align 1
%load = load volatile <4 x i32>, <4 x i32> addrspace(5)* %arg, align 1
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
index 5cf182ba82227..cba84f5dfe4a6 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
@@ -2,6 +2,7 @@
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1030 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9-PAL %s
+; RUN: llc -march=amdgcn -mcpu=gfx940 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-PAL,GFX1010-PAL %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1030 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-PAL,GFX1030-PAL %s
@@ -75,6 +76,20 @@ define amdgpu_kernel void @zero_init_kernel() {
; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16
; GFX9-PAL-NEXT: s_endpgm
;
+; GFX940-LABEL: zero_init_kernel:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_mov_b32 s0, 0
+; GFX940-NEXT: s_mov_b32 s1, s0
+; GFX940-NEXT: s_mov_b32 s2, s0
+; GFX940-NEXT: s_mov_b32 s3, s0
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:64
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:48
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:32
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:16
+; GFX940-NEXT: s_endpgm
+;
; GFX1010-PAL-LABEL: zero_init_kernel:
; GFX1010-PAL: ; %bb.0:
; GFX1010-PAL-NEXT: s_getpc_b64 s[2:3]
@@ -193,6 +208,22 @@ define void @zero_init_foo() {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX940-LABEL: zero_init_foo:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: s_mov_b32 s0, 0
+; GFX940-NEXT: s_mov_b32 s1, s0
+; GFX940-NEXT: s_mov_b32 s2, s0
+; GFX940-NEXT: s_mov_b32 s3, s0
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:48
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:32
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:16
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s32
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-PAL-LABEL: zero_init_foo:
; GFX10-PAL: ; %bb.0:
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -211,6 +242,21 @@ define void @zero_init_foo() {
; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: zero_init_foo:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s0, 0
+; GCN-NEXT: s_mov_b32 s1, s0
+; GCN-NEXT: s_mov_b32 s2, s0
+; GCN-NEXT: s_mov_b32 s3, s0
+; GCN-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GCN-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
+; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:48
+; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:32
+; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:16
+; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca [32 x i16], align 2, addrspace(5)
%cast = bitcast [32 x i16] addrspace(5)* %alloca to i8 addrspace(5)*
call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 2 dereferenceable(64) %cast, i8 0, i64 64, i1 false)
@@ -278,6 +324,22 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_endpgm
;
+; GFX940-LABEL: store_load_sindex_kernel:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT: v_mov_b32_e32 v0, 15
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_lshl_b32 s1, s0, 2
+; GFX940-NEXT: s_and_b32 s0, s0, 15
+; GFX940-NEXT: s_add_i32 s1, s1, 4
+; GFX940-NEXT: s_lshl_b32 s0, s0, 2
+; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_add_i32 s0, s0, 4
+; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
+;
; GFX10-PAL-LABEL: store_load_sindex_kernel:
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_getpc_b64 s[4:5]
@@ -302,6 +364,21 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX10-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX10-PAL-NEXT: s_endpgm
+; GCN-LABEL: store_load_sindex_kernel:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_load_dword s0, s[0:1], 0x24
+; GCN-NEXT: v_mov_b32_e32 v0, 15
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_lshl_b32 s1, s0, 2
+; GCN-NEXT: s_and_b32 s0, s0, 15
+; GCN-NEXT: s_lshl_b32 s0, s0, 2
+; GCN-NEXT: s_add_u32 s1, 4, s1
+; GCN-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_add_u32 s0, 4, s0
+; GCN-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_endpgm
bb:
%i = alloca [32 x float], align 4, addrspace(5)
%i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)*
@@ -371,6 +448,20 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_endpgm
;
+; GFX940-LABEL: store_load_sindex_foo:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_lshl_b32 s1, s0, 2
+; GFX940-NEXT: s_and_b32 s0, s0, 15
+; GFX940-NEXT: s_add_i32 s1, s1, 4
+; GFX940-NEXT: v_mov_b32_e32 v0, 15
+; GFX940-NEXT: s_lshl_b32 s0, s0, 2
+; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_add_i32 s0, s0, 4
+; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
+;
; GFX10-PAL-LABEL: store_load_sindex_foo:
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_getpc_b64 s[2:3]
@@ -393,6 +484,19 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX10-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX10-PAL-NEXT: s_endpgm
+; GCN-LABEL: store_load_sindex_foo:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_lshl_b32 s1, s0, 2
+; GCN-NEXT: s_and_b32 s0, s0, 15
+; GCN-NEXT: s_lshl_b32 s0, s0, 2
+; GCN-NEXT: s_add_u32 s1, 4, s1
+; GCN-NEXT: v_mov_b32_e32 v0, 15
+; GCN-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_add_u32 s0, 4, s0
+; GCN-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_endpgm
bb:
%i = alloca [32 x float], align 4, addrspace(5)
%i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)*
@@ -459,6 +563,17 @@ define amdgpu_kernel void @store_load_vindex_kernel() {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_endpgm
;
+; GFX940-LABEL: store_load_vindex_kernel:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX940-NEXT: v_mov_b32_e32 v1, 15
+; GFX940-NEXT: scratch_store_dword v0, v1, off offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_sub_u32_e32 v0, 4, v0
+; GFX940-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
+;
; GFX10-PAL-LABEL: store_load_vindex_kernel:
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_getpc_b64 s[2:3]
@@ -480,6 +595,16 @@ define amdgpu_kernel void @store_load_vindex_kernel() {
; GFX10-PAL-NEXT: scratch_load_dword v0, v0, off offset:124 glc dlc
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX10-PAL-NEXT: s_endpgm
+; GCN-LABEL: store_load_vindex_kernel:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT: v_mov_b32_e32 v1, 15
+; GCN-NEXT: scratch_store_dword v0, v1, off offset:4 sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_sub_u32_e32 v0, 4, v0
+; GCN-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_endpgm
bb:
%i = alloca [32 x float], align 4, addrspace(5)
%i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)*
@@ -539,6 +664,19 @@ define void @store_load_vindex_foo(i32 %idx) {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX940-LABEL: store_load_vindex_foo:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 15
+; GFX940-NEXT: v_and_b32_e32 v0, 15, v0
+; GFX940-NEXT: scratch_store_dword v1, v2, s32 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX940-NEXT: scratch_load_dword v0, v0, s32 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-PAL-LABEL: store_load_vindex_foo:
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -553,6 +691,18 @@ define void @store_load_vindex_foo(i32 %idx) {
; GFX10-PAL-NEXT: scratch_load_dword v0, v1, off glc dlc
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX10-PAL-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: store_load_vindex_foo:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v2, 15
+; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GCN-NEXT: v_and_b32_e32 v0, v0, v2
+; GCN-NEXT: scratch_store_dword v1, v2, s32 sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT: scratch_load_dword v0, v0, s32 sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
%i = alloca [32 x float], align 4, addrspace(5)
%i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)*
@@ -592,6 +742,14 @@ define void @private_ptr_foo(float addrspace(5)* nocapture %arg) {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX940-LABEL: private_ptr_foo:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v1, 0x41200000
+; GFX940-NEXT: scratch_store_dword v0, v1, off offset:4
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-PAL-LABEL: private_ptr_foo:
; GFX10-PAL: ; %bb.0:
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -600,6 +758,13 @@ define void @private_ptr_foo(float addrspace(5)* nocapture %arg) {
; GFX10-PAL-NEXT: scratch_store_dword v0, v1, off offset:4
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: private_ptr_foo:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v1, 0x41200000
+; GCN-NEXT: scratch_store_dword v0, v1, off offset:4
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr inbounds float, float addrspace(5)* %arg, i32 1
store float 1.000000e+01, float addrspace(5)* %gep, align 4
ret void
@@ -683,6 +848,22 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() {
; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:320
; GFX9-PAL-NEXT: s_endpgm
;
+; GFX940-LABEL: zero_init_small_offset_kernel:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_mov_b32 s0, 0
+; GFX940-NEXT: s_mov_b32 s1, s0
+; GFX940-NEXT: s_mov_b32 s2, s0
+; GFX940-NEXT: s_mov_b32 s3, s0
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:272
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:288
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:304
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:320
+; GFX940-NEXT: s_endpgm
+;
; GFX1010-PAL-LABEL: zero_init_small_offset_kernel:
; GFX1010-PAL: ; %bb.0:
; GFX1010-PAL-NEXT: s_getpc_b64 s[2:3]
@@ -815,6 +996,24 @@ define void @zero_init_small_offset_foo() {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX940-LABEL: zero_init_small_offset_foo:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: scratch_load_dword v0, off, s32 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_mov_b32 s0, 0
+; GFX940-NEXT: s_mov_b32 s1, s0
+; GFX940-NEXT: s_mov_b32 s2, s0
+; GFX940-NEXT: s_mov_b32 s3, s0
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:256
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:272
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:288
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:304
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-PAL-LABEL: zero_init_small_offset_foo:
; GFX10-PAL: ; %bb.0:
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -835,6 +1034,23 @@ define void @zero_init_small_offset_foo() {
; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:304
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: zero_init_small_offset_foo:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: scratch_load_dword v0, off, s32 sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_mov_b32 s0, 0
+; GCN-NEXT: s_mov_b32 s1, s0
+; GCN-NEXT: s_mov_b32 s2, s0
+; GCN-NEXT: s_mov_b32 s3, s0
+; GCN-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GCN-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
+; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:256
+; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:272
+; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:288
+; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:304
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
%padding = alloca [64 x i32], align 4, addrspace(5)
%alloca = alloca [32 x i16], align 2, addrspace(5)
%pad_gep = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %padding, i32 0, i32 undef
@@ -912,6 +1128,24 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_endpgm
;
+; GFX940-LABEL: store_load_sindex_small_offset_kernel:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v0, 15
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_lshl_b32 s1, s0, 2
+; GFX940-NEXT: s_and_b32 s0, s0, 15
+; GFX940-NEXT: s_addk_i32 s1, 0x104
+; GFX940-NEXT: s_lshl_b32 s0, s0, 2
+; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_addk_i32 s0, 0x104
+; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
+;
; GFX1010-PAL-LABEL: store_load_sindex_small_offset_kernel:
; GFX1010-PAL: ; %bb.0: ; %bb
; GFX1010-PAL-NEXT: s_getpc_b64 s[4:5]
@@ -1046,6 +1280,22 @@ define amdgpu_ps void @store_load_sindex_small_offset_foo(i32 inreg %idx) {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_endpgm
;
+; GFX940-LABEL: store_load_sindex_small_offset_foo:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_lshl_b32 s1, s0, 2
+; GFX940-NEXT: s_and_b32 s0, s0, 15
+; GFX940-NEXT: s_addk_i32 s1, 0x104
+; GFX940-NEXT: v_mov_b32_e32 v0, 15
+; GFX940-NEXT: s_lshl_b32 s0, s0, 2
+; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_addk_i32 s0, 0x104
+; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
+;
; GFX1010-PAL-LABEL: store_load_sindex_small_offset_foo:
; GFX1010-PAL: ; %bb.0: ; %bb
; GFX1010-PAL-NEXT: s_getpc_b64 s[2:3]
@@ -1173,6 +1423,19 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_endpgm
;
+; GFX940-LABEL: store_load_vindex_small_offset_kernel:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: scratch_load_dword v1, off, off offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX940-NEXT: v_mov_b32_e32 v1, 15
+; GFX940-NEXT: scratch_store_dword v0, v1, off offset:260 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_sub_u32_e32 v0, 0x104, v0
+; GFX940-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
+;
; GFX1010-PAL-LABEL: store_load_vindex_small_offset_kernel:
; GFX1010-PAL: ; %bb.0: ; %bb
; GFX1010-PAL-NEXT: s_getpc_b64 s[2:3]
@@ -1292,6 +1555,21 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX940-LABEL: store_load_vindex_small_offset_foo:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: scratch_load_dword v1, off, s32 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 15
+; GFX940-NEXT: v_and_b32_e32 v0, 15, v0
+; GFX940-NEXT: scratch_store_dword v1, v2, s32 offset:256 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX940-NEXT: scratch_load_dword v0, v0, s32 offset:256 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-PAL-LABEL: store_load_vindex_small_offset_foo:
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1309,6 +1587,20 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
; GFX10-PAL-NEXT: scratch_load_dword v0, v1, off glc dlc
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX10-PAL-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: store_load_vindex_small_offset_foo:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: scratch_load_dword v1, off, s32 sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v2, 15
+; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GCN-NEXT: v_and_b32_e32 v0, v0, v2
+; GCN-NEXT: scratch_store_dword v1, v2, s32 offset:256 sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT: scratch_load_dword v0, v0, s32 offset:256 sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
%padding = alloca [64 x i32], align 4, addrspace(5)
%i = alloca [32 x float], align 4, addrspace(5)
@@ -1407,6 +1699,26 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
; GFX9-PAL-NEXT: s_endpgm
;
+; GFX940-LABEL: zero_init_large_offset_kernel:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: scratch_load_dword v0, off, off offset:16 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_mov_b32 s0, 0
+; GFX940-NEXT: s_mov_b32 s1, s0
+; GFX940-NEXT: s_mov_b32 s2, s0
+; GFX940-NEXT: s_mov_b32 s3, s0
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
+; GFX940-NEXT: s_movk_i32 vcc_hi, 0x4010
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi
+; GFX940-NEXT: s_movk_i32 vcc_hi, 0x4010
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16
+; GFX940-NEXT: s_movk_i32 vcc_hi, 0x4010
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32
+; GFX940-NEXT: s_movk_i32 vcc_hi, 0x4010
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
+; GFX940-NEXT: s_endpgm
+;
; GFX1010-PAL-LABEL: zero_init_large_offset_kernel:
; GFX1010-PAL: ; %bb.0:
; GFX1010-PAL-NEXT: s_getpc_b64 s[2:3]
@@ -1555,6 +1867,28 @@ define void @zero_init_large_offset_foo() {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX940-LABEL: zero_init_large_offset_foo:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: scratch_load_dword v0, off, s32 offset:16 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_mov_b32 s0, 0
+; GFX940-NEXT: s_mov_b32 s1, s0
+; GFX940-NEXT: s_mov_b32 s2, s0
+; GFX940-NEXT: s_mov_b32 s3, s0
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
+; GFX940-NEXT: s_add_i32 vcc_hi, s32, 0x4010
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi
+; GFX940-NEXT: s_add_i32 vcc_hi, s32, 0x4010
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16
+; GFX940-NEXT: s_add_i32 vcc_hi, s32, 0x4010
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32
+; GFX940-NEXT: s_add_i32 vcc_hi, s32, 0x4010
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1010-PAL-LABEL: zero_init_large_offset_foo:
; GFX1010-PAL: ; %bb.0:
; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1684,6 +2018,24 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_endpgm
;
+; GFX940-LABEL: store_load_sindex_large_offset_kernel:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v0, 15
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_lshl_b32 s1, s0, 2
+; GFX940-NEXT: s_and_b32 s0, s0, 15
+; GFX940-NEXT: s_addk_i32 s1, 0x4004
+; GFX940-NEXT: s_lshl_b32 s0, s0, 2
+; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_addk_i32 s0, 0x4004
+; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
+;
; GFX1010-PAL-LABEL: store_load_sindex_large_offset_kernel:
; GFX1010-PAL: ; %bb.0: ; %bb
; GFX1010-PAL-NEXT: s_getpc_b64 s[4:5]
@@ -1818,6 +2170,22 @@ define amdgpu_ps void @store_load_sindex_large_offset_foo(i32 inreg %idx) {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_endpgm
;
+; GFX940-LABEL: store_load_sindex_large_offset_foo:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_lshl_b32 s1, s0, 2
+; GFX940-NEXT: s_and_b32 s0, s0, 15
+; GFX940-NEXT: s_addk_i32 s1, 0x4004
+; GFX940-NEXT: v_mov_b32_e32 v0, 15
+; GFX940-NEXT: s_lshl_b32 s0, s0, 2
+; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_addk_i32 s0, 0x4004
+; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
+;
; GFX1010-PAL-LABEL: store_load_sindex_large_offset_foo:
; GFX1010-PAL: ; %bb.0: ; %bb
; GFX1010-PAL-NEXT: s_getpc_b64 s[2:3]
@@ -1945,6 +2313,20 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_endpgm
;
+; GFX940-LABEL: store_load_vindex_large_offset_kernel:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: scratch_load_dword v1, off, off offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX940-NEXT: v_mov_b32_e32 v1, 15
+; GFX940-NEXT: s_movk_i32 vcc_hi, 0x4004
+; GFX940-NEXT: scratch_store_dword v0, v1, vcc_hi sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_sub_u32_e32 v0, 0x4004, v0
+; GFX940-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
+;
; GFX1010-PAL-LABEL: store_load_vindex_large_offset_kernel:
; GFX1010-PAL: ; %bb.0: ; %bb
; GFX1010-PAL-NEXT: s_getpc_b64 s[2:3]
@@ -2064,6 +2446,23 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX940-LABEL: store_load_vindex_large_offset_foo:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: scratch_load_dword v1, off, s32 offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 15
+; GFX940-NEXT: s_add_i32 vcc_hi, s32, 0x4004
+; GFX940-NEXT: v_and_b32_e32 v0, 15, v0
+; GFX940-NEXT: scratch_store_dword v1, v2, vcc_hi sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX940-NEXT: s_add_i32 vcc_hi, s32, 0x4004
+; GFX940-NEXT: scratch_load_dword v0, v0, vcc_hi sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-PAL-LABEL: store_load_vindex_large_offset_foo:
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2081,6 +2480,22 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
; GFX10-PAL-NEXT: scratch_load_dword v0, v1, off glc dlc
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX10-PAL-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: store_load_vindex_large_offset_foo:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: scratch_load_dword v1, off, s32 sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v2, 15
+; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GCN-NEXT: v_and_b32_e32 v0, v0, v2
+; GCN-NEXT: s_add_u32 vcc_hi, s32, 0x4000
+; GCN-NEXT: scratch_store_dword v1, v2, vcc_hi sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT: s_add_u32 vcc_hi, s32, 0x4000
+; GCN-NEXT: scratch_load_dword v0, v0, vcc_hi sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
%padding = alloca [4096 x i32], align 4, addrspace(5)
%i = alloca [32 x float], align 4, addrspace(5)
@@ -2155,6 +2570,19 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_endpgm
;
+; GFX940-LABEL: store_load_large_imm_offset_kernel:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: v_mov_b32_e32 v0, 13
+; GFX940-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v0, 0x3000
+; GFX940-NEXT: v_mov_b32_e32 v1, 15
+; GFX940-NEXT: scratch_store_dword v0, v1, off offset:3716 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_load_dword v0, v0, off offset:3716 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
+;
; GFX1010-PAL-LABEL: store_load_large_imm_offset_kernel:
; GFX1010-PAL: ; %bb.0: ; %bb
; GFX1010-PAL-NEXT: s_getpc_b64 s[2:3]
@@ -2262,6 +2690,20 @@ define void @store_load_large_imm_offset_foo() {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX940-LABEL: store_load_large_imm_offset_foo:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v0, 13
+; GFX940-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v0, 0x3000
+; GFX940-NEXT: v_mov_b32_e32 v1, 15
+; GFX940-NEXT: scratch_store_dword v0, v1, s32 offset:3716 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_load_dword v0, v0, s32 offset:3716 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-PAL-LABEL: store_load_large_imm_offset_foo:
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2278,6 +2720,19 @@ define void @store_load_large_imm_offset_foo() {
; GFX10-PAL-NEXT: scratch_load_dword v0, off, s0 offset:1664 glc dlc
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX10-PAL-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: store_load_large_imm_offset_foo:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v0, 13
+; GCN-NEXT: scratch_store_dword off, v0, s32 sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v0, 0x3000
+; GCN-NEXT: v_mov_b32_e32 v1, 15
+; GCN-NEXT: scratch_store_dword v0, v1, s32 offset:3712 sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: scratch_load_dword v0, v0, s32 offset:3712 sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
%i = alloca [4096 x i32], align 4, addrspace(5)
%i1 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 undef
@@ -2343,6 +2798,18 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_endpgm
;
+; GFX940-LABEL: store_load_vidx_sidx_offset:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT: v_mov_b32_e32 v1, 15
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_add_lshl_u32 v0, s0, v0, 2
+; GFX940-NEXT: scratch_store_dword v0, v1, off offset:1028 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_load_dword v0, v0, off offset:1028 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
+;
; GFX10-PAL-LABEL: store_load_vidx_sidx_offset:
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_getpc_b64 s[4:5]
@@ -2364,6 +2831,17 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) {
; GFX10-PAL-NEXT: scratch_load_dword v0, v0, off offset:1024 glc dlc
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX10-PAL-NEXT: s_endpgm
+; GCN-LABEL: store_load_vidx_sidx_offset:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_load_dword s0, s[0:1], 0x24
+; GCN-NEXT: v_mov_b32_e32 v1, 15
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_add_lshl_u32 v0, s0, v0, 2
+; GCN-NEXT: scratch_store_dword v0, v1, off offset:1028 sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: scratch_load_dword v0, v0, off offset:1028 sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_endpgm
bb:
%alloca = alloca [32 x i32], align 4, addrspace(5)
%vidx = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -2410,6 +2888,17 @@ define void @store_load_i64_aligned(i64 addrspace(5)* nocapture %arg) {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX940-LABEL: store_load_i64_aligned:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v2, 15
+; GFX940-NEXT: v_mov_b32_e32 v3, 0
+; GFX940-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-PAL-LABEL: store_load_i64_aligned:
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2421,6 +2910,16 @@ define void @store_load_i64_aligned(i64 addrspace(5)* nocapture %arg) {
; GFX10-PAL-NEXT: scratch_load_dwordx2 v[0:1], v0, off glc dlc
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX10-PAL-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: store_load_i64_aligned:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v2, 15
+; GCN-NEXT: v_mov_b32_e32 v3, 0
+; GCN-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
store volatile i64 15, i64 addrspace(5)* %arg, align 8
%load = load volatile i64, i64 addrspace(5)* %arg, align 8
@@ -2462,6 +2961,17 @@ define void @store_load_i64_unaligned(i64 addrspace(5)* nocapture %arg) {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX940-LABEL: store_load_i64_unaligned:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v2, 15
+; GFX940-NEXT: v_mov_b32_e32 v3, 0
+; GFX940-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-PAL-LABEL: store_load_i64_unaligned:
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2473,6 +2983,16 @@ define void @store_load_i64_unaligned(i64 addrspace(5)* nocapture %arg) {
; GFX10-PAL-NEXT: scratch_load_dwordx2 v[0:1], v0, off glc dlc
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX10-PAL-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: store_load_i64_unaligned:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v2, 15
+; GCN-NEXT: v_mov_b32_e32 v3, 0
+; GCN-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
store volatile i64 15, i64 addrspace(5)* %arg, align 1
%load = load volatile i64, i64 addrspace(5)* %arg, align 1
@@ -2517,6 +3037,18 @@ define void @store_load_v3i32_unaligned(<3 x i32> addrspace(5)* nocapture %arg)
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX940-LABEL: store_load_v3i32_unaligned:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v2, 1
+; GFX940-NEXT: v_mov_b32_e32 v3, 2
+; GFX940-NEXT: v_mov_b32_e32 v4, 3
+; GFX940-NEXT: scratch_store_dwordx3 v0, v[2:4], off sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_load_dwordx3 v[0:2], v0, off sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-PAL-LABEL: store_load_v3i32_unaligned:
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2529,6 +3061,17 @@ define void @store_load_v3i32_unaligned(<3 x i32> addrspace(5)* nocapture %arg)
; GFX10-PAL-NEXT: scratch_load_dwordx3 v[0:2], v0, off glc dlc
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX10-PAL-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: store_load_v3i32_unaligned:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v2, 1
+; GCN-NEXT: v_mov_b32_e32 v3, 2
+; GCN-NEXT: v_mov_b32_e32 v4, 3
+; GCN-NEXT: scratch_store_dwordx3 v0, v[2:4], off sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: scratch_load_dwordx3 v[0:2], v0, off sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
store volatile <3 x i32> <i32 1, i32 2, i32 3>, <3 x i32> addrspace(5)* %arg, align 1
%load = load volatile <3 x i32>, <3 x i32> addrspace(5)* %arg, align 1
@@ -2576,6 +3119,19 @@ define void @store_load_v4i32_unaligned(<4 x i32> addrspace(5)* nocapture %arg)
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX940-LABEL: store_load_v4i32_unaligned:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v2, 1
+; GFX940-NEXT: v_mov_b32_e32 v3, 2
+; GFX940-NEXT: v_mov_b32_e32 v4, 3
+; GFX940-NEXT: v_mov_b32_e32 v5, 4
+; GFX940-NEXT: scratch_store_dwordx4 v0, v[2:5], off sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_load_dwordx4 v[0:3], v0, off sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-PAL-LABEL: store_load_v4i32_unaligned:
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2589,6 +3145,18 @@ define void @store_load_v4i32_unaligned(<4 x i32> addrspace(5)* nocapture %arg)
; GFX10-PAL-NEXT: scratch_load_dwordx4 v[0:3], v0, off glc dlc
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX10-PAL-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: store_load_v4i32_unaligned:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v2, 1
+; GCN-NEXT: v_mov_b32_e32 v3, 2
+; GCN-NEXT: v_mov_b32_e32 v4, 3
+; GCN-NEXT: v_mov_b32_e32 v5, 4
+; GCN-NEXT: scratch_store_dwordx4 v0, v[2:5], off sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: scratch_load_dwordx4 v[0:3], v0, off sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
store volatile <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> addrspace(5)* %arg, align 1
%load = load volatile <4 x i32>, <4 x i32> addrspace(5)* %arg, align 1
@@ -2629,6 +3197,17 @@ define void @store_load_i32_negative_unaligned(i8 addrspace(5)* nocapture %arg)
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX940-LABEL: store_load_i32_negative_unaligned:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_add_u32_e32 v0, -1, v0
+; GFX940-NEXT: v_mov_b32_e32 v1, 1
+; GFX940-NEXT: scratch_store_byte v0, v1, off sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_load_ubyte v0, v0, off sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1010-PAL-LABEL: store_load_i32_negative_unaligned:
; GFX1010-PAL: ; %bb.0: ; %bb
; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2693,6 +3272,17 @@ define void @store_load_i32_large_negative_unaligned(i8 addrspace(5)* nocapture
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX940-LABEL: store_load_i32_large_negative_unaligned:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: s_movk_i32 s0, 0xef7f
+; GFX940-NEXT: v_mov_b32_e32 v1, 1
+; GFX940-NEXT: scratch_store_byte v0, v1, s0 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_load_ubyte v0, v0, s0 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1010-PAL-LABEL: store_load_i32_large_negative_unaligned:
; GFX1010-PAL: ; %bb.0: ; %bb
; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2803,6 +3393,26 @@ define amdgpu_ps void @large_offset() {
; GFX9-PAL-NEXT: ;;#ASMEND
; GFX9-PAL-NEXT: s_endpgm
;
+; GFX940-LABEL: large_offset:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-NEXT: v_mov_b32_e32 v1, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, v0
+; GFX940-NEXT: v_mov_b32_e32 v3, v0
+; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:3024 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:3024 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v0, 16
+; GFX940-NEXT: ;;#ASMSTART
+; GFX940-NEXT: ; use v0
+; GFX940-NEXT: ;;#ASMEND
+; GFX940-NEXT: v_mov_b32_e32 v0, 0x810
+; GFX940-NEXT: ;;#ASMSTART
+; GFX940-NEXT: ; use v0
+; GFX940-NEXT: ;;#ASMEND
+; GFX940-NEXT: s_endpgm
+;
; GFX10-PAL-LABEL: large_offset:
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_getpc_b64 s[2:3]
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
index b511b98ac2551..00c74ff0839da 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
@@ -282,10 +282,10 @@ define amdgpu_kernel void @private_nontemporal_load_1(
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s4, s[0:1], 0x0
; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8
+; GFX940-NOTTGSPLIT-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, 0
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-NOTTGSPLIT-NEXT: v_lshl_add_u32 v0, v0, 2, s4
-; GFX940-NOTTGSPLIT-NEXT: scratch_load_dword v0, v0, off nt
+; GFX940-NOTTGSPLIT-NEXT: scratch_load_dword v0, v0, s4 nt
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: global_store_dword v1, v0, s[2:3]
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
@@ -294,10 +294,10 @@ define amdgpu_kernel void @private_nontemporal_load_1(
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: s_load_dword s4, s[0:1], 0x0
; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8
+; GFX940-TGSPLIT-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, 0
; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-TGSPLIT-NEXT: v_lshl_add_u32 v0, v0, 2, s4
-; GFX940-TGSPLIT-NEXT: scratch_load_dword v0, v0, off nt
+; GFX940-TGSPLIT-NEXT: scratch_load_dword v0, v0, s4 nt
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: global_store_dword v1, v0, s[2:3]
; GFX940-TGSPLIT-NEXT: s_endpgm
@@ -579,24 +579,24 @@ define amdgpu_kernel void @private_nontemporal_store_1(
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s4, s[0:1], 0x8
+; GFX940-NOTTGSPLIT-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-NOTTGSPLIT-NEXT: v_lshl_add_u32 v0, v0, 2, s4
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[2:3], 0x0
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0
-; GFX940-NOTTGSPLIT-NEXT: scratch_store_dword v0, v1, off nt
+; GFX940-NOTTGSPLIT-NEXT: scratch_store_dword v0, v1, s4 nt
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: private_nontemporal_store_1:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
; GFX940-TGSPLIT-NEXT: s_load_dword s4, s[0:1], 0x8
+; GFX940-TGSPLIT-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-TGSPLIT-NEXT: v_lshl_add_u32 v0, v0, 2, s4
; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[2:3], 0x0
; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
-; GFX940-TGSPLIT-NEXT: scratch_store_dword v0, v1, off nt
+; GFX940-TGSPLIT-NEXT: scratch_store_dword v0, v1, s4 nt
; GFX940-TGSPLIT-NEXT: s_endpgm
i32 addrspace(1)* %in, i32 addrspace(5)* %out) {
entry:
diff --git a/llvm/test/MC/AMDGPU/flat-scratch-gfx940.s b/llvm/test/MC/AMDGPU/flat-scratch-gfx940.s
new file mode 100644
index 0000000000000..3af48bcd8ea18
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/flat-scratch-gfx940.s
@@ -0,0 +1,1057 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx940 -show-encoding %s | FileCheck -check-prefix=GFX940 %s
+
+scratch_load_dword a2, v4, s6
+// GFX940: scratch_load_dword a2, v4, s6 ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_dword a2, v4, s6 offset:16
+// GFX940: scratch_load_dword a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_dword a2, v4, off
+// GFX940: scratch_load_dword a2, v4, off ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_dword a2, v4, off offset:16
+// GFX940: scratch_load_dword a2, v4, off offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_dword a2, off, s6
+// GFX940: scratch_load_dword a2, off, s6 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_dword a2, off, s6 offset:16
+// GFX940: scratch_load_dword a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_dword a2, off, off
+// GFX940: scratch_load_dword a2, off, off ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_dword a2, off, off offset:16
+// GFX940: scratch_load_dword a2, off, off offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_dword v2, v4, s6
+// GFX940: scratch_load_dword v2, v4, s6 ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_dword v2, v4, s6 offset:16
+// GFX940: scratch_load_dword v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_dword v2, v4, off
+// GFX940: scratch_load_dword v2, v4, off ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_dword v2, v4, off offset:16
+// GFX940: scratch_load_dword v2, v4, off offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_dword v2, off, s6
+// GFX940: scratch_load_dword v2, off, s6 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_dword v2, off, s6 offset:16
+// GFX940: scratch_load_dword v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_dword v2, off, off
+// GFX940: scratch_load_dword v2, off, off ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_dword v2, off, off offset:16
+// GFX940: scratch_load_dword v2, off, off offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_dwordx2 a[2:3], v4, s6
+// GFX940: scratch_load_dwordx2 a[2:3], v4, s6 ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_dwordx2 a[2:3], v4, s6 offset:16
+// GFX940: scratch_load_dwordx2 a[2:3], v4, s6 offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_dwordx2 a[2:3], v4, off
+// GFX940: scratch_load_dwordx2 a[2:3], v4, off ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_dwordx2 a[2:3], v4, off offset:16
+// GFX940: scratch_load_dwordx2 a[2:3], v4, off offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_dwordx2 a[2:3], off, s6
+// GFX940: scratch_load_dwordx2 a[2:3], off, s6 ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_dwordx2 a[2:3], off, s6 offset:16
+// GFX940: scratch_load_dwordx2 a[2:3], off, s6 offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_dwordx2 a[2:3], off, off
+// GFX940: scratch_load_dwordx2 a[2:3], off, off ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_dwordx2 a[2:3], off, off offset:16
+// GFX940: scratch_load_dwordx2 a[2:3], off, off offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_dwordx2 v[2:3], v4, s6
+// GFX940: scratch_load_dwordx2 v[2:3], v4, s6 ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_dwordx2 v[2:3], v4, s6 offset:16
+// GFX940: scratch_load_dwordx2 v[2:3], v4, s6 offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_dwordx2 v[2:3], v4, off
+// GFX940: scratch_load_dwordx2 v[2:3], v4, off ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_dwordx2 v[2:3], v4, off offset:16
+// GFX940: scratch_load_dwordx2 v[2:3], v4, off offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_dwordx2 v[2:3], off, s6
+// GFX940: scratch_load_dwordx2 v[2:3], off, s6 ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_dwordx2 v[2:3], off, s6 offset:16
+// GFX940: scratch_load_dwordx2 v[2:3], off, s6 offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_dwordx2 v[2:3], off, off
+// GFX940: scratch_load_dwordx2 v[2:3], off, off ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_dwordx2 v[2:3], off, off offset:16
+// GFX940: scratch_load_dwordx2 v[2:3], off, off offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_dwordx3 a[2:4], v4, s6
+// GFX940: scratch_load_dwordx3 a[2:4], v4, s6 ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_dwordx3 a[2:4], v4, s6 offset:16
+// GFX940: scratch_load_dwordx3 a[2:4], v4, s6 offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_dwordx3 a[2:4], v4, off
+// GFX940: scratch_load_dwordx3 a[2:4], v4, off ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_dwordx3 a[2:4], v4, off offset:16
+// GFX940: scratch_load_dwordx3 a[2:4], v4, off offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_dwordx3 a[2:4], off, s6
+// GFX940: scratch_load_dwordx3 a[2:4], off, s6 ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_dwordx3 a[2:4], off, s6 offset:16
+// GFX940: scratch_load_dwordx3 a[2:4], off, s6 offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_dwordx3 a[2:4], off, off
+// GFX940: scratch_load_dwordx3 a[2:4], off, off ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_dwordx3 a[2:4], off, off offset:16
+// GFX940: scratch_load_dwordx3 a[2:4], off, off offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_dwordx3 v[2:4], v4, s6
+// GFX940: scratch_load_dwordx3 v[2:4], v4, s6 ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_dwordx3 v[2:4], v4, s6 offset:16
+// GFX940: scratch_load_dwordx3 v[2:4], v4, s6 offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_dwordx3 v[2:4], v4, off
+// GFX940: scratch_load_dwordx3 v[2:4], v4, off ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_dwordx3 v[2:4], v4, off offset:16
+// GFX940: scratch_load_dwordx3 v[2:4], v4, off offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_dwordx3 v[2:4], off, s6
+// GFX940: scratch_load_dwordx3 v[2:4], off, s6 ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_dwordx3 v[2:4], off, s6 offset:16
+// GFX940: scratch_load_dwordx3 v[2:4], off, s6 offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_dwordx3 v[2:4], off, off
+// GFX940: scratch_load_dwordx3 v[2:4], off, off ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_dwordx3 v[2:4], off, off offset:16
+// GFX940: scratch_load_dwordx3 v[2:4], off, off offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_dwordx4 a[2:5], v4, s6
+// GFX940: scratch_load_dwordx4 a[2:5], v4, s6 ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_dwordx4 a[2:5], v4, s6 offset:16
+// GFX940: scratch_load_dwordx4 a[2:5], v4, s6 offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_dwordx4 a[2:5], v4, off
+// GFX940: scratch_load_dwordx4 a[2:5], v4, off ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_dwordx4 a[2:5], v4, off offset:16
+// GFX940: scratch_load_dwordx4 a[2:5], v4, off offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_dwordx4 a[2:5], off, s6
+// GFX940: scratch_load_dwordx4 a[2:5], off, s6 ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_dwordx4 a[2:5], off, s6 offset:16
+// GFX940: scratch_load_dwordx4 a[2:5], off, s6 offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_dwordx4 a[2:5], off, off
+// GFX940: scratch_load_dwordx4 a[2:5], off, off ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_dwordx4 a[2:5], off, off offset:16
+// GFX940: scratch_load_dwordx4 a[2:5], off, off offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_dwordx4 v[2:5], v4, s6
+// GFX940: scratch_load_dwordx4 v[2:5], v4, s6 ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_dwordx4 v[2:5], v4, s6 offset:16
+// GFX940: scratch_load_dwordx4 v[2:5], v4, s6 offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_dwordx4 v[2:5], v4, off
+// GFX940: scratch_load_dwordx4 v[2:5], v4, off ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_dwordx4 v[2:5], v4, off offset:16
+// GFX940: scratch_load_dwordx4 v[2:5], v4, off offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_dwordx4 v[2:5], off, s6
+// GFX940: scratch_load_dwordx4 v[2:5], off, s6 ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_dwordx4 v[2:5], off, s6 offset:16
+// GFX940: scratch_load_dwordx4 v[2:5], off, s6 offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_dwordx4 v[2:5], off, off
+// GFX940: scratch_load_dwordx4 v[2:5], off, off ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_dwordx4 v[2:5], off, off offset:16
+// GFX940: scratch_load_dwordx4 v[2:5], off, off offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_sbyte a2, v4, s6
+// GFX940: scratch_load_sbyte a2, v4, s6 ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_sbyte a2, v4, s6 offset:16
+// GFX940: scratch_load_sbyte a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_sbyte a2, v4, off
+// GFX940: scratch_load_sbyte a2, v4, off ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_sbyte a2, v4, off offset:16
+// GFX940: scratch_load_sbyte a2, v4, off offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_sbyte a2, off, s6
+// GFX940: scratch_load_sbyte a2, off, s6 ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_sbyte a2, off, s6 offset:16
+// GFX940: scratch_load_sbyte a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_sbyte a2, off, off
+// GFX940: scratch_load_sbyte a2, off, off ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_sbyte a2, off, off offset:16
+// GFX940: scratch_load_sbyte a2, off, off offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_sbyte v2, v4, s6
+// GFX940: scratch_load_sbyte v2, v4, s6 ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_sbyte v2, v4, s6 offset:16
+// GFX940: scratch_load_sbyte v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_sbyte v2, v4, off
+// GFX940: scratch_load_sbyte v2, v4, off ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_sbyte v2, v4, off offset:16
+// GFX940: scratch_load_sbyte v2, v4, off offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_sbyte v2, off, s6
+// GFX940: scratch_load_sbyte v2, off, s6 ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_sbyte v2, off, s6 offset:16
+// GFX940: scratch_load_sbyte v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_sbyte v2, off, off
+// GFX940: scratch_load_sbyte v2, off, off ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_sbyte v2, off, off offset:16
+// GFX940: scratch_load_sbyte v2, off, off offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_sbyte_d16 a2, v4, s6
+// GFX940: scratch_load_sbyte_d16 a2, v4, s6 ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_sbyte_d16 a2, v4, s6 offset:16
+// GFX940: scratch_load_sbyte_d16 a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_sbyte_d16 a2, v4, off
+// GFX940: scratch_load_sbyte_d16 a2, v4, off ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_sbyte_d16 a2, v4, off offset:16
+// GFX940: scratch_load_sbyte_d16 a2, v4, off offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_sbyte_d16 a2, off, s6
+// GFX940: scratch_load_sbyte_d16 a2, off, s6 ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_sbyte_d16 a2, off, s6 offset:16
+// GFX940: scratch_load_sbyte_d16 a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_sbyte_d16 a2, off, off
+// GFX940: scratch_load_sbyte_d16 a2, off, off ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_sbyte_d16 a2, off, off offset:16
+// GFX940: scratch_load_sbyte_d16 a2, off, off offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_sbyte_d16 v2, v4, s6
+// GFX940: scratch_load_sbyte_d16 v2, v4, s6 ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_sbyte_d16 v2, v4, s6 offset:16
+// GFX940: scratch_load_sbyte_d16 v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_sbyte_d16 v2, v4, off
+// GFX940: scratch_load_sbyte_d16 v2, v4, off ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_sbyte_d16 v2, v4, off offset:16
+// GFX940: scratch_load_sbyte_d16 v2, v4, off offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_sbyte_d16 v2, off, s6
+// GFX940: scratch_load_sbyte_d16 v2, off, s6 ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_sbyte_d16 v2, off, s6 offset:16
+// GFX940: scratch_load_sbyte_d16 v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_sbyte_d16 v2, off, off
+// GFX940: scratch_load_sbyte_d16 v2, off, off ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_sbyte_d16 v2, off, off offset:16
+// GFX940: scratch_load_sbyte_d16 v2, off, off offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_sbyte_d16_hi a2, v4, s6
+// GFX940: scratch_load_sbyte_d16_hi a2, v4, s6 ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_sbyte_d16_hi a2, v4, s6 offset:16
+// GFX940: scratch_load_sbyte_d16_hi a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_sbyte_d16_hi a2, v4, off
+// GFX940: scratch_load_sbyte_d16_hi a2, v4, off ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_sbyte_d16_hi a2, v4, off offset:16
+// GFX940: scratch_load_sbyte_d16_hi a2, v4, off offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_sbyte_d16_hi a2, off, s6
+// GFX940: scratch_load_sbyte_d16_hi a2, off, s6 ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_sbyte_d16_hi a2, off, s6 offset:16
+// GFX940: scratch_load_sbyte_d16_hi a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_sbyte_d16_hi a2, off, off
+// GFX940: scratch_load_sbyte_d16_hi a2, off, off ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_sbyte_d16_hi a2, off, off offset:16
+// GFX940: scratch_load_sbyte_d16_hi a2, off, off offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_sbyte_d16_hi v2, v4, s6
+// GFX940: scratch_load_sbyte_d16_hi v2, v4, s6 ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_sbyte_d16_hi v2, v4, s6 offset:16
+// GFX940: scratch_load_sbyte_d16_hi v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_sbyte_d16_hi v2, v4, off
+// GFX940: scratch_load_sbyte_d16_hi v2, v4, off ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_sbyte_d16_hi v2, v4, off offset:16
+// GFX940: scratch_load_sbyte_d16_hi v2, v4, off offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_sbyte_d16_hi v2, off, s6
+// GFX940: scratch_load_sbyte_d16_hi v2, off, s6 ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_sbyte_d16_hi v2, off, s6 offset:16
+// GFX940: scratch_load_sbyte_d16_hi v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_sbyte_d16_hi v2, off, off
+// GFX940: scratch_load_sbyte_d16_hi v2, off, off ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_sbyte_d16_hi v2, off, off offset:16
+// GFX940: scratch_load_sbyte_d16_hi v2, off, off offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_short_d16 a2, v4, s6
+// GFX940: scratch_load_short_d16 a2, v4, s6 ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_short_d16 a2, v4, s6 offset:16
+// GFX940: scratch_load_short_d16 a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_short_d16 a2, v4, off
+// GFX940: scratch_load_short_d16 a2, v4, off ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_short_d16 a2, v4, off offset:16
+// GFX940: scratch_load_short_d16 a2, v4, off offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_short_d16 a2, off, s6
+// GFX940: scratch_load_short_d16 a2, off, s6 ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_short_d16 a2, off, s6 offset:16
+// GFX940: scratch_load_short_d16 a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_short_d16 a2, off, off
+// GFX940: scratch_load_short_d16 a2, off, off ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_short_d16 a2, off, off offset:16
+// GFX940: scratch_load_short_d16 a2, off, off offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_short_d16 v2, v4, s6
+// GFX940: scratch_load_short_d16 v2, v4, s6 ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_short_d16 v2, v4, s6 offset:16
+// GFX940: scratch_load_short_d16 v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_short_d16 v2, v4, off
+// GFX940: scratch_load_short_d16 v2, v4, off ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_short_d16 v2, v4, off offset:16
+// GFX940: scratch_load_short_d16 v2, v4, off offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_short_d16 v2, off, s6
+// GFX940: scratch_load_short_d16 v2, off, s6 ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_short_d16 v2, off, s6 offset:16
+// GFX940: scratch_load_short_d16 v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_short_d16 v2, off, off
+// GFX940: scratch_load_short_d16 v2, off, off ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_short_d16 v2, off, off offset:16
+// GFX940: scratch_load_short_d16 v2, off, off offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_short_d16_hi a2, v4, s6
+// GFX940: scratch_load_short_d16_hi a2, v4, s6 ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_short_d16_hi a2, v4, s6 offset:16
+// GFX940: scratch_load_short_d16_hi a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_short_d16_hi a2, v4, off
+// GFX940: scratch_load_short_d16_hi a2, v4, off ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_short_d16_hi a2, v4, off offset:16
+// GFX940: scratch_load_short_d16_hi a2, v4, off offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_short_d16_hi a2, off, s6
+// GFX940: scratch_load_short_d16_hi a2, off, s6 ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_short_d16_hi a2, off, s6 offset:16
+// GFX940: scratch_load_short_d16_hi a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_short_d16_hi a2, off, off
+// GFX940: scratch_load_short_d16_hi a2, off, off ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_short_d16_hi a2, off, off offset:16
+// GFX940: scratch_load_short_d16_hi a2, off, off offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_short_d16_hi v2, v4, s6
+// GFX940: scratch_load_short_d16_hi v2, v4, s6 ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_short_d16_hi v2, v4, s6 offset:16
+// GFX940: scratch_load_short_d16_hi v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_short_d16_hi v2, v4, off
+// GFX940: scratch_load_short_d16_hi v2, v4, off ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_short_d16_hi v2, v4, off offset:16
+// GFX940: scratch_load_short_d16_hi v2, v4, off offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_short_d16_hi v2, off, s6
+// GFX940: scratch_load_short_d16_hi v2, off, s6 ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_short_d16_hi v2, off, s6 offset:16
+// GFX940: scratch_load_short_d16_hi v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_short_d16_hi v2, off, off
+// GFX940: scratch_load_short_d16_hi v2, off, off ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_short_d16_hi v2, off, off offset:16
+// GFX940: scratch_load_short_d16_hi v2, off, off offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_sshort a2, v4, s6
+// GFX940: scratch_load_sshort a2, v4, s6 ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_sshort a2, v4, s6 offset:16
+// GFX940: scratch_load_sshort a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_sshort a2, v4, off
+// GFX940: scratch_load_sshort a2, v4, off ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_sshort a2, v4, off offset:16
+// GFX940: scratch_load_sshort a2, v4, off offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_sshort a2, off, s6
+// GFX940: scratch_load_sshort a2, off, s6 ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_sshort a2, off, s6 offset:16
+// GFX940: scratch_load_sshort a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_sshort a2, off, off
+// GFX940: scratch_load_sshort a2, off, off ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_sshort a2, off, off offset:16
+// GFX940: scratch_load_sshort a2, off, off offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_sshort v2, v4, s6
+// GFX940: scratch_load_sshort v2, v4, s6 ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_sshort v2, v4, s6 offset:16
+// GFX940: scratch_load_sshort v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_sshort v2, v4, off
+// GFX940: scratch_load_sshort v2, v4, off ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_sshort v2, v4, off offset:16
+// GFX940: scratch_load_sshort v2, v4, off offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_sshort v2, off, s6
+// GFX940: scratch_load_sshort v2, off, s6 ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_sshort v2, off, s6 offset:16
+// GFX940: scratch_load_sshort v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_sshort v2, off, off
+// GFX940: scratch_load_sshort v2, off, off ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_sshort v2, off, off offset:16
+// GFX940: scratch_load_sshort v2, off, off offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_ubyte a2, v4, s6
+// GFX940: scratch_load_ubyte a2, v4, s6 ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_ubyte a2, v4, s6 offset:16
+// GFX940: scratch_load_ubyte a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_ubyte a2, v4, off
+// GFX940: scratch_load_ubyte a2, v4, off ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_ubyte a2, v4, off offset:16
+// GFX940: scratch_load_ubyte a2, v4, off offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_ubyte a2, off, s6
+// GFX940: scratch_load_ubyte a2, off, s6 ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_ubyte a2, off, s6 offset:16
+// GFX940: scratch_load_ubyte a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_ubyte a2, off, off
+// GFX940: scratch_load_ubyte a2, off, off ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_ubyte a2, off, off offset:16
+// GFX940: scratch_load_ubyte a2, off, off offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_ubyte v2, v4, s6
+// GFX940: scratch_load_ubyte v2, v4, s6 ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_ubyte v2, v4, s6 offset:16
+// GFX940: scratch_load_ubyte v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_ubyte v2, v4, off
+// GFX940: scratch_load_ubyte v2, v4, off ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_ubyte v2, v4, off offset:16
+// GFX940: scratch_load_ubyte v2, v4, off offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_ubyte v2, off, s6
+// GFX940: scratch_load_ubyte v2, off, s6 ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_ubyte v2, off, s6 offset:16
+// GFX940: scratch_load_ubyte v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_ubyte v2, off, off
+// GFX940: scratch_load_ubyte v2, off, off ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_ubyte v2, off, off offset:16
+// GFX940: scratch_load_ubyte v2, off, off offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_ubyte_d16 a2, v4, s6
+// GFX940: scratch_load_ubyte_d16 a2, v4, s6 ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_ubyte_d16 a2, v4, s6 offset:16
+// GFX940: scratch_load_ubyte_d16 a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_ubyte_d16 a2, v4, off
+// GFX940: scratch_load_ubyte_d16 a2, v4, off ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_ubyte_d16 a2, v4, off offset:16
+// GFX940: scratch_load_ubyte_d16 a2, v4, off offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_ubyte_d16 a2, off, s6
+// GFX940: scratch_load_ubyte_d16 a2, off, s6 ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_ubyte_d16 a2, off, s6 offset:16
+// GFX940: scratch_load_ubyte_d16 a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_ubyte_d16 a2, off, off
+// GFX940: scratch_load_ubyte_d16 a2, off, off ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_ubyte_d16 a2, off, off offset:16
+// GFX940: scratch_load_ubyte_d16 a2, off, off offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_ubyte_d16 v2, v4, s6
+// GFX940: scratch_load_ubyte_d16 v2, v4, s6 ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_ubyte_d16 v2, v4, s6 offset:16
+// GFX940: scratch_load_ubyte_d16 v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_ubyte_d16 v2, v4, off
+// GFX940: scratch_load_ubyte_d16 v2, v4, off ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_ubyte_d16 v2, v4, off offset:16
+// GFX940: scratch_load_ubyte_d16 v2, v4, off offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_ubyte_d16 v2, off, s6
+// GFX940: scratch_load_ubyte_d16 v2, off, s6 ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_ubyte_d16 v2, off, s6 offset:16
+// GFX940: scratch_load_ubyte_d16 v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_ubyte_d16 v2, off, off
+// GFX940: scratch_load_ubyte_d16 v2, off, off ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_ubyte_d16 v2, off, off offset:16
+// GFX940: scratch_load_ubyte_d16 v2, off, off offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_ubyte_d16_hi a2, v4, s6
+// GFX940: scratch_load_ubyte_d16_hi a2, v4, s6 ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_ubyte_d16_hi a2, v4, s6 offset:16
+// GFX940: scratch_load_ubyte_d16_hi a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_ubyte_d16_hi a2, v4, off
+// GFX940: scratch_load_ubyte_d16_hi a2, v4, off ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_ubyte_d16_hi a2, v4, off offset:16
+// GFX940: scratch_load_ubyte_d16_hi a2, v4, off offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_ubyte_d16_hi a2, off, s6
+// GFX940: scratch_load_ubyte_d16_hi a2, off, s6 ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_ubyte_d16_hi a2, off, s6 offset:16
+// GFX940: scratch_load_ubyte_d16_hi a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_ubyte_d16_hi a2, off, off
+// GFX940: scratch_load_ubyte_d16_hi a2, off, off ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_ubyte_d16_hi a2, off, off offset:16
+// GFX940: scratch_load_ubyte_d16_hi a2, off, off offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_ubyte_d16_hi v2, v4, s6
+// GFX940: scratch_load_ubyte_d16_hi v2, v4, s6 ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_ubyte_d16_hi v2, v4, s6 offset:16
+// GFX940: scratch_load_ubyte_d16_hi v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_ubyte_d16_hi v2, v4, off
+// GFX940: scratch_load_ubyte_d16_hi v2, v4, off ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_ubyte_d16_hi v2, v4, off offset:16
+// GFX940: scratch_load_ubyte_d16_hi v2, v4, off offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_ubyte_d16_hi v2, off, s6
+// GFX940: scratch_load_ubyte_d16_hi v2, off, s6 ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_ubyte_d16_hi v2, off, s6 offset:16
+// GFX940: scratch_load_ubyte_d16_hi v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_ubyte_d16_hi v2, off, off
+// GFX940: scratch_load_ubyte_d16_hi v2, off, off ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_ubyte_d16_hi v2, off, off offset:16
+// GFX940: scratch_load_ubyte_d16_hi v2, off, off offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_ushort a2, v4, s6
+// GFX940: scratch_load_ushort a2, v4, s6 ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_ushort a2, v4, s6 offset:16
+// GFX940: scratch_load_ushort a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0x86,0x02]
+
+scratch_load_ushort a2, v4, off
+// GFX940: scratch_load_ushort a2, v4, off ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_ushort a2, v4, off offset:16
+// GFX940: scratch_load_ushort a2, v4, off offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0xff,0x02]
+
+scratch_load_ushort a2, off, s6
+// GFX940: scratch_load_ushort a2, off, s6 ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_ushort a2, off, s6 offset:16
+// GFX940: scratch_load_ushort a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0x86,0x02]
+
+scratch_load_ushort a2, off, off
+// GFX940: scratch_load_ushort a2, off, off ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_ushort a2, off, off offset:16
+// GFX940: scratch_load_ushort a2, off, off offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0xff,0x02]
+
+scratch_load_ushort v2, v4, s6
+// GFX940: scratch_load_ushort v2, v4, s6 ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_ushort v2, v4, s6 offset:16
+// GFX940: scratch_load_ushort v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0x06,0x02]
+
+scratch_load_ushort v2, v4, off
+// GFX940: scratch_load_ushort v2, v4, off ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_ushort v2, v4, off offset:16
+// GFX940: scratch_load_ushort v2, v4, off offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0x7f,0x02]
+
+scratch_load_ushort v2, off, s6
+// GFX940: scratch_load_ushort v2, off, s6 ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_ushort v2, off, s6 offset:16
+// GFX940: scratch_load_ushort v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0x06,0x02]
+
+scratch_load_ushort v2, off, off
+// GFX940: scratch_load_ushort v2, off, off ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_load_ushort v2, off, off offset:16
+// GFX940: scratch_load_ushort v2, off, off offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0x7f,0x02]
+
+scratch_store_byte v4, a2, s6
+// GFX940: scratch_store_byte v4, a2, s6 ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_byte v4, a2, s6 offset:16
+// GFX940: scratch_store_byte v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_byte v4, a2, off
+// GFX940: scratch_store_byte v4, a2, off ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_byte v4, a2, off offset:16
+// GFX940: scratch_store_byte v4, a2, off offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_byte off, a2, s6
+// GFX940: scratch_store_byte off, a2, s6 ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_byte off, a2, s6 offset:16
+// GFX940: scratch_store_byte off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_byte off, a2, off
+// GFX940: scratch_store_byte off, a2, off ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_byte off, a2, off offset:16
+// GFX940: scratch_store_byte off, a2, off offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_byte v4, v2, s6
+// GFX940: scratch_store_byte v4, v2, s6 ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_byte v4, v2, s6 offset:16
+// GFX940: scratch_store_byte v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_byte v4, v2, off
+// GFX940: scratch_store_byte v4, v2, off ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_byte v4, v2, off offset:16
+// GFX940: scratch_store_byte v4, v2, off offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_byte off, v2, s6
+// GFX940: scratch_store_byte off, v2, s6 ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_byte off, v2, s6 offset:16
+// GFX940: scratch_store_byte off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_byte off, v2, off
+// GFX940: scratch_store_byte off, v2, off ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_byte off, v2, off offset:16
+// GFX940: scratch_store_byte off, v2, off offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_byte_d16_hi v4, a2, s6
+// GFX940: scratch_store_byte_d16_hi v4, a2, s6 ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_byte_d16_hi v4, a2, s6 offset:16
+// GFX940: scratch_store_byte_d16_hi v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_byte_d16_hi v4, a2, off
+// GFX940: scratch_store_byte_d16_hi v4, a2, off ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_byte_d16_hi v4, a2, off offset:16
+// GFX940: scratch_store_byte_d16_hi v4, a2, off offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_byte_d16_hi off, a2, s6
+// GFX940: scratch_store_byte_d16_hi off, a2, s6 ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_byte_d16_hi off, a2, s6 offset:16
+// GFX940: scratch_store_byte_d16_hi off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_byte_d16_hi off, a2, off
+// GFX940: scratch_store_byte_d16_hi off, a2, off ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_byte_d16_hi off, a2, off offset:16
+// GFX940: scratch_store_byte_d16_hi off, a2, off offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_byte_d16_hi v4, v2, s6
+// GFX940: scratch_store_byte_d16_hi v4, v2, s6 ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_byte_d16_hi v4, v2, s6 offset:16
+// GFX940: scratch_store_byte_d16_hi v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_byte_d16_hi v4, v2, off
+// GFX940: scratch_store_byte_d16_hi v4, v2, off ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_byte_d16_hi v4, v2, off offset:16
+// GFX940: scratch_store_byte_d16_hi v4, v2, off offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_byte_d16_hi off, v2, s6
+// GFX940: scratch_store_byte_d16_hi off, v2, s6 ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_byte_d16_hi off, v2, s6 offset:16
+// GFX940: scratch_store_byte_d16_hi off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_byte_d16_hi off, v2, off
+// GFX940: scratch_store_byte_d16_hi off, v2, off ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_byte_d16_hi off, v2, off offset:16
+// GFX940: scratch_store_byte_d16_hi off, v2, off offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_dword v4, a2, s6
+// GFX940: scratch_store_dword v4, a2, s6 ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_dword v4, a2, s6 offset:16
+// GFX940: scratch_store_dword v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_dword v4, a2, off
+// GFX940: scratch_store_dword v4, a2, off ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_dword v4, a2, off offset:16
+// GFX940: scratch_store_dword v4, a2, off offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_dword off, a2, s6
+// GFX940: scratch_store_dword off, a2, s6 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_dword off, a2, s6 offset:16
+// GFX940: scratch_store_dword off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_dword off, a2, off
+// GFX940: scratch_store_dword off, a2, off ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_dword off, a2, off offset:16
+// GFX940: scratch_store_dword off, a2, off offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_dword v4, v2, s6
+// GFX940: scratch_store_dword v4, v2, s6 ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_dword v4, v2, s6 offset:16
+// GFX940: scratch_store_dword v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_dword v4, v2, off
+// GFX940: scratch_store_dword v4, v2, off ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_dword v4, v2, off offset:16
+// GFX940: scratch_store_dword v4, v2, off offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_dword off, v2, s6
+// GFX940: scratch_store_dword off, v2, s6 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_dword off, v2, s6 offset:16
+// GFX940: scratch_store_dword off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_dword off, v2, off
+// GFX940: scratch_store_dword off, v2, off ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_dword off, v2, off offset:16
+// GFX940: scratch_store_dword off, v2, off offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_dwordx2 v4, a[2:3], s6
+// GFX940: scratch_store_dwordx2 v4, a[2:3], s6 ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_dwordx2 v4, a[2:3], s6 offset:16
+// GFX940: scratch_store_dwordx2 v4, a[2:3], s6 offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_dwordx2 v4, a[2:3], off
+// GFX940: scratch_store_dwordx2 v4, a[2:3], off ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_dwordx2 v4, a[2:3], off offset:16
+// GFX940: scratch_store_dwordx2 v4, a[2:3], off offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_dwordx2 off, a[2:3], s6
+// GFX940: scratch_store_dwordx2 off, a[2:3], s6 ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_dwordx2 off, a[2:3], s6 offset:16
+// GFX940: scratch_store_dwordx2 off, a[2:3], s6 offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_dwordx2 off, a[2:3], off
+// GFX940: scratch_store_dwordx2 off, a[2:3], off ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_dwordx2 off, a[2:3], off offset:16
+// GFX940: scratch_store_dwordx2 off, a[2:3], off offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_dwordx2 v4, v[2:3], s6
+// GFX940: scratch_store_dwordx2 v4, v[2:3], s6 ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_dwordx2 v4, v[2:3], s6 offset:16
+// GFX940: scratch_store_dwordx2 v4, v[2:3], s6 offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_dwordx2 v4, v[2:3], off
+// GFX940: scratch_store_dwordx2 v4, v[2:3], off ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_dwordx2 v4, v[2:3], off offset:16
+// GFX940: scratch_store_dwordx2 v4, v[2:3], off offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_dwordx2 off, v[2:3], s6
+// GFX940: scratch_store_dwordx2 off, v[2:3], s6 ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_dwordx2 off, v[2:3], s6 offset:16
+// GFX940: scratch_store_dwordx2 off, v[2:3], s6 offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_dwordx2 off, v[2:3], off
+// GFX940: scratch_store_dwordx2 off, v[2:3], off ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_dwordx2 off, v[2:3], off offset:16
+// GFX940: scratch_store_dwordx2 off, v[2:3], off offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_dwordx3 v4, a[2:4], s6
+// GFX940: scratch_store_dwordx3 v4, a[2:4], s6 ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_dwordx3 v4, a[2:4], s6 offset:16
+// GFX940: scratch_store_dwordx3 v4, a[2:4], s6 offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_dwordx3 v4, a[2:4], off
+// GFX940: scratch_store_dwordx3 v4, a[2:4], off ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_dwordx3 v4, a[2:4], off offset:16
+// GFX940: scratch_store_dwordx3 v4, a[2:4], off offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_dwordx3 off, a[2:4], s6
+// GFX940: scratch_store_dwordx3 off, a[2:4], s6 ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_dwordx3 off, a[2:4], s6 offset:16
+// GFX940: scratch_store_dwordx3 off, a[2:4], s6 offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_dwordx3 off, a[2:4], off
+// GFX940: scratch_store_dwordx3 off, a[2:4], off ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_dwordx3 off, a[2:4], off offset:16
+// GFX940: scratch_store_dwordx3 off, a[2:4], off offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_dwordx3 v4, v[2:4], s6
+// GFX940: scratch_store_dwordx3 v4, v[2:4], s6 ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_dwordx3 v4, v[2:4], s6 offset:16
+// GFX940: scratch_store_dwordx3 v4, v[2:4], s6 offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_dwordx3 v4, v[2:4], off
+// GFX940: scratch_store_dwordx3 v4, v[2:4], off ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_dwordx3 v4, v[2:4], off offset:16
+// GFX940: scratch_store_dwordx3 v4, v[2:4], off offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_dwordx3 off, v[2:4], s6
+// GFX940: scratch_store_dwordx3 off, v[2:4], s6 ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_dwordx3 off, v[2:4], s6 offset:16
+// GFX940: scratch_store_dwordx3 off, v[2:4], s6 offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_dwordx3 off, v[2:4], off
+// GFX940: scratch_store_dwordx3 off, v[2:4], off ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_dwordx3 off, v[2:4], off offset:16
+// GFX940: scratch_store_dwordx3 off, v[2:4], off offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_dwordx4 v4, a[2:5], s6
+// GFX940: scratch_store_dwordx4 v4, a[2:5], s6 ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_dwordx4 v4, a[2:5], s6 offset:16
+// GFX940: scratch_store_dwordx4 v4, a[2:5], s6 offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_dwordx4 v4, a[2:5], off
+// GFX940: scratch_store_dwordx4 v4, a[2:5], off ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_dwordx4 v4, a[2:5], off offset:16
+// GFX940: scratch_store_dwordx4 v4, a[2:5], off offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_dwordx4 off, a[2:5], s6
+// GFX940: scratch_store_dwordx4 off, a[2:5], s6 ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_dwordx4 off, a[2:5], s6 offset:16
+// GFX940: scratch_store_dwordx4 off, a[2:5], s6 offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_dwordx4 off, a[2:5], off
+// GFX940: scratch_store_dwordx4 off, a[2:5], off ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_dwordx4 off, a[2:5], off offset:16
+// GFX940: scratch_store_dwordx4 off, a[2:5], off offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_dwordx4 v4, v[2:5], s6
+// GFX940: scratch_store_dwordx4 v4, v[2:5], s6 ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_dwordx4 v4, v[2:5], s6 offset:16
+// GFX940: scratch_store_dwordx4 v4, v[2:5], s6 offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_dwordx4 v4, v[2:5], off
+// GFX940: scratch_store_dwordx4 v4, v[2:5], off ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_dwordx4 v4, v[2:5], off offset:16
+// GFX940: scratch_store_dwordx4 v4, v[2:5], off offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_dwordx4 off, v[2:5], s6
+// GFX940: scratch_store_dwordx4 off, v[2:5], s6 ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_dwordx4 off, v[2:5], s6 offset:16
+// GFX940: scratch_store_dwordx4 off, v[2:5], s6 offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_dwordx4 off, v[2:5], off
+// GFX940: scratch_store_dwordx4 off, v[2:5], off ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_dwordx4 off, v[2:5], off offset:16
+// GFX940: scratch_store_dwordx4 off, v[2:5], off offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_short v4, a2, s6
+// GFX940: scratch_store_short v4, a2, s6 ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_short v4, a2, s6 offset:16
+// GFX940: scratch_store_short v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_short v4, a2, off
+// GFX940: scratch_store_short v4, a2, off ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_short v4, a2, off offset:16
+// GFX940: scratch_store_short v4, a2, off offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_short off, a2, s6
+// GFX940: scratch_store_short off, a2, s6 ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_short off, a2, s6 offset:16
+// GFX940: scratch_store_short off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_short off, a2, off
+// GFX940: scratch_store_short off, a2, off ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_short off, a2, off offset:16
+// GFX940: scratch_store_short off, a2, off offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_short v4, v2, s6
+// GFX940: scratch_store_short v4, v2, s6 ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_short v4, v2, s6 offset:16
+// GFX940: scratch_store_short v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_short v4, v2, off
+// GFX940: scratch_store_short v4, v2, off ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_short v4, v2, off offset:16
+// GFX940: scratch_store_short v4, v2, off offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_short off, v2, s6
+// GFX940: scratch_store_short off, v2, s6 ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_short off, v2, s6 offset:16
+// GFX940: scratch_store_short off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_short off, v2, off
+// GFX940: scratch_store_short off, v2, off ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_short off, v2, off offset:16
+// GFX940: scratch_store_short off, v2, off offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_short_d16_hi v4, a2, s6
+// GFX940: scratch_store_short_d16_hi v4, a2, s6 ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_short_d16_hi v4, a2, s6 offset:16
+// GFX940: scratch_store_short_d16_hi v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0x86,0x00]
+
+scratch_store_short_d16_hi v4, a2, off
+// GFX940: scratch_store_short_d16_hi v4, a2, off ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_short_d16_hi v4, a2, off offset:16
+// GFX940: scratch_store_short_d16_hi v4, a2, off offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0xff,0x00]
+
+scratch_store_short_d16_hi off, a2, s6
+// GFX940: scratch_store_short_d16_hi off, a2, s6 ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_short_d16_hi off, a2, s6 offset:16
+// GFX940: scratch_store_short_d16_hi off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0x86,0x00]
+
+scratch_store_short_d16_hi off, a2, off
+// GFX940: scratch_store_short_d16_hi off, a2, off ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_short_d16_hi off, a2, off offset:16
+// GFX940: scratch_store_short_d16_hi off, a2, off offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0xff,0x00]
+
+scratch_store_short_d16_hi v4, v2, s6
+// GFX940: scratch_store_short_d16_hi v4, v2, s6 ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_short_d16_hi v4, v2, s6 offset:16
+// GFX940: scratch_store_short_d16_hi v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0x06,0x00]
+
+scratch_store_short_d16_hi v4, v2, off
+// GFX940: scratch_store_short_d16_hi v4, v2, off ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_short_d16_hi v4, v2, off offset:16
+// GFX940: scratch_store_short_d16_hi v4, v2, off offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0x7f,0x00]
+
+scratch_store_short_d16_hi off, v2, s6
+// GFX940: scratch_store_short_d16_hi off, v2, s6 ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_short_d16_hi off, v2, s6 offset:16
+// GFX940: scratch_store_short_d16_hi off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0x06,0x00]
+
+scratch_store_short_d16_hi off, v2, off
+// GFX940: scratch_store_short_d16_hi off, v2, off ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x7f,0x00]
+
+scratch_store_short_d16_hi off, v2, off offset:16
+// GFX940: scratch_store_short_d16_hi off, v2, off offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0x7f,0x00]
diff --git a/llvm/test/MC/AMDGPU/flat-scratch-instructions.s b/llvm/test/MC/AMDGPU/flat-scratch-instructions.s
index 0f4fae7546b7f..b0d5d2d390cec 100644
--- a/llvm/test/MC/AMDGPU/flat-scratch-instructions.s
+++ b/llvm/test/MC/AMDGPU/flat-scratch-instructions.s
@@ -222,23 +222,23 @@ scratch_store_dword off, v2, s1 offset:12
// FIXME: Should error about multiple offsets
scratch_load_dword v1, v2, s1
-// GFX10-ERR: error: invalid operand for instruction
-// GFX9-ERR: error: invalid operand for instruction
+// GFX10-ERR: error: operands are not valid for this GPU or mode
+// GFX9-ERR: error: operands are not valid for this GPU or mode
// VI-ERR: error: instruction not supported on this GPU
scratch_load_dword v1, v2, s1 offset:32
-// GFX10-ERR: error: invalid operand for instruction
-// GFX9-ERR: error: invalid operand for instruction
+// GFX10-ERR: error: operands are not valid for this GPU or mode
+// GFX9-ERR: error: operands are not valid for this GPU or mode
// VI-ERR: error: instruction not supported on this GPU
scratch_store_dword v1, v2, s1
-// GFX10-ERR: error: invalid operand for instruction
-// GFX9-ERR: error: invalid operand for instruction
+// GFX10-ERR: error: operands are not valid for this GPU or mode
+// GFX9-ERR: error: operands are not valid for this GPU or mode
// VI-ERR: error: instruction not supported on this GPU
scratch_store_dword v1, v2, s1 offset:32
-// GFX10-ERR: error: invalid operand for instruction
-// GFX9-ERR: error: invalid operand for instruction
+// GFX10-ERR: error: operands are not valid for this GPU or mode
+// GFX9-ERR: error: operands are not valid for this GPU or mode
// VI-ERR: error: instruction not supported on this GPU
scratch_load_dword v1, off, exec_hi
diff --git a/llvm/test/MC/Disassembler/AMDGPU/flat-scratch-gfx940.txt b/llvm/test/MC/Disassembler/AMDGPU/flat-scratch-gfx940.txt
new file mode 100644
index 0000000000000..b3182c98cb1b7
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/flat-scratch-gfx940.txt
@@ -0,0 +1,1057 @@
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx940 -show-encoding -disassemble %s | FileCheck -check-prefix=GFX940 %s
+
+# GFX940: scratch_load_dword a2, v4, s6 ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x50,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_dword a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x50,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_dword a2, v4, off ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x50,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_dword a2, v4, off offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x50,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_dword a2, off, s6 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x50,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_dword a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x50,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_dword a2, off, off ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x50,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_dword a2, off, off offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x50,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_dword v2, v4, s6 ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x50,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_dword v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x50,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_dword v2, v4, off ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x50,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dword v2, v4, off offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x50,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dword v2, off, s6 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x50,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_dword v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x50,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_dword v2, off, off ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x50,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dword v2, off, off offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x50,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dwordx2 a[2:3], v4, s6 ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x54,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_dwordx2 a[2:3], v4, s6 offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x54,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_dwordx2 a[2:3], v4, off ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x54,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_dwordx2 a[2:3], v4, off offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x54,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_dwordx2 a[2:3], off, s6 ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x54,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_dwordx2 a[2:3], off, s6 offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x54,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_dwordx2 a[2:3], off, off ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x54,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_dwordx2 a[2:3], off, off offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x54,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_dwordx2 v[2:3], v4, s6 ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x54,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_dwordx2 v[2:3], v4, s6 offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x54,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_dwordx2 v[2:3], v4, off ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x54,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dwordx2 v[2:3], v4, off offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x54,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dwordx2 v[2:3], off, s6 ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x54,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_dwordx2 v[2:3], off, s6 offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x54,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_dwordx2 v[2:3], off, off ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x54,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dwordx2 v[2:3], off, off offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x54,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dwordx3 a[2:4], v4, s6 ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x58,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_dwordx3 a[2:4], v4, s6 offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x58,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_dwordx3 a[2:4], v4, off ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x58,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_dwordx3 a[2:4], v4, off offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x58,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_dwordx3 a[2:4], off, s6 ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x58,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_dwordx3 a[2:4], off, s6 offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x58,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_dwordx3 a[2:4], off, off ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x58,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_dwordx3 a[2:4], off, off offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x58,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_dwordx3 v[2:4], v4, s6 ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x58,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_dwordx3 v[2:4], v4, s6 offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x58,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_dwordx3 v[2:4], v4, off ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x58,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dwordx3 v[2:4], v4, off offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x58,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dwordx3 v[2:4], off, s6 ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x58,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_dwordx3 v[2:4], off, s6 offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x58,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_dwordx3 v[2:4], off, off ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x58,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dwordx3 v[2:4], off, off offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x58,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dwordx4 a[2:5], v4, s6 ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x5c,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_dwordx4 a[2:5], v4, s6 offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x5c,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_dwordx4 a[2:5], v4, off ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x5c,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_dwordx4 a[2:5], v4, off offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x5c,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_dwordx4 a[2:5], off, s6 ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x5c,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_dwordx4 a[2:5], off, s6 offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x5c,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_dwordx4 a[2:5], off, off ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x5c,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_dwordx4 a[2:5], off, off offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x5c,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_dwordx4 v[2:5], v4, s6 ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x5c,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_dwordx4 v[2:5], v4, s6 offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x5c,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_dwordx4 v[2:5], v4, off ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x5c,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dwordx4 v[2:5], v4, off offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x5c,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dwordx4 v[2:5], off, s6 ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x5c,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_dwordx4 v[2:5], off, s6 offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x5c,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_dwordx4 v[2:5], off, off ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x5c,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_dwordx4 v[2:5], off, off offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x5c,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sbyte a2, v4, s6 ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x44,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_sbyte a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x44,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_sbyte a2, v4, off ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x44,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_sbyte a2, v4, off offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x44,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_sbyte a2, off, s6 ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x44,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_sbyte a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x44,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_sbyte a2, off, off ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x44,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_sbyte a2, off, off offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x44,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_sbyte v2, v4, s6 ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x44,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_sbyte v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x44,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_sbyte v2, v4, off ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x44,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sbyte v2, v4, off offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x44,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sbyte v2, off, s6 ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x44,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_sbyte v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x44,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_sbyte v2, off, off ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x44,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sbyte v2, off, off offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x44,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sbyte_d16 a2, v4, s6 ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x88,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_sbyte_d16 a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x88,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_sbyte_d16 a2, v4, off ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x88,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_sbyte_d16 a2, v4, off offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x88,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_sbyte_d16 a2, off, s6 ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x88,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_sbyte_d16 a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x88,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_sbyte_d16 a2, off, off ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x88,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_sbyte_d16 a2, off, off offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x88,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_sbyte_d16 v2, v4, s6 ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x88,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_sbyte_d16 v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x88,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_sbyte_d16 v2, v4, off ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x88,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sbyte_d16 v2, v4, off offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x88,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sbyte_d16 v2, off, s6 ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x88,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_sbyte_d16 v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x88,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_sbyte_d16 v2, off, off ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x88,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sbyte_d16 v2, off, off offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x88,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi a2, v4, s6 ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x8c,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x8c,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi a2, v4, off ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x8c,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi a2, v4, off offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x8c,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi a2, off, s6 ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x8c,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x8c,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi a2, off, off ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x8c,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi a2, off, off offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x8c,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi v2, v4, s6 ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x8c,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x8c,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi v2, v4, off ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x8c,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi v2, v4, off offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x8c,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi v2, off, s6 ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x8c,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x8c,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi v2, off, off ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x8c,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sbyte_d16_hi v2, off, off offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x8c,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_short_d16 a2, v4, s6 ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x90,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_short_d16 a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x90,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_short_d16 a2, v4, off ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x90,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_short_d16 a2, v4, off offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x90,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_short_d16 a2, off, s6 ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x90,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_short_d16 a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x90,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_short_d16 a2, off, off ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x90,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_short_d16 a2, off, off offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x90,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_short_d16 v2, v4, s6 ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x90,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_short_d16 v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x90,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_short_d16 v2, v4, off ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x90,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_short_d16 v2, v4, off offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x90,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_short_d16 v2, off, s6 ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x90,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_short_d16 v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x90,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_short_d16 v2, off, off ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x90,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_short_d16 v2, off, off offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x90,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_short_d16_hi a2, v4, s6 ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x94,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_short_d16_hi a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x94,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_short_d16_hi a2, v4, off ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x94,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_short_d16_hi a2, v4, off offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x94,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_short_d16_hi a2, off, s6 ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x94,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_short_d16_hi a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x94,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_short_d16_hi a2, off, off ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x94,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_short_d16_hi a2, off, off offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x94,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_short_d16_hi v2, v4, s6 ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x94,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_short_d16_hi v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x94,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_short_d16_hi v2, v4, off ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x94,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_short_d16_hi v2, v4, off offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x94,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_short_d16_hi v2, off, s6 ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x94,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_short_d16_hi v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x94,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_short_d16_hi v2, off, off ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x94,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_short_d16_hi v2, off, off offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x94,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sshort a2, v4, s6 ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x4c,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_sshort a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x4c,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_sshort a2, v4, off ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x4c,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_sshort a2, v4, off offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x4c,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_sshort a2, off, s6 ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x4c,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_sshort a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x4c,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_sshort a2, off, off ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x4c,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_sshort a2, off, off offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x4c,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_sshort v2, v4, s6 ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x4c,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_sshort v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x4c,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_sshort v2, v4, off ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x4c,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sshort v2, v4, off offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x4c,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sshort v2, off, s6 ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x4c,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_sshort v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x4c,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_sshort v2, off, off ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x4c,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_sshort v2, off, off offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x4c,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ubyte a2, v4, s6 ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x40,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_ubyte a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x40,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_ubyte a2, v4, off ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x40,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_ubyte a2, v4, off offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x40,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_ubyte a2, off, s6 ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x40,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_ubyte a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x40,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_ubyte a2, off, off ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x40,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_ubyte a2, off, off offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x40,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_ubyte v2, v4, s6 ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x40,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_ubyte v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x40,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_ubyte v2, v4, off ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x40,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ubyte v2, v4, off offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x40,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ubyte v2, off, s6 ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x40,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_ubyte v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x40,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_ubyte v2, off, off ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x40,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ubyte v2, off, off offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x40,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ubyte_d16 a2, v4, s6 ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x80,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_ubyte_d16 a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x80,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_ubyte_d16 a2, v4, off ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x80,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_ubyte_d16 a2, v4, off offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x80,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_ubyte_d16 a2, off, s6 ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x80,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_ubyte_d16 a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x80,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_ubyte_d16 a2, off, off ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x80,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_ubyte_d16 a2, off, off offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x80,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_ubyte_d16 v2, v4, s6 ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x80,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_ubyte_d16 v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x80,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_ubyte_d16 v2, v4, off ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x80,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ubyte_d16 v2, v4, off offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x80,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ubyte_d16 v2, off, s6 ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x80,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_ubyte_d16 v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x80,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_ubyte_d16 v2, off, off ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x80,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ubyte_d16 v2, off, off offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x80,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi a2, v4, s6 ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x84,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x84,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi a2, v4, off ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x84,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi a2, v4, off offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x84,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi a2, off, s6 ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x84,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x84,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi a2, off, off ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x84,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi a2, off, off offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x84,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi v2, v4, s6 ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x84,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x84,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi v2, v4, off ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x84,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi v2, v4, off offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x84,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi v2, off, s6 ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x84,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x84,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi v2, off, off ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x84,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ubyte_d16_hi v2, off, off offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x84,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ushort a2, v4, s6 ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0x86,0x02]
+0x00,0x60,0x48,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_ushort a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0x86,0x02]
+0x10,0x60,0x48,0xdc,0x04,0x00,0x86,0x02
+
+# GFX940: scratch_load_ushort a2, v4, off ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0xff,0x02]
+0x00,0x60,0x48,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_ushort a2, v4, off offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0xff,0x02]
+0x10,0x60,0x48,0xdc,0x04,0x00,0xff,0x02
+
+# GFX940: scratch_load_ushort a2, off, s6 ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x86,0x02]
+0x00,0x40,0x48,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_ushort a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0x86,0x02]
+0x10,0x40,0x48,0xdc,0x00,0x00,0x86,0x02
+
+# GFX940: scratch_load_ushort a2, off, off ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0xff,0x02]
+0x00,0x40,0x48,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_ushort a2, off, off offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0xff,0x02]
+0x10,0x40,0x48,0xdc,0x00,0x00,0xff,0x02
+
+# GFX940: scratch_load_ushort v2, v4, s6 ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0x06,0x02]
+0x00,0x60,0x48,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_ushort v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0x06,0x02]
+0x10,0x60,0x48,0xdc,0x04,0x00,0x06,0x02
+
+# GFX940: scratch_load_ushort v2, v4, off ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0x7f,0x02]
+0x00,0x60,0x48,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ushort v2, v4, off offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0x7f,0x02]
+0x10,0x60,0x48,0xdc,0x04,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ushort v2, off, s6 ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x06,0x02]
+0x00,0x40,0x48,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_ushort v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0x06,0x02]
+0x10,0x40,0x48,0xdc,0x00,0x00,0x06,0x02
+
+# GFX940: scratch_load_ushort v2, off, off ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x7f,0x02]
+0x00,0x40,0x48,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_load_ushort v2, off, off offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0x7f,0x02]
+0x10,0x40,0x48,0xdc,0x00,0x00,0x7f,0x02
+
+# GFX940: scratch_store_byte v4, a2, s6 ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0x86,0x00]
+0x00,0x60,0x60,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_byte v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0x86,0x00]
+0x10,0x60,0x60,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_byte v4, a2, off ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0xff,0x00]
+0x00,0x60,0x60,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_byte v4, a2, off offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0xff,0x00]
+0x10,0x60,0x60,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_byte off, a2, s6 ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x86,0x00]
+0x00,0x40,0x60,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_byte off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0x86,0x00]
+0x10,0x40,0x60,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_byte off, a2, off ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0xff,0x00]
+0x00,0x40,0x60,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_byte off, a2, off offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0xff,0x00]
+0x10,0x40,0x60,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_byte v4, v2, s6 ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0x06,0x00]
+0x00,0x60,0x60,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_byte v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0x06,0x00]
+0x10,0x60,0x60,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_byte v4, v2, off ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0x7f,0x00]
+0x00,0x60,0x60,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_byte v4, v2, off offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0x7f,0x00]
+0x10,0x60,0x60,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_byte off, v2, s6 ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x06,0x00]
+0x00,0x40,0x60,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_byte off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0x06,0x00]
+0x10,0x40,0x60,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_byte off, v2, off ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x7f,0x00]
+0x00,0x40,0x60,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_byte off, v2, off offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0x7f,0x00]
+0x10,0x40,0x60,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_byte_d16_hi v4, a2, s6 ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0x86,0x00]
+0x00,0x60,0x64,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_byte_d16_hi v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0x86,0x00]
+0x10,0x60,0x64,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_byte_d16_hi v4, a2, off ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0xff,0x00]
+0x00,0x60,0x64,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_byte_d16_hi v4, a2, off offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0xff,0x00]
+0x10,0x60,0x64,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_byte_d16_hi off, a2, s6 ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x86,0x00]
+0x00,0x40,0x64,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_byte_d16_hi off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0x86,0x00]
+0x10,0x40,0x64,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_byte_d16_hi off, a2, off ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0xff,0x00]
+0x00,0x40,0x64,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_byte_d16_hi off, a2, off offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0xff,0x00]
+0x10,0x40,0x64,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_byte_d16_hi v4, v2, s6 ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0x06,0x00]
+0x00,0x60,0x64,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_byte_d16_hi v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0x06,0x00]
+0x10,0x60,0x64,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_byte_d16_hi v4, v2, off ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0x7f,0x00]
+0x00,0x60,0x64,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_byte_d16_hi v4, v2, off offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0x7f,0x00]
+0x10,0x60,0x64,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_byte_d16_hi off, v2, s6 ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x06,0x00]
+0x00,0x40,0x64,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_byte_d16_hi off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0x06,0x00]
+0x10,0x40,0x64,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_byte_d16_hi off, v2, off ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x7f,0x00]
+0x00,0x40,0x64,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_byte_d16_hi off, v2, off offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0x7f,0x00]
+0x10,0x40,0x64,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dword v4, a2, s6 ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0x86,0x00]
+0x00,0x60,0x70,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_dword v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0x86,0x00]
+0x10,0x60,0x70,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_dword v4, a2, off ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0xff,0x00]
+0x00,0x60,0x70,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_dword v4, a2, off offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0xff,0x00]
+0x10,0x60,0x70,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_dword off, a2, s6 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x86,0x00]
+0x00,0x40,0x70,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_dword off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0x86,0x00]
+0x10,0x40,0x70,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_dword off, a2, off ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0xff,0x00]
+0x00,0x40,0x70,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_dword off, a2, off offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0xff,0x00]
+0x10,0x40,0x70,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_dword v4, v2, s6 ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0x06,0x00]
+0x00,0x60,0x70,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_dword v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0x06,0x00]
+0x10,0x60,0x70,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_dword v4, v2, off ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0x7f,0x00]
+0x00,0x60,0x70,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dword v4, v2, off offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0x7f,0x00]
+0x10,0x60,0x70,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dword off, v2, s6 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x06,0x00]
+0x00,0x40,0x70,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_dword off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0x06,0x00]
+0x10,0x40,0x70,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_dword off, v2, off ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7f,0x00]
+0x00,0x40,0x70,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dword off, v2, off offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0x7f,0x00]
+0x10,0x40,0x70,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dwordx2 v4, a[2:3], s6 ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0x86,0x00]
+0x00,0x60,0x74,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_dwordx2 v4, a[2:3], s6 offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0x86,0x00]
+0x10,0x60,0x74,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_dwordx2 v4, a[2:3], off ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0xff,0x00]
+0x00,0x60,0x74,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_dwordx2 v4, a[2:3], off offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0xff,0x00]
+0x10,0x60,0x74,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_dwordx2 off, a[2:3], s6 ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x86,0x00]
+0x00,0x40,0x74,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_dwordx2 off, a[2:3], s6 offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0x86,0x00]
+0x10,0x40,0x74,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_dwordx2 off, a[2:3], off ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0xff,0x00]
+0x00,0x40,0x74,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_dwordx2 off, a[2:3], off offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0xff,0x00]
+0x10,0x40,0x74,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_dwordx2 v4, v[2:3], s6 ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0x06,0x00]
+0x00,0x60,0x74,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_dwordx2 v4, v[2:3], s6 offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0x06,0x00]
+0x10,0x60,0x74,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_dwordx2 v4, v[2:3], off ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0x7f,0x00]
+0x00,0x60,0x74,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dwordx2 v4, v[2:3], off offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0x7f,0x00]
+0x10,0x60,0x74,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dwordx2 off, v[2:3], s6 ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x06,0x00]
+0x00,0x40,0x74,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_dwordx2 off, v[2:3], s6 offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0x06,0x00]
+0x10,0x40,0x74,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_dwordx2 off, v[2:3], off ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x7f,0x00]
+0x00,0x40,0x74,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dwordx2 off, v[2:3], off offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0x7f,0x00]
+0x10,0x40,0x74,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dwordx3 v4, a[2:4], s6 ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0x86,0x00]
+0x00,0x60,0x78,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_dwordx3 v4, a[2:4], s6 offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0x86,0x00]
+0x10,0x60,0x78,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_dwordx3 v4, a[2:4], off ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0xff,0x00]
+0x00,0x60,0x78,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_dwordx3 v4, a[2:4], off offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0xff,0x00]
+0x10,0x60,0x78,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_dwordx3 off, a[2:4], s6 ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x86,0x00]
+0x00,0x40,0x78,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_dwordx3 off, a[2:4], s6 offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0x86,0x00]
+0x10,0x40,0x78,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_dwordx3 off, a[2:4], off ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0xff,0x00]
+0x00,0x40,0x78,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_dwordx3 off, a[2:4], off offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0xff,0x00]
+0x10,0x40,0x78,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_dwordx3 v4, v[2:4], s6 ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0x06,0x00]
+0x00,0x60,0x78,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_dwordx3 v4, v[2:4], s6 offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0x06,0x00]
+0x10,0x60,0x78,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_dwordx3 v4, v[2:4], off ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0x7f,0x00]
+0x00,0x60,0x78,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dwordx3 v4, v[2:4], off offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0x7f,0x00]
+0x10,0x60,0x78,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dwordx3 off, v[2:4], s6 ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x06,0x00]
+0x00,0x40,0x78,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_dwordx3 off, v[2:4], s6 offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0x06,0x00]
+0x10,0x40,0x78,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_dwordx3 off, v[2:4], off ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x7f,0x00]
+0x00,0x40,0x78,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dwordx3 off, v[2:4], off offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0x7f,0x00]
+0x10,0x40,0x78,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dwordx4 v4, a[2:5], s6 ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0x86,0x00]
+0x00,0x60,0x7c,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_dwordx4 v4, a[2:5], s6 offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0x86,0x00]
+0x10,0x60,0x7c,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_dwordx4 v4, a[2:5], off ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0xff,0x00]
+0x00,0x60,0x7c,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_dwordx4 v4, a[2:5], off offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0xff,0x00]
+0x10,0x60,0x7c,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_dwordx4 off, a[2:5], s6 ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x86,0x00]
+0x00,0x40,0x7c,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_dwordx4 off, a[2:5], s6 offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0x86,0x00]
+0x10,0x40,0x7c,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_dwordx4 off, a[2:5], off ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0xff,0x00]
+0x00,0x40,0x7c,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_dwordx4 off, a[2:5], off offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0xff,0x00]
+0x10,0x40,0x7c,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_dwordx4 v4, v[2:5], s6 ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0x06,0x00]
+0x00,0x60,0x7c,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_dwordx4 v4, v[2:5], s6 offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0x06,0x00]
+0x10,0x60,0x7c,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_dwordx4 v4, v[2:5], off ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0x7f,0x00]
+0x00,0x60,0x7c,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dwordx4 v4, v[2:5], off offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0x7f,0x00]
+0x10,0x60,0x7c,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dwordx4 off, v[2:5], s6 ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x06,0x00]
+0x00,0x40,0x7c,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_dwordx4 off, v[2:5], s6 offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0x06,0x00]
+0x10,0x40,0x7c,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_dwordx4 off, v[2:5], off ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x7f,0x00]
+0x00,0x40,0x7c,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_dwordx4 off, v[2:5], off offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0x7f,0x00]
+0x10,0x40,0x7c,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_short v4, a2, s6 ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0x86,0x00]
+0x00,0x60,0x68,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_short v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0x86,0x00]
+0x10,0x60,0x68,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_short v4, a2, off ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0xff,0x00]
+0x00,0x60,0x68,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_short v4, a2, off offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0xff,0x00]
+0x10,0x60,0x68,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_short off, a2, s6 ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x86,0x00]
+0x00,0x40,0x68,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_short off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0x86,0x00]
+0x10,0x40,0x68,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_short off, a2, off ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0xff,0x00]
+0x00,0x40,0x68,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_short off, a2, off offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0xff,0x00]
+0x10,0x40,0x68,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_short v4, v2, s6 ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0x06,0x00]
+0x00,0x60,0x68,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_short v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0x06,0x00]
+0x10,0x60,0x68,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_short v4, v2, off ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0x7f,0x00]
+0x00,0x60,0x68,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_short v4, v2, off offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0x7f,0x00]
+0x10,0x60,0x68,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_short off, v2, s6 ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x06,0x00]
+0x00,0x40,0x68,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_short off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0x06,0x00]
+0x10,0x40,0x68,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_short off, v2, off ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x7f,0x00]
+0x00,0x40,0x68,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_short off, v2, off offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0x7f,0x00]
+0x10,0x40,0x68,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_short_d16_hi v4, a2, s6 ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0x86,0x00]
+0x00,0x60,0x6c,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_short_d16_hi v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0x86,0x00]
+0x10,0x60,0x6c,0xdc,0x04,0x02,0x86,0x00
+
+# GFX940: scratch_store_short_d16_hi v4, a2, off ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0xff,0x00]
+0x00,0x60,0x6c,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_short_d16_hi v4, a2, off offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0xff,0x00]
+0x10,0x60,0x6c,0xdc,0x04,0x02,0xff,0x00
+
+# GFX940: scratch_store_short_d16_hi off, a2, s6 ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x86,0x00]
+0x00,0x40,0x6c,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_short_d16_hi off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0x86,0x00]
+0x10,0x40,0x6c,0xdc,0x00,0x02,0x86,0x00
+
+# GFX940: scratch_store_short_d16_hi off, a2, off ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0xff,0x00]
+0x00,0x40,0x6c,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_short_d16_hi off, a2, off offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0xff,0x00]
+0x10,0x40,0x6c,0xdc,0x00,0x02,0xff,0x00
+
+# GFX940: scratch_store_short_d16_hi v4, v2, s6 ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0x06,0x00]
+0x00,0x60,0x6c,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_short_d16_hi v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0x06,0x00]
+0x10,0x60,0x6c,0xdc,0x04,0x02,0x06,0x00
+
+# GFX940: scratch_store_short_d16_hi v4, v2, off ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0x7f,0x00]
+0x00,0x60,0x6c,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_short_d16_hi v4, v2, off offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0x7f,0x00]
+0x10,0x60,0x6c,0xdc,0x04,0x02,0x7f,0x00
+
+# GFX940: scratch_store_short_d16_hi off, v2, s6 ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x06,0x00]
+0x00,0x40,0x6c,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_short_d16_hi off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0x06,0x00]
+0x10,0x40,0x6c,0xdc,0x00,0x02,0x06,0x00
+
+# GFX940: scratch_store_short_d16_hi off, v2, off ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x7f,0x00]
+0x00,0x40,0x6c,0xdc,0x00,0x02,0x7f,0x00
+
+# GFX940: scratch_store_short_d16_hi off, v2, off offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0x7f,0x00]
+0x10,0x40,0x6c,0xdc,0x00,0x02,0x7f,0x00
More information about the llvm-commits
mailing list