[llvm] [AMDGPU][MC] GFX9 - Support NV bit in FLAT instructions in pre-GFX90A (PR #154237)

Jun Wang via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 25 11:38:19 PDT 2025


https://github.com/jwanggit86 updated https://github.com/llvm/llvm-project/pull/154237

>From 23457ee3272b6b10d8f8b6c38b3cf30fd8fea2f8 Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86 at yahoo.com>
Date: Mon, 18 Aug 2025 17:30:33 -0700
Subject: [PATCH 1/4] [AMDGPU][MC] GFX9 - Support NV bit in FLAT instructions
 in pre-GFX90A targets

This patch enables support of the NV (non-volatile) bit in FLAT
instructions in GFX9 (pre-GFX90A) targets.
---
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      |  10 +-
 .../Disassembler/AMDGPUDisassembler.cpp       |  13 +
 .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp |   5 +-
 .../MCTargetDesc/AMDGPUMCCodeEmitter.cpp      |  10 +
 llvm/test/MC/AMDGPU/gfx9_asm_flat.s           | 858 +++++++++++++++++
 .../test/MC/Disassembler/AMDGPU/gfx9_flat.txt | 864 ++++++++++++++++++
 6 files changed, 1758 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 2ced4d6813766..e30b2c1347ee6 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5295,7 +5295,8 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
       Error(S, "scale_offset is not supported on this GPU");
     }
-    if (CPol & CPol::NV) {
+    if ((CPol & CPol::NV) && (!isGFX9() || isGFX90A())) {
+      // nv is only supported on GFX9 targets before GFX90A.
       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
       StringRef CStr(S.getPointer());
       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
@@ -7089,6 +7090,13 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
   unsigned Enabled = 0, Seen = 0;
   for (;;) {
     SMLoc S = getLoc();
+
+    if (isGFX9() && trySkipId("nv")) {
+      Enabled |= CPol::NV;
+      Seen |= CPol::NV;
+      continue;
+    }
+
     bool Disabling;
     unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
     if (!CPol)
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index d3db1b7394675..9103d1c41e879 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -788,6 +788,19 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
     }
   }
 
+  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::FLAT) {
+    if (isGFX9() && !isGFX90A()) {
+      // Pre-GFX90A GFX9 targets use bit 55 as NV.
+      assert(Bytes_.size() >= 8);
+      if (Bytes_[6] & 0x80) { // check bit 55
+        int CPolIdx =
+            AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cpol);
+        MI.getOperand(CPolIdx).setImm(MI.getOperand(CPolIdx).getImm() |
+                                      AMDGPU::CPol::NV);
+      }
+    }
+  }
+
   if ((MCII->get(MI.getOpcode()).TSFlags &
        (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
       (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index f098e7a3c6c67..4f4e6dc4e4136 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -177,7 +177,10 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
   if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI))
     O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc");
   if (Imm & ~CPol::ALL_pregfx12)
-    O << " /* unexpected cache policy bit */";
+    if ((Imm & CPol::NV) && AMDGPU::isGFX9(STI) && !AMDGPU::isGFX90A(STI))
+      O << " nv";
+    else
+      O << " /* unexpected cache policy bit */";
 }
 
 void AMDGPUInstPrinter::printTH(const MCInst *MI, int64_t TH, int64_t Scope,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
index bf212bbca934c..63a89ec319a55 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -394,6 +394,16 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI,
     Encoding |= getImplicitOpSelHiEncoding(Opcode);
   }
 
+  // For GFX90A+ targets, bit 55 of the FLAT instructions is the ACC bit
+  // indicating the use of AGPRs. However, pre-GFX90A, the same bit is for NV.
+  if ((Desc.TSFlags & SIInstrFlags::FLAT) && AMDGPU::isGFX9(STI) &&
+      !AMDGPU::isGFX90A(STI)) {
+    int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cpol);
+    unsigned Cpol = MI.getOperand(Idx).getImm();
+    if (Cpol & AMDGPU::CPol::NV)
+      Encoding |= (UINT64_C(1) << 55);
+  }
+
   // GFX10+ v_cmpx opcodes promoted to VOP3 have implied dst=EXEC.
   // Documentation requires dst to be encoded as EXEC (0x7E),
   // but it looks like the actual value encoded for dst operand
diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_flat.s b/llvm/test/MC/AMDGPU/gfx9_asm_flat.s
index 5cc3d2533a149..7687c0a478bd9 100644
--- a/llvm/test/MC/AMDGPU/gfx9_asm_flat.s
+++ b/llvm/test/MC/AMDGPU/gfx9_asm_flat.s
@@ -24,6 +24,18 @@ flat_load_ubyte v5, v[1:2] offset:4095 glc
 flat_load_ubyte v5, v[1:2] offset:4095 slc
 // CHECK: [0xff,0x0f,0x42,0xdc,0x01,0x00,0x00,0x05]
 
+flat_load_ubyte v5, v[1:2] nv
+// CHECK: [0x00,0x00,0x40,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_ubyte v5, v[1:2] offset:7 nv
+// CHECK: [0x07,0x00,0x40,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_ubyte v5, v[1:2] offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x41,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_ubyte v5, v[1:2] offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x42,0xdc,0x01,0x00,0x80,0x05]
+
 flat_load_sbyte v5, v[1:2] offset:4095
 // CHECK: [0xff,0x0f,0x44,0xdc,0x01,0x00,0x00,0x05]
 
@@ -48,6 +60,18 @@ flat_load_sbyte v5, v[1:2] offset:4095 glc
 flat_load_sbyte v5, v[1:2] offset:4095 slc
 // CHECK: [0xff,0x0f,0x46,0xdc,0x01,0x00,0x00,0x05]
 
+flat_load_sbyte v5, v[1:2] nv
+// CHECK: [0x00,0x00,0x44,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_sbyte v5, v[1:2] offset:7 nv
+// CHECK: [0x07,0x00,0x44,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_sbyte v5, v[1:2] offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x45,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_sbyte v5, v[1:2] offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x46,0xdc,0x01,0x00,0x80,0x05]
+
 flat_load_ushort v5, v[1:2] offset:4095
 // CHECK: [0xff,0x0f,0x48,0xdc,0x01,0x00,0x00,0x05]
 
@@ -72,6 +96,18 @@ flat_load_ushort v5, v[1:2] offset:4095 glc
 flat_load_ushort v5, v[1:2] offset:4095 slc
 // CHECK: [0xff,0x0f,0x4a,0xdc,0x01,0x00,0x00,0x05]
 
+flat_load_ushort v5, v[1:2] nv
+// CHECK: [0x00,0x00,0x48,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_ushort v5, v[1:2] offset:7 nv
+// CHECK: [0x07,0x00,0x48,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_ushort v5, v[1:2] offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x49,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_ushort v5, v[1:2] offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x4a,0xdc,0x01,0x00,0x80,0x05]
+
 flat_load_sshort v5, v[1:2] offset:4095
 // CHECK: [0xff,0x0f,0x4c,0xdc,0x01,0x00,0x00,0x05]
 
@@ -96,6 +132,18 @@ flat_load_sshort v5, v[1:2] offset:4095 glc
 flat_load_sshort v5, v[1:2] offset:4095 slc
 // CHECK: [0xff,0x0f,0x4e,0xdc,0x01,0x00,0x00,0x05]
 
+flat_load_sshort v5, v[1:2] nv
+// CHECK: [0x00,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_sshort v5, v[1:2] offset:7 nv
+// CHECK: [0x07,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_sshort v5, v[1:2] offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x4d,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_sshort v5, v[1:2] offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x4e,0xdc,0x01,0x00,0x80,0x05]
+
 flat_load_dword v5, v[1:2] offset:4095
 // CHECK: [0xff,0x0f,0x50,0xdc,0x01,0x00,0x00,0x05]
 
@@ -120,6 +168,18 @@ flat_load_dword v5, v[1:2] offset:4095 glc
 flat_load_dword v5, v[1:2] offset:4095 slc
 // CHECK: [0xff,0x0f,0x52,0xdc,0x01,0x00,0x00,0x05]
 
+flat_load_dword v5, v[1:2] nv
+// CHECK: [0x00,0x00,0x50,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_dword v5, v[1:2] offset:7 nv
+// CHECK: [0x07,0x00,0x50,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_dword v5, v[1:2] offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x51,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_dword v5, v[1:2] offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x52,0xdc,0x01,0x00,0x80,0x05]
+
 flat_load_dwordx2 v[5:6], v[1:2] offset:4095
 // CHECK: [0xff,0x0f,0x54,0xdc,0x01,0x00,0x00,0x05]
 
@@ -144,6 +204,18 @@ flat_load_dwordx2 v[5:6], v[1:2] offset:4095 glc
 flat_load_dwordx2 v[5:6], v[1:2] offset:4095 slc
 // CHECK: [0xff,0x0f,0x56,0xdc,0x01,0x00,0x00,0x05]
 
+flat_load_dwordx2 v[5:6], v[1:2] nv
+// CHECK: [0x00,0x00,0x54,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_dwordx2 v[5:6], v[1:2] offset:7 nv
+// CHECK: [0x07,0x00,0x54,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_dwordx2 v[5:6], v[1:2] offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x55,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_dwordx2 v[5:6], v[1:2] offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x56,0xdc,0x01,0x00,0x80,0x05]
+
 flat_load_dwordx3 v[5:7], v[1:2] offset:4095
 // CHECK: [0xff,0x0f,0x58,0xdc,0x01,0x00,0x00,0x05]
 
@@ -168,6 +240,18 @@ flat_load_dwordx3 v[5:7], v[1:2] offset:4095 glc
 flat_load_dwordx3 v[5:7], v[1:2] offset:4095 slc
 // CHECK: [0xff,0x0f,0x5a,0xdc,0x01,0x00,0x00,0x05]
 
+flat_load_dwordx3 v[5:7], v[1:2] nv
+// CHECK: [0x00,0x00,0x58,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_dwordx3 v[5:7], v[1:2] offset:7 nv
+// CHECK: [0x07,0x00,0x58,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_dwordx3 v[5:7], v[1:2] offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x59,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_dwordx3 v[5:7], v[1:2] offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x5a,0xdc,0x01,0x00,0x80,0x05]
+
 flat_load_dwordx4 v[5:8], v[1:2] offset:4095
 // CHECK: [0xff,0x0f,0x5c,0xdc,0x01,0x00,0x00,0x05]
 
@@ -192,6 +276,18 @@ flat_load_dwordx4 v[5:8], v[1:2] offset:4095 glc
 flat_load_dwordx4 v[5:8], v[1:2] offset:4095 slc
 // CHECK: [0xff,0x0f,0x5e,0xdc,0x01,0x00,0x00,0x05]
 
+flat_load_dwordx4 v[5:8], v[1:2] nv
+// CHECK: [0x00,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_dwordx4 v[5:8], v[1:2] offset:7 nv
+// CHECK: [0x07,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_dwordx4 v[5:8], v[1:2] offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x5d,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_dwordx4 v[5:8], v[1:2] offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x5e,0xdc,0x01,0x00,0x80,0x05]
+
 flat_store_byte v[1:2], v2 offset:4095
 // CHECK: [0xff,0x0f,0x60,0xdc,0x01,0x02,0x00,0x00]
 
@@ -216,6 +312,18 @@ flat_store_byte v[1:2], v2 offset:4095 glc
 flat_store_byte v[1:2], v2 offset:4095 slc
 // CHECK: [0xff,0x0f,0x62,0xdc,0x01,0x02,0x00,0x00]
 
+flat_store_byte v[1:2], v2 nv
+// CHECK: [0x00,0x00,0x60,0xdc,0x01,0x02,0x80,0x00]
+
+flat_store_byte v[1:2], v2 offset:7 nv
+// CHECK: [0x07,0x00,0x60,0xdc,0x01,0x02,0x80,0x00]
+
+flat_store_byte v[1:2], v2 offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x61,0xdc,0x01,0x02,0x80,0x00]
+
+flat_store_byte v[1:2], v2 offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x62,0xdc,0x01,0x02,0x80,0x00]
+
 flat_store_byte_d16_hi v[1:2], v2 offset:4095
 // CHECK: [0xff,0x0f,0x64,0xdc,0x01,0x02,0x00,0x00]
 
@@ -240,6 +348,18 @@ flat_store_byte_d16_hi v[1:2], v2 offset:4095 glc
 flat_store_byte_d16_hi v[1:2], v2 offset:4095 slc
 // CHECK: [0xff,0x0f,0x66,0xdc,0x01,0x02,0x00,0x00]
 
+flat_store_byte_d16_hi v[1:2], v2 nv
+// CHECK: [0x00,0x00,0x64,0xdc,0x01,0x02,0x80,0x00]
+
+flat_store_byte_d16_hi v[1:2], v2 offset:7 nv
+// CHECK: [0x07,0x00,0x64,0xdc,0x01,0x02,0x80,0x00]
+
+flat_store_byte_d16_hi v[1:2], v2 offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x65,0xdc,0x01,0x02,0x80,0x00]
+
+flat_store_byte_d16_hi v[1:2], v2 offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x66,0xdc,0x01,0x02,0x80,0x00]
+
 flat_store_short v[1:2], v2 offset:4095
 // CHECK: [0xff,0x0f,0x68,0xdc,0x01,0x02,0x00,0x00]
 
@@ -264,6 +384,18 @@ flat_store_short v[1:2], v2 offset:4095 glc
 flat_store_short v[1:2], v2 offset:4095 slc
 // CHECK: [0xff,0x0f,0x6a,0xdc,0x01,0x02,0x00,0x00]
 
+flat_store_short v[1:2], v2 nv
+// CHECK: [0x00,0x00,0x68,0xdc,0x01,0x02,0x80,0x00]
+
+flat_store_short v[1:2], v2 offset:7 nv
+// CHECK: [0x07,0x00,0x68,0xdc,0x01,0x02,0x80,0x00]
+
+flat_store_short v[1:2], v2 offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x69,0xdc,0x01,0x02,0x80,0x00]
+
+flat_store_short v[1:2], v2 offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x6a,0xdc,0x01,0x02,0x80,0x00]
+
 flat_store_short_d16_hi v[1:2], v2 offset:4095
 // CHECK: [0xff,0x0f,0x6c,0xdc,0x01,0x02,0x00,0x00]
 
@@ -288,6 +420,18 @@ flat_store_short_d16_hi v[1:2], v2 offset:4095 glc
 flat_store_short_d16_hi v[1:2], v2 offset:4095 slc
 // CHECK: [0xff,0x0f,0x6e,0xdc,0x01,0x02,0x00,0x00]
 
+flat_store_short_d16_hi v[1:2], v2 nv
+// CHECK: [0x00,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00]
+
+flat_store_short_d16_hi v[1:2], v2 offset:7 nv
+// CHECK: [0x07,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00]
+
+flat_store_short_d16_hi v[1:2], v2 offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x6d,0xdc,0x01,0x02,0x80,0x00]
+
+flat_store_short_d16_hi v[1:2], v2 offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x6e,0xdc,0x01,0x02,0x80,0x00]
+
 flat_store_dword v[1:2], v2 offset:4095
 // CHECK: [0xff,0x0f,0x70,0xdc,0x01,0x02,0x00,0x00]
 
@@ -312,6 +456,18 @@ flat_store_dword v[1:2], v2 offset:4095 glc
 flat_store_dword v[1:2], v2 offset:4095 slc
 // CHECK: [0xff,0x0f,0x72,0xdc,0x01,0x02,0x00,0x00]
 
+flat_store_dword v[1:2], v2 nv
+// CHECK: [0x00,0x00,0x70,0xdc,0x01,0x02,0x80,0x00]
+
+flat_store_dword v[1:2], v2 offset:7 nv
+// CHECK: [0x07,0x00,0x70,0xdc,0x01,0x02,0x80,0x00]
+
+flat_store_dword v[1:2], v2 offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x71,0xdc,0x01,0x02,0x80,0x00]
+
+flat_store_dword v[1:2], v2 offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x72,0xdc,0x01,0x02,0x80,0x00]
+
 flat_store_dwordx2 v[1:2], v[2:3] offset:4095
 // CHECK: [0xff,0x0f,0x74,0xdc,0x01,0x02,0x00,0x00]
 
@@ -336,6 +492,18 @@ flat_store_dwordx2 v[1:2], v[2:3] offset:4095 glc
 flat_store_dwordx2 v[1:2], v[2:3] offset:4095 slc
 // CHECK: [0xff,0x0f,0x76,0xdc,0x01,0x02,0x00,0x00]
 
+flat_store_dwordx2 v[1:2], v[2:3] nv
+// CHECK: [0x00,0x00,0x74,0xdc,0x01,0x02,0x80,0x00]
+
+flat_store_dwordx2 v[1:2], v[2:3] offset:7 nv
+// CHECK: [0x07,0x00,0x74,0xdc,0x01,0x02,0x80,0x00]
+
+flat_store_dwordx2 v[1:2], v[2:3] offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x75,0xdc,0x01,0x02,0x80,0x00]
+
+flat_store_dwordx2 v[1:2], v[2:3] offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x76,0xdc,0x01,0x02,0x80,0x00]
+
 flat_store_dwordx3 v[1:2], v[2:4] offset:4095
 // CHECK: [0xff,0x0f,0x78,0xdc,0x01,0x02,0x00,0x00]
 
@@ -360,6 +528,18 @@ flat_store_dwordx3 v[1:2], v[2:4] offset:4095 glc
 flat_store_dwordx3 v[1:2], v[2:4] offset:4095 slc
 // CHECK: [0xff,0x0f,0x7a,0xdc,0x01,0x02,0x00,0x00]
 
+flat_store_dwordx3 v[1:2], v[2:4] nv
+// CHECK: [0x00,0x00,0x78,0xdc,0x01,0x02,0x80,0x00]
+
+flat_store_dwordx3 v[1:2], v[2:4] offset:7 nv
+// CHECK: [0x07,0x00,0x78,0xdc,0x01,0x02,0x80,0x00]
+
+flat_store_dwordx3 v[1:2], v[2:4] offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x79,0xdc,0x01,0x02,0x80,0x00]
+
+flat_store_dwordx3 v[1:2], v[2:4] offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x7a,0xdc,0x01,0x02,0x80,0x00]
+
 flat_store_dwordx4 v[1:2], v[2:5] offset:4095
 // CHECK: [0xff,0x0f,0x7c,0xdc,0x01,0x02,0x00,0x00]
 
@@ -384,6 +564,18 @@ flat_store_dwordx4 v[1:2], v[2:5] offset:4095 glc
 flat_store_dwordx4 v[1:2], v[2:5] offset:4095 slc
 // CHECK: [0xff,0x0f,0x7e,0xdc,0x01,0x02,0x00,0x00]
 
+flat_store_dwordx4 v[1:2], v[2:5] nv
+// CHECK: [0x00,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00]
+
+flat_store_dwordx4 v[1:2], v[2:5] offset:7 nv
+// CHECK: [0x07,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00]
+
+flat_store_dwordx4 v[1:2], v[2:5] offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x7d,0xdc,0x01,0x02,0x80,0x00]
+
+flat_store_dwordx4 v[1:2], v[2:5] offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x7e,0xdc,0x01,0x02,0x80,0x00]
+
 flat_load_ubyte_d16 v5, v[1:2] offset:4095
 // CHECK: [0xff,0x0f,0x80,0xdc,0x01,0x00,0x00,0x05]
 
@@ -408,6 +600,18 @@ flat_load_ubyte_d16 v5, v[1:2] offset:4095 glc
 flat_load_ubyte_d16 v5, v[1:2] offset:4095 slc
 // CHECK: [0xff,0x0f,0x82,0xdc,0x01,0x00,0x00,0x05]
 
+flat_load_ubyte_d16 v5, v[1:2] nv
+// CHECK: [0x00,0x00,0x80,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_ubyte_d16 v5, v[1:2] offset:7 nv
+// CHECK: [0x07,0x00,0x80,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_ubyte_d16 v5, v[1:2] offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x81,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_ubyte_d16 v5, v[1:2] offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x82,0xdc,0x01,0x00,0x80,0x05]
+
 flat_load_ubyte_d16_hi v5, v[1:2] offset:4095
 // CHECK: [0xff,0x0f,0x84,0xdc,0x01,0x00,0x00,0x05]
 
@@ -432,6 +636,18 @@ flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 glc
 flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 slc
 // CHECK: [0xff,0x0f,0x86,0xdc,0x01,0x00,0x00,0x05]
 
+flat_load_ubyte_d16_hi v5, v[1:2] nv
+// CHECK: [0x00,0x00,0x84,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_ubyte_d16_hi v5, v[1:2] offset:7 nv
+// CHECK: [0x07,0x00,0x84,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x85,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x86,0xdc,0x01,0x00,0x80,0x05]
+
 flat_load_sbyte_d16 v5, v[1:2] offset:4095
 // CHECK: [0xff,0x0f,0x88,0xdc,0x01,0x00,0x00,0x05]
 
@@ -456,6 +672,18 @@ flat_load_sbyte_d16 v5, v[1:2] offset:4095 glc
 flat_load_sbyte_d16 v5, v[1:2] offset:4095 slc
 // CHECK: [0xff,0x0f,0x8a,0xdc,0x01,0x00,0x00,0x05]
 
+flat_load_sbyte_d16 v5, v[1:2] nv
+// CHECK: [0x00,0x00,0x88,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_sbyte_d16 v5, v[1:2] offset:7 nv
+// CHECK: [0x07,0x00,0x88,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_sbyte_d16 v5, v[1:2] offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x89,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_sbyte_d16 v5, v[1:2] offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x8a,0xdc,0x01,0x00,0x80,0x05]
+
 flat_load_sbyte_d16_hi v5, v[1:2] offset:4095
 // CHECK: [0xff,0x0f,0x8c,0xdc,0x01,0x00,0x00,0x05]
 
@@ -480,6 +708,18 @@ flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 glc
 flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 slc
 // CHECK: [0xff,0x0f,0x8e,0xdc,0x01,0x00,0x00,0x05]
 
+flat_load_sbyte_d16_hi v5, v[1:2] nv
+// CHECK: [0x00,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_sbyte_d16_hi v5, v[1:2] offset:7 nv
+// CHECK: [0x07,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x8d,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x8e,0xdc,0x01,0x00,0x80,0x05]
+
 flat_load_short_d16 v5, v[1:2] offset:4095
 // CHECK: [0xff,0x0f,0x90,0xdc,0x01,0x00,0x00,0x05]
 
@@ -504,6 +744,18 @@ flat_load_short_d16 v5, v[1:2] offset:4095 glc
 flat_load_short_d16 v5, v[1:2] offset:4095 slc
 // CHECK: [0xff,0x0f,0x92,0xdc,0x01,0x00,0x00,0x05]
 
+flat_load_short_d16 v5, v[1:2] nv
+// CHECK: [0x00,0x00,0x90,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_short_d16 v5, v[1:2] offset:7 nv
+// CHECK: [0x07,0x00,0x90,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_short_d16 v5, v[1:2] offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x91,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_short_d16 v5, v[1:2] offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x92,0xdc,0x01,0x00,0x80,0x05]
+
 flat_load_short_d16_hi v5, v[1:2] offset:4095
 // CHECK: [0xff,0x0f,0x94,0xdc,0x01,0x00,0x00,0x05]
 
@@ -528,6 +780,18 @@ flat_load_short_d16_hi v5, v[1:2] offset:4095 glc
 flat_load_short_d16_hi v5, v[1:2] offset:4095 slc
 // CHECK: [0xff,0x0f,0x96,0xdc,0x01,0x00,0x00,0x05]
 
+flat_load_short_d16_hi v5, v[1:2] nv
+// CHECK: [0x00,0x00,0x94,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_short_d16_hi v5, v[1:2] offset:7 nv
+// CHECK: [0x07,0x00,0x94,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_short_d16_hi v5, v[1:2] offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x95,0xdc,0x01,0x00,0x80,0x05]
+
+flat_load_short_d16_hi v5, v[1:2] offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x96,0xdc,0x01,0x00,0x80,0x05]
+
 flat_atomic_swap v[1:2], v2 offset:4095
 // CHECK: [0xff,0x0f,0x00,0xdd,0x01,0x02,0x00,0x00]
 
@@ -552,6 +816,18 @@ flat_atomic_swap v0, v[1:2], v2 offset:4095 glc
 flat_atomic_swap v[1:2], v2 offset:4095 slc
 // CHECK: [0xff,0x0f,0x02,0xdd,0x01,0x02,0x00,0x00]
 
+flat_atomic_swap v[1:2], v2 nv
+// CHECK: [0x00,0x00,0x00,0xdd,0x01,0x02,0x80,0x00]
+
+flat_atomic_swap v[1:2], v2 offset:7 nv
+// CHECK: [0x07,0x00,0x00,0xdd,0x01,0x02,0x80,0x00]
+
+flat_atomic_swap v0, v[1:2], v2 offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x01,0xdd,0x01,0x02,0x80,0x00]
+
+flat_atomic_swap v[1:2], v2 offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x02,0xdd,0x01,0x02,0x80,0x00]
+
 flat_atomic_cmpswap v[1:2], v[2:3] offset:4095
 // CHECK: [0xff,0x0f,0x04,0xdd,0x01,0x02,0x00,0x00]
 
@@ -576,6 +852,18 @@ flat_atomic_cmpswap v0, v[1:2], v[2:3] offset:4095 glc
 flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 slc
 // CHECK: [0xff,0x0f,0x06,0xdd,0x01,0x02,0x00,0x00]
 
+flat_atomic_cmpswap v[1:2], v[2:3] nv
+// CHECK: [0x00,0x00,0x04,0xdd,0x01,0x02,0x80,0x00]
+
+flat_atomic_cmpswap v[1:2], v[2:3] offset:7 nv
+// CHECK: [0x07,0x00,0x04,0xdd,0x01,0x02,0x80,0x00]
+
+flat_atomic_cmpswap v0, v[1:2], v[2:3] offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x05,0xdd,0x01,0x02,0x80,0x00]
+
+flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x06,0xdd,0x01,0x02,0x80,0x00]
+
 flat_atomic_add v[1:2], v2 offset:4095
 // CHECK: [0xff,0x0f,0x08,0xdd,0x01,0x02,0x00,0x00]
 
@@ -600,6 +888,18 @@ flat_atomic_add v0, v[1:2], v2 offset:4095 glc
 flat_atomic_add v[1:2], v2 offset:4095 slc
 // CHECK: [0xff,0x0f,0x0a,0xdd,0x01,0x02,0x00,0x00]
 
+flat_atomic_add v[1:2], v2 nv
+// CHECK: [0x00,0x00,0x08,0xdd,0x01,0x02,0x80,0x00]
+
+flat_atomic_add v[1:2], v2 offset:7 nv
+// CHECK: [0x07,0x00,0x08,0xdd,0x01,0x02,0x80,0x00]
+
+flat_atomic_add v0, v[1:2], v2 offset:4095 glc nv
+// CHECK: [0xff,0x0f,0x09,0xdd,0x01,0x02,0x80,0x00]
+
+flat_atomic_add v[1:2], v2 offset:4095 slc nv
+// CHECK: [0xff,0x0f,0x0a,0xdd,0x01,0x02,0x80,0x00]
+
 flat_atomic_sub v[1:2], v2 offset:4095
 // CHECK: [0xff,0x0f,0x0c,0xdd,0x01,0x02,0x00,0x00]
 
@@ -1197,6 +1497,18 @@ global_load_ubyte v5, v1, s[4:5] offset:-1 glc
 global_load_ubyte v5, v1, s[4:5] offset:-1 slc
 // CHECK: [0xff,0x9f,0x42,0xdc,0x01,0x00,0x04,0x05]
 
+global_load_ubyte v5, v1, s[4:5] nv
+// CHECK: [0x00,0x80,0x40,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_ubyte v5, v1, s[4:5] offset:-1 nv
+// CHECK: [0xff,0x9f,0x40,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_ubyte v5, v1, s[4:5] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x41,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_ubyte v5, v1, s[4:5] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x42,0xdc,0x01,0x00,0x84,0x05]
+
 global_load_sbyte v5, v1, s[4:5] offset:-1
 // CHECK: [0xff,0x9f,0x44,0xdc,0x01,0x00,0x04,0x05]
 
@@ -1242,6 +1554,18 @@ global_load_sbyte v5, v1, s[4:5] offset:-1 glc
 global_load_sbyte v5, v1, s[4:5] offset:-1 slc
 // CHECK: [0xff,0x9f,0x46,0xdc,0x01,0x00,0x04,0x05]
 
+global_load_sbyte v5, v1, s[4:5] nv
+// CHECK: [0x00,0x80,0x44,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_sbyte v5, v1, s[4:5] offset:-1 nv
+// CHECK: [0xff,0x9f,0x44,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_sbyte v5, v1, s[4:5] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x45,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_sbyte v5, v1, s[4:5] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x46,0xdc,0x01,0x00,0x84,0x05]
+
 global_load_ushort v5, v1, s[4:5] offset:-1
 // CHECK: [0xff,0x9f,0x48,0xdc,0x01,0x00,0x04,0x05]
 
@@ -1287,6 +1611,18 @@ global_load_ushort v5, v1, s[4:5] offset:-1 glc
 global_load_ushort v5, v1, s[4:5] offset:-1 slc
 // CHECK: [0xff,0x9f,0x4a,0xdc,0x01,0x00,0x04,0x05]
 
+global_load_ushort v5, v1, s[4:5] nv
+// CHECK: [0x00,0x80,0x48,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_ushort v5, v1, s[4:5] offset:-1 nv
+// CHECK: [0xff,0x9f,0x48,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_ushort v5, v1, s[4:5] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x49,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_ushort v5, v1, s[4:5] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x4a,0xdc,0x01,0x00,0x84,0x05]
+
 global_load_sshort v5, v1, s[4:5] offset:-1
 // CHECK: [0xff,0x9f,0x4c,0xdc,0x01,0x00,0x04,0x05]
 
@@ -1332,6 +1668,18 @@ global_load_sshort v5, v1, s[4:5] offset:-1 glc
 global_load_sshort v5, v1, s[4:5] offset:-1 slc
 // CHECK: [0xff,0x9f,0x4e,0xdc,0x01,0x00,0x04,0x05]
 
+global_load_sshort v5, v1, s[4:5] nv
+// CHECK: [0x00,0x80,0x4c,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_sshort v5, v1, s[4:5] offset:-1 nv
+// CHECK: [0xff,0x9f,0x4c,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_sshort v5, v1, s[4:5] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x4d,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_sshort v5, v1, s[4:5] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x4e,0xdc,0x01,0x00,0x84,0x05]
+
 global_load_dword v5, v1, s[4:5] offset:-1
 // CHECK: [0xff,0x9f,0x50,0xdc,0x01,0x00,0x04,0x05]
 
@@ -1377,6 +1725,18 @@ global_load_dword v5, v1, s[4:5] offset:-1 glc
 global_load_dword v5, v1, s[4:5] offset:-1 slc
 // CHECK: [0xff,0x9f,0x52,0xdc,0x01,0x00,0x04,0x05]
 
+global_load_dword v5, v1, s[4:5] nv
+// CHECK: [0x00,0x80,0x50,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_dword v5, v1, s[4:5] offset:-1 nv
+// CHECK: [0xff,0x9f,0x50,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_dword v5, v1, s[4:5] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x51,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_dword v5, v1, s[4:5] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x52,0xdc,0x01,0x00,0x84,0x05]
+
 global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1
 // CHECK: [0xff,0x9f,0x54,0xdc,0x01,0x00,0x04,0x05]
 
@@ -1422,6 +1782,18 @@ global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 glc
 global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 slc
 // CHECK: [0xff,0x9f,0x56,0xdc,0x01,0x00,0x04,0x05]
 
+global_load_dwordx2 v[5:6], v1, s[4:5] nv
+// CHECK: [0x00,0x80,0x54,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 nv
+// CHECK: [0xff,0x9f,0x54,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x55,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x56,0xdc,0x01,0x00,0x84,0x05]
+
 global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1
 // CHECK: [0xff,0x9f,0x58,0xdc,0x01,0x00,0x04,0x05]
 
@@ -1467,6 +1839,15 @@ global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 glc
 global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 slc
 // CHECK: [0xff,0x9f,0x5a,0xdc,0x01,0x00,0x04,0x05]
 
+global_load_dwordx3 v[5:7], v1, s[4:5] nv
+// CHECK: [0x00,0x80,0x58,0xdc,0x01,0x00,0x84,0x05]
+global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 nv
+// CHECK: [0xff,0x9f,0x58,0xdc,0x01,0x00,0x84,0x05]
+global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x59,0xdc,0x01,0x00,0x84,0x05]
+global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x5a,0xdc,0x01,0x00,0x84,0x05]
+
 global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1
 // CHECK: [0xff,0x9f,0x5c,0xdc,0x01,0x00,0x04,0x05]
 
@@ -1512,6 +1893,15 @@ global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 glc
 global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 slc
 // CHECK: [0xff,0x9f,0x5e,0xdc,0x01,0x00,0x04,0x05]
 
+global_load_dwordx4 v[5:8], v1, s[4:5] nv
+// CHECK: [0x00,0x80,0x5c,0xdc,0x01,0x00,0x84,0x05]
+global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 nv
+// CHECK: [0xff,0x9f,0x5c,0xdc,0x01,0x00,0x84,0x05]
+global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x5d,0xdc,0x01,0x00,0x84,0x05]
+global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x5e,0xdc,0x01,0x00,0x84,0x05]
+
 global_store_byte v1, v2, s[6:7] offset:-1
 // CHECK: [0xff,0x9f,0x60,0xdc,0x01,0x02,0x06,0x00]
 
@@ -1557,6 +1947,18 @@ global_store_byte v1, v2, s[6:7] offset:-1 glc
 global_store_byte v1, v2, s[6:7] offset:-1 slc
 // CHECK: [0xff,0x9f,0x62,0xdc,0x01,0x02,0x06,0x00]
 
+global_store_byte v1, v2, s[6:7] nv
+// CHECK: [0x00,0x80,0x60,0xdc,0x01,0x02,0x86,0x00]
+
+global_store_byte v1, v2, s[6:7] offset:-1 nv
+// CHECK: [0xff,0x9f,0x60,0xdc,0x01,0x02,0x86,0x00]
+
+global_store_byte v1, v2, s[6:7] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x61,0xdc,0x01,0x02,0x86,0x00]
+
+global_store_byte v1, v2, s[6:7] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x62,0xdc,0x01,0x02,0x86,0x00]
+
 global_store_byte_d16_hi v1, v2, s[6:7] offset:-1
 // CHECK: [0xff,0x9f,0x64,0xdc,0x01,0x02,0x06,0x00]
 
@@ -1602,6 +2004,18 @@ global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 glc
 global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 slc
 // CHECK: [0xff,0x9f,0x66,0xdc,0x01,0x02,0x06,0x00]
 
+global_store_byte_d16_hi v1, v2, s[6:7] nv
+// CHECK: [0x00,0x80,0x64,0xdc,0x01,0x02,0x86,0x00]
+
+global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 nv
+// CHECK: [0xff,0x9f,0x64,0xdc,0x01,0x02,0x86,0x00]
+
+global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x65,0xdc,0x01,0x02,0x86,0x00]
+
+global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x66,0xdc,0x01,0x02,0x86,0x00]
+
 global_store_short v1, v2, s[6:7] offset:-1
 // CHECK: [0xff,0x9f,0x68,0xdc,0x01,0x02,0x06,0x00]
 
@@ -1647,6 +2061,18 @@ global_store_short v1, v2, s[6:7] offset:-1 glc
 global_store_short v1, v2, s[6:7] offset:-1 slc
 // CHECK: [0xff,0x9f,0x6a,0xdc,0x01,0x02,0x06,0x00]
 
+global_store_short v1, v2, s[6:7] nv
+// CHECK: [0x00,0x80,0x68,0xdc,0x01,0x02,0x86,0x00]
+
+global_store_short v1, v2, s[6:7] offset:-1 nv
+// CHECK: [0xff,0x9f,0x68,0xdc,0x01,0x02,0x86,0x00]
+
+global_store_short v1, v2, s[6:7] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x69,0xdc,0x01,0x02,0x86,0x00]
+
+global_store_short v1, v2, s[6:7] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x6a,0xdc,0x01,0x02,0x86,0x00]
+
 global_store_short_d16_hi v1, v2, s[6:7] offset:-1
 // CHECK: [0xff,0x9f,0x6c,0xdc,0x01,0x02,0x06,0x00]
 
@@ -1692,6 +2118,18 @@ global_store_short_d16_hi v1, v2, s[6:7] offset:-1 glc
 global_store_short_d16_hi v1, v2, s[6:7] offset:-1 slc
 // CHECK: [0xff,0x9f,0x6e,0xdc,0x01,0x02,0x06,0x00]
 
+global_store_short_d16_hi v1, v2, s[6:7] nv
+// CHECK: [0x00,0x80,0x6c,0xdc,0x01,0x02,0x86,0x00]
+
+global_store_short_d16_hi v1, v2, s[6:7] offset:-1 nv
+// CHECK: [0xff,0x9f,0x6c,0xdc,0x01,0x02,0x86,0x00]
+
+global_store_short_d16_hi v1, v2, s[6:7] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x6d,0xdc,0x01,0x02,0x86,0x00]
+
+global_store_short_d16_hi v1, v2, s[6:7] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x6e,0xdc,0x01,0x02,0x86,0x00]
+
 global_store_dword v1, v2, s[6:7] offset:-1
 // CHECK: [0xff,0x9f,0x70,0xdc,0x01,0x02,0x06,0x00]
 
@@ -1737,6 +2175,18 @@ global_store_dword v1, v2, s[6:7] offset:-1 glc
 global_store_dword v1, v2, s[6:7] offset:-1 slc
 // CHECK: [0xff,0x9f,0x72,0xdc,0x01,0x02,0x06,0x00]
 
+global_store_dword v1, v2, s[6:7] nv
+// CHECK: [0x00,0x80,0x70,0xdc,0x01,0x02,0x86,0x00]
+
+global_store_dword v1, v2, s[6:7] offset:-1 nv
+// CHECK: [0xff,0x9f,0x70,0xdc,0x01,0x02,0x86,0x00]
+
+global_store_dword v1, v2, s[6:7] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x71,0xdc,0x01,0x02,0x86,0x00]
+
+global_store_dword v1, v2, s[6:7] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x72,0xdc,0x01,0x02,0x86,0x00]
+
 global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1
 // CHECK: [0xff,0x9f,0x74,0xdc,0x01,0x02,0x06,0x00]
 
@@ -1782,6 +2232,18 @@ global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 glc
 global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 slc
 // CHECK: [0xff,0x9f,0x76,0xdc,0x01,0x02,0x06,0x00]
 
+global_store_dwordx2 v1, v[2:3], s[6:7] nv
+// CHECK: [0x00,0x80,0x74,0xdc,0x01,0x02,0x86,0x00]
+
+global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 nv
+// CHECK: [0xff,0x9f,0x74,0xdc,0x01,0x02,0x86,0x00]
+
+global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x75,0xdc,0x01,0x02,0x86,0x00]
+
+global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x76,0xdc,0x01,0x02,0x86,0x00]
+
 global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1
 // CHECK: [0xff,0x9f,0x78,0xdc,0x01,0x02,0x06,0x00]
 
@@ -1827,6 +2289,18 @@ global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 glc
 global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 slc
 // CHECK: [0xff,0x9f,0x7a,0xdc,0x01,0x02,0x06,0x00]
 
+global_store_dwordx3 v1, v[2:4], s[6:7] nv
+// CHECK: [0x00,0x80,0x78,0xdc,0x01,0x02,0x86,0x00]
+
+global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 nv
+// CHECK: [0xff,0x9f,0x78,0xdc,0x01,0x02,0x86,0x00]
+
+global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x79,0xdc,0x01,0x02,0x86,0x00]
+
+global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x7a,0xdc,0x01,0x02,0x86,0x00]
+
 global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1
 // CHECK: [0xff,0x9f,0x7c,0xdc,0x01,0x02,0x06,0x00]
 
@@ -1872,6 +2346,18 @@ global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 glc
 global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 slc
 // CHECK: [0xff,0x9f,0x7e,0xdc,0x01,0x02,0x06,0x00]
 
+global_store_dwordx4 v1, v[2:5], s[6:7] nv
+// CHECK: [0x00,0x80,0x7c,0xdc,0x01,0x02,0x86,0x00]
+
+global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 nv
+// CHECK: [0xff,0x9f,0x7c,0xdc,0x01,0x02,0x86,0x00]
+
+global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x7d,0xdc,0x01,0x02,0x86,0x00]
+
+global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x7e,0xdc,0x01,0x02,0x86,0x00]
+
 global_load_ubyte_d16 v5, v1, s[4:5] offset:-1
 // CHECK: [0xff,0x9f,0x80,0xdc,0x01,0x00,0x04,0x05]
 
@@ -1917,6 +2403,18 @@ global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 glc
 global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 slc
 // CHECK: [0xff,0x9f,0x82,0xdc,0x01,0x00,0x04,0x05]
 
+global_load_ubyte_d16 v5, v1, s[4:5] nv
+// CHECK: [0x00,0x80,0x80,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 nv
+// CHECK: [0xff,0x9f,0x80,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x81,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x82,0xdc,0x01,0x00,0x84,0x05]
+
 global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1
 // CHECK: [0xff,0x9f,0x84,0xdc,0x01,0x00,0x04,0x05]
 
@@ -1962,6 +2460,18 @@ global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 glc
 global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 slc
 // CHECK: [0xff,0x9f,0x86,0xdc,0x01,0x00,0x04,0x05]
 
+global_load_ubyte_d16_hi v5, v1, s[4:5] nv
+// CHECK: [0x00,0x80,0x84,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 nv
+// CHECK: [0xff,0x9f,0x84,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x85,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x86,0xdc,0x01,0x00,0x84,0x05]
+
 global_load_sbyte_d16 v5, v1, s[4:5] offset:-1
 // CHECK: [0xff,0x9f,0x88,0xdc,0x01,0x00,0x04,0x05]
 
@@ -2007,6 +2517,18 @@ global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 glc
 global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 slc
 // CHECK: [0xff,0x9f,0x8a,0xdc,0x01,0x00,0x04,0x05]
 
+global_load_sbyte_d16 v5, v1, s[4:5] nv
+// CHECK: [0x00,0x80,0x88,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 nv
+// CHECK: [0xff,0x9f,0x88,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x89,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x8a,0xdc,0x01,0x00,0x84,0x05]
+
 global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1
 // CHECK: [0xff,0x9f,0x8c,0xdc,0x01,0x00,0x04,0x05]
 
@@ -2052,6 +2574,18 @@ global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 glc
 global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 slc
 // CHECK: [0xff,0x9f,0x8e,0xdc,0x01,0x00,0x04,0x05]
 
+global_load_sbyte_d16_hi v5, v1, s[4:5] nv
+// CHECK: [0x00,0x80,0x8c,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 nv
+// CHECK: [0xff,0x9f,0x8c,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x8d,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x8e,0xdc,0x01,0x00,0x84,0x05]
+
 global_load_short_d16 v5, v1, s[4:5] offset:-1
 // CHECK: [0xff,0x9f,0x90,0xdc,0x01,0x00,0x04,0x05]
 
@@ -2097,6 +2631,18 @@ global_load_short_d16 v5, v1, s[4:5] offset:-1 glc
 global_load_short_d16 v5, v1, s[4:5] offset:-1 slc
 // CHECK: [0xff,0x9f,0x92,0xdc,0x01,0x00,0x04,0x05]
 
+global_load_short_d16 v5, v1, s[4:5] nv
+// CHECK: [0x00,0x80,0x90,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_short_d16 v5, v1, s[4:5] offset:-1 nv
+// CHECK: [0xff,0x9f,0x90,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_short_d16 v5, v1, s[4:5] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x91,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_short_d16 v5, v1, s[4:5] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x92,0xdc,0x01,0x00,0x84,0x05]
+
 global_load_short_d16_hi v5, v1, s[4:5] offset:-1
 // CHECK: [0xff,0x9f,0x94,0xdc,0x01,0x00,0x04,0x05]
 
@@ -2142,6 +2688,18 @@ global_load_short_d16_hi v5, v1, s[4:5] offset:-1 glc
 global_load_short_d16_hi v5, v1, s[4:5] offset:-1 slc
 // CHECK: [0xff,0x9f,0x96,0xdc,0x01,0x00,0x04,0x05]
 
+global_load_short_d16_hi v5, v1, s[4:5] nv
+// CHECK: [0x00,0x80,0x94,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_short_d16_hi v5, v1, s[4:5] offset:-1 nv
+// CHECK: [0xff,0x9f,0x94,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_short_d16_hi v5, v1, s[4:5] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x95,0xdc,0x01,0x00,0x84,0x05]
+
+global_load_short_d16_hi v5, v1, s[4:5] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x96,0xdc,0x01,0x00,0x84,0x05]
+
 global_atomic_swap v1, v2, s[6:7] offset:-1
 // CHECK: [0xff,0x9f,0x00,0xdd,0x01,0x02,0x06,0x00]
 
@@ -2187,6 +2745,18 @@ global_atomic_swap v0, v1, v2, s[6:7] offset:-1 glc
 global_atomic_swap v1, v2, s[6:7] offset:-1 slc
 // CHECK: [0xff,0x9f,0x02,0xdd,0x01,0x02,0x06,0x00]
 
+global_atomic_swap v1, v2, s[6:7] nv
+// CHECK: [0x00,0x80,0x00,0xdd,0x01,0x02,0x86,0x00]
+
+global_atomic_swap v1, v2, s[6:7] offset:-1 nv
+// CHECK: [0xff,0x9f,0x00,0xdd,0x01,0x02,0x86,0x00]
+
+global_atomic_swap v0, v1, v2, s[6:7] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x01,0xdd,0x01,0x02,0x86,0x00]
+
+global_atomic_swap v1, v2, s[6:7] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x02,0xdd,0x01,0x02,0x86,0x00]
+
 global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1
 // CHECK: [0xff,0x9f,0x04,0xdd,0x01,0x02,0x06,0x00]
 
@@ -2232,6 +2802,18 @@ global_atomic_cmpswap v0, v1, v[2:3], s[6:7] offset:-1 glc
 global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 slc
 // CHECK: [0xff,0x9f,0x06,0xdd,0x01,0x02,0x06,0x00]
 
+global_atomic_cmpswap v1, v[2:3], s[6:7] nv
+// CHECK: [0x00,0x80,0x04,0xdd,0x01,0x02,0x86,0x00]
+
+global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 nv
+// CHECK: [0xff,0x9f,0x04,0xdd,0x01,0x02,0x86,0x00]
+
+global_atomic_cmpswap v0, v1, v[2:3], s[6:7] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x05,0xdd,0x01,0x02,0x86,0x00]
+
+global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x06,0xdd,0x01,0x02,0x86,0x00]
+
 global_atomic_add v1, v2, s[6:7] offset:-1
 // CHECK: [0xff,0x9f,0x08,0xdd,0x01,0x02,0x06,0x00]
 
@@ -2277,6 +2859,18 @@ global_atomic_add v0, v1, v2, s[6:7] offset:-1 glc
 global_atomic_add v1, v2, s[6:7] offset:-1 slc
 // CHECK: [0xff,0x9f,0x0a,0xdd,0x01,0x02,0x06,0x00]
 
+global_atomic_add v1, v2, s[6:7] nv
+// CHECK: [0x00,0x80,0x08,0xdd,0x01,0x02,0x86,0x00]
+
+global_atomic_add v1, v2, s[6:7] offset:-1 nv
+// CHECK: [0xff,0x9f,0x08,0xdd,0x01,0x02,0x86,0x00]
+
+global_atomic_add v0, v1, v2, s[6:7] offset:-1 glc nv
+// CHECK: [0xff,0x9f,0x09,0xdd,0x01,0x02,0x86,0x00]
+
+global_atomic_add v1, v2, s[6:7] offset:-1 slc nv
+// CHECK: [0xff,0x9f,0x0a,0xdd,0x01,0x02,0x86,0x00]
+
 global_atomic_sub v1, v2, s[6:7] offset:-1
 // CHECK: [0xff,0x9f,0x0c,0xdd,0x01,0x02,0x06,0x00]
 
@@ -3357,6 +3951,18 @@ scratch_load_ubyte v5, off, s2 offset:-1 glc
 scratch_load_ubyte v5, off, s2 offset:-1 slc
 // CHECK: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x02,0x05]
 
+scratch_load_ubyte v5, off, s2 nv
+// CHECK: [0x00,0x40,0x40,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_ubyte v5, off, s2 offset:-1 nv
+// CHECK: [0xff,0x5f,0x40,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_ubyte v5, off, s2 offset:-1 glc nv
+// CHECK: [0xff,0x5f,0x41,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_ubyte v5, off, s2 offset:-1 slc nv
+// CHECK: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x82,0x05]
+
 scratch_load_sbyte v5, off, s2 offset:-1
 // CHECK: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x02,0x05]
 
@@ -3402,6 +4008,18 @@ scratch_load_sbyte v5, off, s2 offset:-1 glc
 scratch_load_sbyte v5, off, s2 offset:-1 slc
 // CHECK: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x02,0x05]
 
+scratch_load_sbyte v5, off, s2 nv
+// CHECK: [0x00,0x40,0x44,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_sbyte v5, off, s2 offset:-1 nv
+// CHECK: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_sbyte v5, off, s2 offset:-1 glc nv
+// CHECK: [0xff,0x5f,0x45,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_sbyte v5, off, s2 offset:-1 slc nv
+// CHECK: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x82,0x05]
+
 scratch_load_ushort v5, off, s2 offset:-1
 // CHECK: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x02,0x05]
 
@@ -3447,6 +4065,18 @@ scratch_load_ushort v5, off, s2 offset:-1 glc
 scratch_load_ushort v5, off, s2 offset:-1 slc
 // CHECK: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x02,0x05]
 
+scratch_load_ushort v5, off, s2 nv
+// CHECK: [0x00,0x40,0x48,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_ushort v5, off, s2 offset:-1 nv
+// CHECK: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_ushort v5, off, s2 offset:-1 glc nv
+// CHECK: [0xff,0x5f,0x49,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_ushort v5, off, s2 offset:-1 slc nv
+// CHECK: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x82,0x05]
+
 scratch_load_sshort v5, off, s2 offset:-1
 // CHECK: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x02,0x05]
 
@@ -3492,6 +4122,18 @@ scratch_load_sshort v5, off, s2 offset:-1 glc
 scratch_load_sshort v5, off, s2 offset:-1 slc
 // CHECK: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x02,0x05]
 
+scratch_load_sshort v5, off, s2 nv
+// CHECK: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_sshort v5, off, s2 offset:-1 nv
+// CHECK: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_sshort v5, off, s2 offset:-1 glc nv
+// CHECK: [0xff,0x5f,0x4d,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_sshort v5, off, s2 offset:-1 slc nv
+// CHECK: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x82,0x05]
+
 scratch_load_dword v5, off, s2 offset:-1
 // CHECK: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x02,0x05]
 
@@ -3537,6 +4179,18 @@ scratch_load_dword v5, off, s2 offset:-1 glc
 scratch_load_dword v5, off, s2 offset:-1 slc
 // CHECK: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x02,0x05]
 
+scratch_load_dword v5, off, s2 nv
+// CHECK: [0x00,0x40,0x50,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_dword v5, off, s2 offset:-1 nv
+// CHECK: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_dword v5, off, s2 offset:-1 glc nv
+// CHECK: [0xff,0x5f,0x51,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_dword v5, off, s2 offset:-1 slc nv
+// CHECK: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x82,0x05]
+
 scratch_load_dwordx2 v[5:6], off, s2 offset:-1
 // CHECK: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x02,0x05]
 
@@ -3582,6 +4236,18 @@ scratch_load_dwordx2 v[5:6], off, s2 offset:-1 glc
 scratch_load_dwordx2 v[5:6], off, s2 offset:-1 slc
 // CHECK: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x02,0x05]
 
+scratch_load_dwordx2 v[5:6], off, s2 nv
+// CHECK: [0x00,0x40,0x54,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_dwordx2 v[5:6], off, s2 offset:-1 nv
+// CHECK: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_dwordx2 v[5:6], off, s2 offset:-1 glc nv
+// CHECK: [0xff,0x5f,0x55,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_dwordx2 v[5:6], off, s2 offset:-1 slc nv
+// CHECK: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x82,0x05]
+
 scratch_load_dwordx3 v[5:7], off, s2 offset:-1
 // CHECK: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x02,0x05]
 
@@ -3627,6 +4293,18 @@ scratch_load_dwordx3 v[5:7], off, s2 offset:-1 glc
 scratch_load_dwordx3 v[5:7], off, s2 offset:-1 slc
 // CHECK: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x02,0x05]
 
+scratch_load_dwordx3 v[5:7], off, s2 nv
+// CHECK: [0x00,0x40,0x58,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_dwordx3 v[5:7], off, s2 offset:-1 nv
+// CHECK: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_dwordx3 v[5:7], off, s2 offset:-1 glc nv
+// CHECK: [0xff,0x5f,0x59,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_dwordx3 v[5:7], off, s2 offset:-1 slc nv
+// CHECK: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x82,0x05]
+
 scratch_load_dwordx4 v[5:8], off, s2 offset:-1
 // CHECK: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x02,0x05]
 
@@ -3672,6 +4350,18 @@ scratch_load_dwordx4 v[5:8], off, s2 offset:-1 glc
 scratch_load_dwordx4 v[5:8], off, s2 offset:-1 slc
 // CHECK: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x02,0x05]
 
+scratch_load_dwordx4 v[5:8], off, s2 nv
+// CHECK: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_dwordx4 v[5:8], off, s2 offset:-1 nv
+// CHECK: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_dwordx4 v[5:8], off, s2 offset:-1 glc nv
+// CHECK: [0xff,0x5f,0x5d,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_dwordx4 v[5:8], off, s2 offset:-1 slc nv
+// CHECK: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x82,0x05]
+
 scratch_store_byte off, v2, s3 offset:-1
 // CHECK: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x03,0x00]
 
@@ -3717,6 +4407,18 @@ scratch_store_byte off, v2, s3 offset:-1 glc
 scratch_store_byte off, v2, s3 offset:-1 slc
 // CHECK: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x03,0x00]
 
+scratch_store_byte off, v2, s3 nv
+// CHECK: [0x00,0x40,0x60,0xdc,0x00,0x02,0x83,0x00]
+
+scratch_store_byte off, v2, s3 offset:-1 nv
+// CHECK: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x83,0x00]
+
+scratch_store_byte off, v2, s3 offset:-1 glc nv
+// CHECK: [0xff,0x5f,0x61,0xdc,0x00,0x02,0x83,0x00]
+
+scratch_store_byte off, v2, s3 offset:-1 slc nv
+// CHECK: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x83,0x00]
+
 scratch_store_byte_d16_hi off, v2, s3 offset:-1
 // CHECK: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x03,0x00]
 
@@ -3762,6 +4464,18 @@ scratch_store_byte_d16_hi off, v2, s3 offset:-1 glc
 scratch_store_byte_d16_hi off, v2, s3 offset:-1 slc
 // CHECK: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x03,0x00]
 
+scratch_store_byte_d16_hi off, v2, s3 nv
+// CHECK: [0x00,0x40,0x64,0xdc,0x00,0x02,0x83,0x00]
+
+scratch_store_byte_d16_hi off, v2, s3 offset:-1 nv
+// CHECK: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x83,0x00]
+
+scratch_store_byte_d16_hi off, v2, s3 offset:-1 glc nv
+// CHECK: [0xff,0x5f,0x65,0xdc,0x00,0x02,0x83,0x00]
+
+scratch_store_byte_d16_hi off, v2, s3 offset:-1 slc nv
+// CHECK: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x83,0x00]
+
 scratch_store_short off, v2, s3 offset:-1
 // CHECK: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x03,0x00]
 
@@ -3807,6 +4521,18 @@ scratch_store_short off, v2, s3 offset:-1 glc
 scratch_store_short off, v2, s3 offset:-1 slc
 // CHECK: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x03,0x00]
 
+scratch_store_short off, v2, s3 nv
+// CHECK: [0x00,0x40,0x68,0xdc,0x00,0x02,0x83,0x00]
+
+scratch_store_short off, v2, s3 offset:-1 nv
+// CHECK: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x83,0x00]
+
+scratch_store_short off, v2, s3 offset:-1 glc nv
+// CHECK: [0xff,0x5f,0x69,0xdc,0x00,0x02,0x83,0x00]
+
+scratch_store_short off, v2, s3 offset:-1 slc nv
+// CHECK: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x83,0x00]
+
 scratch_store_short_d16_hi off, v2, s3 offset:-1
 // CHECK: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x03,0x00]
 
@@ -3852,6 +4578,18 @@ scratch_store_short_d16_hi off, v2, s3 offset:-1 glc
 scratch_store_short_d16_hi off, v2, s3 offset:-1 slc
 // CHECK: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x03,0x00]
 
+scratch_store_short_d16_hi off, v2, s3 nv
+// CHECK: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x83,0x00]
+
+scratch_store_short_d16_hi off, v2, s3 offset:-1 nv
+// CHECK: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x83,0x00]
+
+scratch_store_short_d16_hi off, v2, s3 offset:-1 glc nv
+// CHECK: [0xff,0x5f,0x6d,0xdc,0x00,0x02,0x83,0x00]
+
+scratch_store_short_d16_hi off, v2, s3 offset:-1 slc nv
+// CHECK: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x83,0x00]
+
 scratch_store_dword off, v2, s3 offset:-1
 // CHECK: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x03,0x00]
 
@@ -3897,6 +4635,18 @@ scratch_store_dword off, v2, s3 offset:-1 glc
 scratch_store_dword off, v2, s3 offset:-1 slc
 // CHECK: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x03,0x00]
 
+scratch_store_dword off, v2, s3 nv
+// CHECK: [0x00,0x40,0x70,0xdc,0x00,0x02,0x83,0x00]
+
+scratch_store_dword off, v2, s3 offset:-1 nv
+// CHECK: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x83,0x00]
+
+scratch_store_dword off, v2, s3 offset:-1 glc nv
+// CHECK: [0xff,0x5f,0x71,0xdc,0x00,0x02,0x83,0x00]
+
+scratch_store_dword off, v2, s3 offset:-1 slc nv
+// CHECK: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x83,0x00]
+
 scratch_store_dwordx2 off, v[2:3], s3 offset:-1
 // CHECK: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x03,0x00]
 
@@ -3942,6 +4692,18 @@ scratch_store_dwordx2 off, v[2:3], s3 offset:-1 glc
 scratch_store_dwordx2 off, v[2:3], s3 offset:-1 slc
 // CHECK: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x03,0x00]
 
+scratch_store_dwordx2 off, v[2:3], s3 nv
+// CHECK: [0x00,0x40,0x74,0xdc,0x00,0x02,0x83,0x00]
+
+scratch_store_dwordx2 off, v[2:3], s3 offset:-1 nv
+// CHECK: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x83,0x00]
+
+scratch_store_dwordx2 off, v[2:3], s3 offset:-1 glc nv
+// CHECK: [0xff,0x5f,0x75,0xdc,0x00,0x02,0x83,0x00]
+
+scratch_store_dwordx2 off, v[2:3], s3 offset:-1 slc nv
+// CHECK: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x83,0x00]
+
 scratch_store_dwordx3 off, v[2:4], s3 offset:-1
 // CHECK: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x03,0x00]
 
@@ -3987,6 +4749,18 @@ scratch_store_dwordx3 off, v[2:4], s3 offset:-1 glc
 scratch_store_dwordx3 off, v[2:4], s3 offset:-1 slc
 // CHECK: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x03,0x00]
 
+scratch_store_dwordx3 off, v[2:4], s3 nv
+// CHECK: [0x00,0x40,0x78,0xdc,0x00,0x02,0x83,0x00]
+
+scratch_store_dwordx3 off, v[2:4], s3 offset:-1 nv
+// CHECK: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x83,0x00]
+
+scratch_store_dwordx3 off, v[2:4], s3 offset:-1 glc nv
+// CHECK: [0xff,0x5f,0x79,0xdc,0x00,0x02,0x83,0x00]
+
+scratch_store_dwordx3 off, v[2:4], s3 offset:-1 slc nv
+// CHECK: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x83,0x00]
+
 scratch_store_dwordx4 off, v[2:5], s3 offset:-1
 // CHECK: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x03,0x00]
 
@@ -4032,6 +4806,18 @@ scratch_store_dwordx4 off, v[2:5], s3 offset:-1 glc
 scratch_store_dwordx4 off, v[2:5], s3 offset:-1 slc
 // CHECK: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x03,0x00]
 
+scratch_store_dwordx4 off, v[2:5], s3 nv
+// CHECK: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x83,0x00]
+
+scratch_store_dwordx4 off, v[2:5], s3 offset:-1 nv
+// CHECK: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x83,0x00]
+
+scratch_store_dwordx4 off, v[2:5], s3 offset:-1 glc nv
+// CHECK: [0xff,0x5f,0x7d,0xdc,0x00,0x02,0x83,0x00]
+
+scratch_store_dwordx4 off, v[2:5], s3 offset:-1 slc nv
+// CHECK: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x83,0x00]
+
 scratch_load_ubyte_d16 v5, off, s2 offset:-1
 // CHECK: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x02,0x05]
 
@@ -4077,6 +4863,18 @@ scratch_load_ubyte_d16 v5, off, s2 offset:-1 glc
 scratch_load_ubyte_d16 v5, off, s2 offset:-1 slc
 // CHECK: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x02,0x05]
 
+scratch_load_ubyte_d16 v5, off, s2 nv
+// CHECK: [0x00,0x40,0x80,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_ubyte_d16 v5, off, s2 offset:-1 nv
+// CHECK: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_ubyte_d16 v5, off, s2 offset:-1 glc nv
+// CHECK: [0xff,0x5f,0x81,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_ubyte_d16 v5, off, s2 offset:-1 slc nv
+// CHECK: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x82,0x05]
+
 scratch_load_ubyte_d16_hi v5, off, s2 offset:-1
 // CHECK: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x02,0x05]
 
@@ -4122,6 +4920,18 @@ scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 glc
 scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 slc
 // CHECK: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x02,0x05]
 
+scratch_load_ubyte_d16_hi v5, off, s2 nv
+// CHECK: [0x00,0x40,0x84,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 nv
+// CHECK: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 glc nv
+// CHECK: [0xff,0x5f,0x85,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 slc nv
+// CHECK: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x82,0x05]
+
 scratch_load_sbyte_d16 v5, off, s2 offset:-1
 // CHECK: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x02,0x05]
 
@@ -4167,6 +4977,18 @@ scratch_load_sbyte_d16 v5, off, s2 offset:-1 glc
 scratch_load_sbyte_d16 v5, off, s2 offset:-1 slc
 // CHECK: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x02,0x05]
 
+scratch_load_sbyte_d16 v5, off, s2 nv
+// CHECK: [0x00,0x40,0x88,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_sbyte_d16 v5, off, s2 offset:-1 nv
+// CHECK: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_sbyte_d16 v5, off, s2 offset:-1 glc nv
+// CHECK: [0xff,0x5f,0x89,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_sbyte_d16 v5, off, s2 offset:-1 slc nv
+// CHECK: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x82,0x05]
+
 scratch_load_sbyte_d16_hi v5, off, s2 offset:-1
 // CHECK: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x02,0x05]
 
@@ -4212,6 +5034,18 @@ scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 glc
 scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 slc
 // CHECK: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x02,0x05]
 
+scratch_load_sbyte_d16_hi v5, off, s2 nv
+// CHECK: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 nv
+// CHECK: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 glc nv
+// CHECK: [0xff,0x5f,0x8d,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 slc nv
+// CHECK: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x82,0x05]
+
 scratch_load_short_d16 v5, off, s2 offset:-1
 // CHECK: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x02,0x05]
 
@@ -4254,6 +5088,18 @@ scratch_load_short_d16 v5, off, s2 offset:-4096
 scratch_load_short_d16 v5, off, s2 offset:-1 glc
 // CHECK: [0xff,0x5f,0x91,0xdc,0x00,0x00,0x02,0x05]
 
+scratch_load_short_d16 v5, off, s2 nv
+// CHECK: [0x00,0x40,0x90,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_short_d16 v5, off, s2 offset:-1 nv
+// CHECK: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_short_d16 v5, off, s2 offset:-1 glc nv
+// CHECK: [0xff,0x5f,0x91,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_short_d16 v5, off, s2 offset:-1 slc nv
+// CHECK: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x82,0x05]
+
 scratch_load_short_d16 v5, off, s2 offset:-1 slc
 // CHECK: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x02,0x05]
 
@@ -4302,6 +5148,18 @@ scratch_load_short_d16_hi v5, off, s2 offset:-1 glc
 scratch_load_short_d16_hi v5, off, s2 offset:-1 slc
 // CHECK: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x02,0x05]
 
+scratch_load_short_d16_hi v5, off, s2 nv
+// CHECK: [0x00,0x40,0x94,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_short_d16_hi v5, off, s2 offset:-1 nv
+// CHECK: [0xff,0x5f,0x94,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_short_d16_hi v5, off, s2 offset:-1 glc nv
+// CHECK: [0xff,0x5f,0x95,0xdc,0x00,0x00,0x82,0x05]
+
+scratch_load_short_d16_hi v5, off, s2 offset:-1 slc nv
+// CHECK: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x82,0x05]
+
 global_load_dword v[2:3], off lds
 // CHECK: [0x00,0xa0,0x50,0xdc,0x02,0x00,0x7f,0x00]
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt
index 0ee659e207c91..4c06585a4c2eb 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt
@@ -21,6 +21,18 @@
 # CHECK: flat_load_ubyte v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x42,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x42,0xdc,0x01,0x00,0x00,0x05
 
+# CHECK: flat_load_ubyte v5, v[1:2] nv           ; encoding: [0x00,0x00,0x40,0xdc,0x01,0x00,0x80,0x05]
+0x00,0x00,0x40,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_ubyte v5, v[1:2] offset:7 nv  ; encoding: [0x07,0x00,0x40,0xdc,0x01,0x00,0x80,0x05]
+0x07,0x00,0x40,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_ubyte v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x41,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x41,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_ubyte v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x42,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x42,0xdc,0x01,0x00,0x80,0x05
+
 # CHECK: flat_load_sbyte v5, v[1:2] offset:4095  ; encoding: [0xff,0x0f,0x44,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x44,0xdc,0x01,0x00,0x00,0x05
 
@@ -42,6 +54,18 @@
 # CHECK: flat_load_sbyte v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x46,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x46,0xdc,0x01,0x00,0x00,0x05
 
+# CHECK: flat_load_sbyte v5, v[1:2] nv           ; encoding: [0x00,0x00,0x44,0xdc,0x01,0x00,0x80,0x05]
+0x00,0x00,0x44,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_sbyte v5, v[1:2] offset:7 nv  ; encoding: [0x07,0x00,0x44,0xdc,0x01,0x00,0x80,0x05]
+0x07,0x00,0x44,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_sbyte v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x45,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x45,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_sbyte v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x46,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x46,0xdc,0x01,0x00,0x80,0x05
+
 # CHECK: flat_load_ushort v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x48,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x48,0xdc,0x01,0x00,0x00,0x05
 
@@ -63,6 +87,18 @@
 # CHECK: flat_load_ushort v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x4a,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x4a,0xdc,0x01,0x00,0x00,0x05
 
+# CHECK: flat_load_ushort v5, v[1:2] nv          ; encoding: [0x00,0x00,0x48,0xdc,0x01,0x00,0x80,0x05]
+0x00,0x00,0x48,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_ushort v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x48,0xdc,0x01,0x00,0x80,0x05]
+0x07,0x00,0x48,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_ushort v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x49,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x49,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_ushort v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x4a,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x4a,0xdc,0x01,0x00,0x80,0x05
+
 # CHECK: flat_load_sshort v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x4c,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x4c,0xdc,0x01,0x00,0x00,0x05
 
@@ -84,6 +120,18 @@
 # CHECK: flat_load_sshort v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x4e,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x4e,0xdc,0x01,0x00,0x00,0x05
 
+# CHECK: flat_load_sshort v5, v[1:2] nv          ; encoding: [0x00,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05]
+0x00,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_sshort v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05]
+0x07,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_sshort v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x4d,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x4d,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_sshort v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x4e,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x4e,0xdc,0x01,0x00,0x80,0x05
+
 # CHECK: flat_load_dword v5, v[1:2] offset:4095  ; encoding: [0xff,0x0f,0x50,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x50,0xdc,0x01,0x00,0x00,0x05
 
@@ -105,6 +153,18 @@
 # CHECK: flat_load_dword v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x52,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x52,0xdc,0x01,0x00,0x00,0x05
 
+# CHECK: flat_load_dword v5, v[1:2] nv           ; encoding: [0x00,0x00,0x50,0xdc,0x01,0x00,0x80,0x05]
+0x00,0x00,0x50,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_dword v5, v[1:2] offset:7 nv  ; encoding: [0x07,0x00,0x50,0xdc,0x01,0x00,0x80,0x05]
+0x07,0x00,0x50,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_dword v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x51,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x51,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_dword v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x52,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x52,0xdc,0x01,0x00,0x80,0x05
+
 # CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x54,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x54,0xdc,0x01,0x00,0x00,0x05
 
@@ -126,6 +186,18 @@
 # CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x56,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x56,0xdc,0x01,0x00,0x00,0x05
 
+# CHECK: flat_load_dwordx2 v[5:6], v[1:2] nv     ; encoding: [0x00,0x00,0x54,0xdc,0x01,0x00,0x80,0x05]
+0x00,0x00,0x54,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x54,0xdc,0x01,0x00,0x80,0x05]
+0x07,0x00,0x54,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x55,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x55,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x56,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x56,0xdc,0x01,0x00,0x80,0x05
+
 # CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x58,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x58,0xdc,0x01,0x00,0x00,0x05
 
@@ -147,6 +219,18 @@
 # CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x5a,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x5a,0xdc,0x01,0x00,0x00,0x05
 
+# CHECK: flat_load_dwordx3 v[5:7], v[1:2] nv     ; encoding: [0x00,0x00,0x58,0xdc,0x01,0x00,0x80,0x05]
+0x00,0x00,0x58,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x58,0xdc,0x01,0x00,0x80,0x05]
+0x07,0x00,0x58,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x59,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x59,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x5a,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x5a,0xdc,0x01,0x00,0x80,0x05
+
 # CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x5c,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x5c,0xdc,0x01,0x00,0x00,0x05
 
@@ -168,6 +252,18 @@
 # CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x5e,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x5e,0xdc,0x01,0x00,0x00,0x05
 
+# CHECK: flat_load_dwordx4 v[5:8], v[1:2] nv     ; encoding: [0x00,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05]
+0x00,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05]
+0x07,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x5d,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x5d,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x5e,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x5e,0xdc,0x01,0x00,0x80,0x05
+
 # CHECK: flat_store_byte v[1:2], v2 offset:4095  ; encoding: [0xff,0x0f,0x60,0xdc,0x01,0x02,0x00,0x00]
 0xff,0x0f,0x60,0xdc,0x01,0x02,0x00,0x00
 
@@ -189,6 +285,18 @@
 # CHECK: flat_store_byte v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x62,0xdc,0x01,0x02,0x00,0x00]
 0xff,0x0f,0x62,0xdc,0x01,0x02,0x00,0x00
 
+# CHECK: flat_store_byte v[1:2], v2 nv           ; encoding: [0x00,0x00,0x60,0xdc,0x01,0x02,0x80,0x00]
+0x00,0x00,0x60,0xdc,0x01,0x02,0x80,0x00
+
+# CHECK: flat_store_byte v[1:2], v2 offset:7 nv  ; encoding: [0x07,0x00,0x60,0xdc,0x01,0x02,0x80,0x00]
+0x07,0x00,0x60,0xdc,0x01,0x02,0x80,0x00
+
+# CHECK: flat_store_byte v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x61,0xdc,0x01,0x02,0x80,0x00]
+0xff,0x0f,0x61,0xdc,0x01,0x02,0x80,0x00
+
+# CHECK: flat_store_byte v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x62,0xdc,0x01,0x02,0x80,0x00]
+0xff,0x0f,0x62,0xdc,0x01,0x02,0x80,0x00
+
 # CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x64,0xdc,0x01,0x02,0x00,0x00]
 0xff,0x0f,0x64,0xdc,0x01,0x02,0x00,0x00
 
@@ -210,6 +318,18 @@
 # CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x66,0xdc,0x01,0x02,0x00,0x00]
 0xff,0x0f,0x66,0xdc,0x01,0x02,0x00,0x00
 
+# CHECK: flat_store_byte_d16_hi v[1:2], v2 nv    ; encoding: [0x00,0x00,0x64,0xdc,0x01,0x02,0x80,0x00]
+0x00,0x00,0x64,0xdc,0x01,0x02,0x80,0x00
+
+# CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x64,0xdc,0x01,0x02,0x80,0x00]
+0x07,0x00,0x64,0xdc,0x01,0x02,0x80,0x00
+
+# CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x65,0xdc,0x01,0x02,0x80,0x00]
+0xff,0x0f,0x65,0xdc,0x01,0x02,0x80,0x00
+
+# CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x66,0xdc,0x01,0x02,0x80,0x00]
+0xff,0x0f,0x66,0xdc,0x01,0x02,0x80,0x00
+
 # CHECK: flat_store_short v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x68,0xdc,0x01,0x02,0x00,0x00]
 0xff,0x0f,0x68,0xdc,0x01,0x02,0x00,0x00
 
@@ -231,6 +351,18 @@
 # CHECK: flat_store_short v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x6a,0xdc,0x01,0x02,0x00,0x00]
 0xff,0x0f,0x6a,0xdc,0x01,0x02,0x00,0x00
 
+# CHECK: flat_store_short v[1:2], v2 nv          ; encoding: [0x00,0x00,0x68,0xdc,0x01,0x02,0x80,0x00]
+0x00,0x00,0x68,0xdc,0x01,0x02,0x80,0x00
+
+# CHECK: flat_store_short v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x68,0xdc,0x01,0x02,0x80,0x00]
+0x07,0x00,0x68,0xdc,0x01,0x02,0x80,0x00
+
+# CHECK: flat_store_short v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x69,0xdc,0x01,0x02,0x80,0x00]
+0xff,0x0f,0x69,0xdc,0x01,0x02,0x80,0x00
+
+# CHECK: flat_store_short v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x6a,0xdc,0x01,0x02,0x80,0x00]
+0xff,0x0f,0x6a,0xdc,0x01,0x02,0x80,0x00
+
 # CHECK: flat_store_short_d16_hi v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xdc,0x01,0x02,0x00,0x00]
 0xff,0x0f,0x6c,0xdc,0x01,0x02,0x00,0x00
 
@@ -252,6 +384,18 @@
 # CHECK: flat_store_short_d16_hi v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x6e,0xdc,0x01,0x02,0x00,0x00]
 0xff,0x0f,0x6e,0xdc,0x01,0x02,0x00,0x00
 
+# CHECK: flat_store_short_d16_hi v[1:2], v2 nv   ; encoding: [0x00,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00]
+0x00,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00
+
+# CHECK: flat_store_short_d16_hi v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00]
+0x07,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00
+
+# CHECK: flat_store_short_d16_hi v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x6d,0xdc,0x01,0x02,0x80,0x00]
+0xff,0x0f,0x6d,0xdc,0x01,0x02,0x80,0x00
+
+# CHECK: flat_store_short_d16_hi v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x6e,0xdc,0x01,0x02,0x80,0x00]
+0xff,0x0f,0x6e,0xdc,0x01,0x02,0x80,0x00
+
 # CHECK: flat_store_dword v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x70,0xdc,0x01,0x02,0x00,0x00]
 0xff,0x0f,0x70,0xdc,0x01,0x02,0x00,0x00
 
@@ -273,6 +417,18 @@
 # CHECK: flat_store_dword v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x72,0xdc,0x01,0x02,0x00,0x00]
 0xff,0x0f,0x72,0xdc,0x01,0x02,0x00,0x00
 
+# CHECK: flat_store_dword v[1:2], v2 nv          ; encoding: [0x00,0x00,0x70,0xdc,0x01,0x02,0x80,0x00]
+0x00,0x00,0x70,0xdc,0x01,0x02,0x80,0x00
+
+# CHECK: flat_store_dword v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x70,0xdc,0x01,0x02,0x80,0x00]
+0x07,0x00,0x70,0xdc,0x01,0x02,0x80,0x00
+
+# CHECK: flat_store_dword v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x71,0xdc,0x01,0x02,0x80,0x00]
+0xff,0x0f,0x71,0xdc,0x01,0x02,0x80,0x00
+
+# CHECK: flat_store_dword v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x72,0xdc,0x01,0x02,0x80,0x00]
+0xff,0x0f,0x72,0xdc,0x01,0x02,0x80,0x00
+
 # CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x74,0xdc,0x01,0x02,0x00,0x00]
 0xff,0x0f,0x74,0xdc,0x01,0x02,0x00,0x00
 
@@ -294,6 +450,18 @@
 # CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x76,0xdc,0x01,0x02,0x00,0x00]
 0xff,0x0f,0x76,0xdc,0x01,0x02,0x00,0x00
 
+# CHECK: flat_store_dwordx2 v[1:2], v[2:3] nv    ; encoding: [0x00,0x00,0x74,0xdc,0x01,0x02,0x80,0x00]
+0x00,0x00,0x74,0xdc,0x01,0x02,0x80,0x00
+
+# CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:7 nv ; encoding: [0x07,0x00,0x74,0xdc,0x01,0x02,0x80,0x00]
+0x07,0x00,0x74,0xdc,0x01,0x02,0x80,0x00
+
+# CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:4095 glc nv ; encoding: [0xff,0x0f,0x75,0xdc,0x01,0x02,0x80,0x00]
+0xff,0x0f,0x75,0xdc,0x01,0x02,0x80,0x00
+
+# CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:4095 slc nv ; encoding: [0xff,0x0f,0x76,0xdc,0x01,0x02,0x80,0x00]
+0xff,0x0f,0x76,0xdc,0x01,0x02,0x80,0x00
+
 # CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:4095 ; encoding: [0xff,0x0f,0x78,0xdc,0x01,0x02,0x00,0x00]
 0xff,0x0f,0x78,0xdc,0x01,0x02,0x00,0x00
 
@@ -315,6 +483,18 @@
 # CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:4095 slc ; encoding: [0xff,0x0f,0x7a,0xdc,0x01,0x02,0x00,0x00]
 0xff,0x0f,0x7a,0xdc,0x01,0x02,0x00,0x00
 
+# CHECK: flat_store_dwordx3 v[1:2], v[2:4] nv    ; encoding: [0x00,0x00,0x78,0xdc,0x01,0x02,0x80,0x00]
+0x00,0x00,0x78,0xdc,0x01,0x02,0x80,0x00
+
+# CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:7 nv ; encoding: [0x07,0x00,0x78,0xdc,0x01,0x02,0x80,0x00]
+0x07,0x00,0x78,0xdc,0x01,0x02,0x80,0x00
+
+# CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:4095 glc nv ; encoding: [0xff,0x0f,0x79,0xdc,0x01,0x02,0x80,0x00]
+0xff,0x0f,0x79,0xdc,0x01,0x02,0x80,0x00
+
+# CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:4095 slc nv ; encoding: [0xff,0x0f,0x7a,0xdc,0x01,0x02,0x80,0x00]
+0xff,0x0f,0x7a,0xdc,0x01,0x02,0x80,0x00
+
 # CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:4095 ; encoding: [0xff,0x0f,0x7c,0xdc,0x01,0x02,0x00,0x00]
 0xff,0x0f,0x7c,0xdc,0x01,0x02,0x00,0x00
 
@@ -336,6 +516,18 @@
 # CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:4095 slc ; encoding: [0xff,0x0f,0x7e,0xdc,0x01,0x02,0x00,0x00]
 0xff,0x0f,0x7e,0xdc,0x01,0x02,0x00,0x00
 
+# CHECK: flat_store_dwordx4 v[1:2], v[2:5] nv    ; encoding: [0x00,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00]
+0x00,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00
+
+# CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:7 nv ; encoding: [0x07,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00]
+0x07,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00
+
+# CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:4095 glc nv ; encoding: [0xff,0x0f,0x7d,0xdc,0x01,0x02,0x80,0x00]
+0xff,0x0f,0x7d,0xdc,0x01,0x02,0x80,0x00
+
+# CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:4095 slc nv ; encoding: [0xff,0x0f,0x7e,0xdc,0x01,0x02,0x80,0x00]
+0xff,0x0f,0x7e,0xdc,0x01,0x02,0x80,0x00
+
 # CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x80,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x80,0xdc,0x01,0x00,0x00,0x05
 
@@ -357,6 +549,18 @@
 # CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x82,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x82,0xdc,0x01,0x00,0x00,0x05
 
+# CHECK: flat_load_ubyte_d16 v5, v[1:2] nv       ; encoding: [0x00,0x00,0x80,0xdc,0x01,0x00,0x80,0x05]
+0x00,0x00,0x80,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x80,0xdc,0x01,0x00,0x80,0x05]
+0x07,0x00,0x80,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x81,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x81,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x82,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x82,0xdc,0x01,0x00,0x80,0x05
+
 # CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x84,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x84,0xdc,0x01,0x00,0x00,0x05
 
@@ -378,6 +582,18 @@
 # CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x86,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x86,0xdc,0x01,0x00,0x00,0x05
 
+# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] nv    ; encoding: [0x00,0x00,0x84,0xdc,0x01,0x00,0x80,0x05]
+0x00,0x00,0x84,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x84,0xdc,0x01,0x00,0x80,0x05]
+0x07,0x00,0x84,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x85,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x85,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x86,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x86,0xdc,0x01,0x00,0x80,0x05
+
 # CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x88,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x88,0xdc,0x01,0x00,0x00,0x05
 
@@ -399,6 +615,18 @@
 # CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x8a,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x8a,0xdc,0x01,0x00,0x00,0x05
 
+# CHECK: flat_load_sbyte_d16 v5, v[1:2] nv       ; encoding: [0x00,0x00,0x88,0xdc,0x01,0x00,0x80,0x05]
+0x00,0x00,0x88,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x88,0xdc,0x01,0x00,0x80,0x05]
+0x07,0x00,0x88,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x89,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x89,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x8a,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x8a,0xdc,0x01,0x00,0x80,0x05
+
 # CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x8c,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x8c,0xdc,0x01,0x00,0x00,0x05
 
@@ -420,6 +648,18 @@
 # CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x8e,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x8e,0xdc,0x01,0x00,0x00,0x05
 
+# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] nv    ; encoding: [0x00,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05]
+0x00,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05]
+0x07,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x8d,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x8d,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x8e,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x8e,0xdc,0x01,0x00,0x80,0x05
+
 # CHECK: flat_load_short_d16 v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x90,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x90,0xdc,0x01,0x00,0x00,0x05
 
@@ -441,6 +681,18 @@
 # CHECK: flat_load_short_d16 v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x92,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x92,0xdc,0x01,0x00,0x00,0x05
 
+# CHECK: flat_load_short_d16 v5, v[1:2] nv       ; encoding: [0x00,0x00,0x90,0xdc,0x01,0x00,0x80,0x05]
+0x00,0x00,0x90,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_short_d16 v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x90,0xdc,0x01,0x00,0x80,0x05]
+0x07,0x00,0x90,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_short_d16 v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x91,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x91,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_short_d16 v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x92,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x92,0xdc,0x01,0x00,0x80,0x05
+
 # CHECK: flat_load_short_d16_hi v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x94,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x94,0xdc,0x01,0x00,0x00,0x05
 
@@ -462,6 +714,18 @@
 # CHECK: flat_load_short_d16_hi v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x96,0xdc,0x01,0x00,0x00,0x05]
 0xff,0x0f,0x96,0xdc,0x01,0x00,0x00,0x05
 
+# CHECK: flat_load_short_d16_hi v5, v[1:2] nv    ; encoding: [0x00,0x00,0x94,0xdc,0x01,0x00,0x80,0x05]
+0x00,0x00,0x94,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_short_d16_hi v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x94,0xdc,0x01,0x00,0x80,0x05]
+0x07,0x00,0x94,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_short_d16_hi v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x95,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x95,0xdc,0x01,0x00,0x80,0x05
+
+# CHECK: flat_load_short_d16_hi v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x96,0xdc,0x01,0x00,0x80,0x05]
+0xff,0x0f,0x96,0xdc,0x01,0x00,0x80,0x05
+
 # CHECK: flat_atomic_swap v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x00,0xdd,0x01,0x02,0x00,0x00]
 0xff,0x0f,0x00,0xdd,0x01,0x02,0x00,0x00
 
@@ -483,6 +747,18 @@
 # CHECK: flat_atomic_swap v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x02,0xdd,0x01,0x02,0x00,0x00]
 0xff,0x0f,0x02,0xdd,0x01,0x02,0x00,0x00
 
+# CHECK: flat_atomic_swap v[1:2], v2 nv          ; encoding: [0x00,0x00,0x00,0xdd,0x01,0x02,0x80,0x00]
+0x00,0x00,0x00,0xdd,0x01,0x02,0x80,0x00
+
+# CHECK: flat_atomic_swap v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x00,0xdd,0x01,0x02,0x80,0x00]
+0x07,0x00,0x00,0xdd,0x01,0x02,0x80,0x00
+
+# CHECK: flat_atomic_swap v0, v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x01,0xdd,0x01,0x02,0x80,0x00]
+0xff,0x0f,0x01,0xdd,0x01,0x02,0x80,0x00
+
+# CHECK: flat_atomic_swap v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x02,0xdd,0x01,0x02,0x80,0x00]
+0xff,0x0f,0x02,0xdd,0x01,0x02,0x80,0x00
+
 # CHECK: flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x04,0xdd,0x01,0x02,0x00,0x00]
 0xff,0x0f,0x04,0xdd,0x01,0x02,0x00,0x00
 
@@ -504,6 +780,18 @@
 # CHECK: flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x06,0xdd,0x01,0x02,0x00,0x00]
 0xff,0x0f,0x06,0xdd,0x01,0x02,0x00,0x00
 
+# CHECK: flat_atomic_cmpswap v[1:2], v[2:3] nv   ; encoding: [0x00,0x00,0x04,0xdd,0x01,0x02,0x80,0x00]
+0x00,0x00,0x04,0xdd,0x01,0x02,0x80,0x00
+
+# CHECK: flat_atomic_cmpswap v[1:2], v[2:3] offset:7 nv ; encoding: [0x07,0x00,0x04,0xdd,0x01,0x02,0x80,0x00]
+0x07,0x00,0x04,0xdd,0x01,0x02,0x80,0x00
+
+# CHECK: flat_atomic_cmpswap v0, v[1:2], v[2:3] offset:4095 glc nv ; encoding: [0xff,0x0f,0x05,0xdd,0x01,0x02,0x80,0x00]
+0xff,0x0f,0x05,0xdd,0x01,0x02,0x80,0x00
+
+# CHECK: flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 slc nv ; encoding: [0xff,0x0f,0x06,0xdd,0x01,0x02,0x80,0x00]
+0xff,0x0f,0x06,0xdd,0x01,0x02,0x80,0x00
+
 # CHECK: flat_atomic_add v[1:2], v2 offset:4095  ; encoding: [0xff,0x0f,0x08,0xdd,0x01,0x02,0x00,0x00]
 0xff,0x0f,0x08,0xdd,0x01,0x02,0x00,0x00
 
@@ -525,6 +813,18 @@
 # CHECK: flat_atomic_add v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x0a,0xdd,0x01,0x02,0x00,0x00]
 0xff,0x0f,0x0a,0xdd,0x01,0x02,0x00,0x00
 
+# CHECK: flat_atomic_add v[1:2], v2 nv           ; encoding: [0x00,0x00,0x08,0xdd,0x01,0x02,0x80,0x00]
+0x00,0x00,0x08,0xdd,0x01,0x02,0x80,0x00
+
+# CHECK: flat_atomic_add v[1:2], v2 offset:7 nv  ; encoding: [0x07,0x00,0x08,0xdd,0x01,0x02,0x80,0x00]
+0x07,0x00,0x08,0xdd,0x01,0x02,0x80,0x00
+
+# CHECK: flat_atomic_add v0, v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x09,0xdd,0x01,0x02,0x80,0x00]
+0xff,0x0f,0x09,0xdd,0x01,0x02,0x80,0x00
+
+# CHECK: flat_atomic_add v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x0a,0xdd,0x01,0x02,0x80,0x00]
+0xff,0x0f,0x0a,0xdd,0x01,0x02,0x80,0x00
+
 # CHECK: flat_atomic_sub v[1:2], v2 offset:4095  ; encoding: [0xff,0x0f,0x0c,0xdd,0x01,0x02,0x00,0x00]
 0xff,0x0f,0x0c,0xdd,0x01,0x02,0x00,0x00
 
@@ -1017,6 +1317,18 @@
 # CHECK: global_load_ubyte v5, v[1:2], off       ; encoding: [0x00,0x80,0x40,0xdc,0x01,0x00,0x7f,0x05]
 0x00,0x80,0x40,0xdc,0x01,0x00,0x7f,0x05
 
+# CHECK: global_load_ubyte v5, v1, s[4:5] nv     ; encoding: [0x00,0x80,0x40,0xdc,0x01,0x00,0x84,0x05]
+0x00,0x80,0x40,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_ubyte v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x40,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x40,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_ubyte v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x41,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x41,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_ubyte v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x42,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x42,0xdc,0x01,0x00,0x84,0x05
+
 # CHECK: global_load_sbyte v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x44,0xdc,0x01,0x00,0x7f,0x05]
 0xff,0x9f,0x44,0xdc,0x01,0x00,0x7f,0x05
 
@@ -1026,6 +1338,18 @@
 # CHECK: global_load_sbyte v5, v[1:2], off       ; encoding: [0x00,0x80,0x44,0xdc,0x01,0x00,0x7f,0x05]
 0x00,0x80,0x44,0xdc,0x01,0x00,0x7f,0x05
 
+# CHECK: global_load_sbyte v5, v1, s[4:5] nv     ; encoding: [0x00,0x80,0x44,0xdc,0x01,0x00,0x84,0x05]
+0x00,0x80,0x44,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_sbyte v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x44,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x44,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_sbyte v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x45,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x45,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_sbyte v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x46,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x46,0xdc,0x01,0x00,0x84,0x05
+
 # CHECK: global_load_ushort v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x48,0xdc,0x01,0x00,0x7f,0x05]
 0xff,0x9f,0x48,0xdc,0x01,0x00,0x7f,0x05
 
@@ -1035,6 +1359,18 @@
 # CHECK: global_load_ushort v5, v[1:2], off      ; encoding: [0x00,0x80,0x48,0xdc,0x01,0x00,0x7f,0x05]
 0x00,0x80,0x48,0xdc,0x01,0x00,0x7f,0x05
 
+# CHECK: global_load_ushort v5, v1, s[4:5] nv    ; encoding: [0x00,0x80,0x48,0xdc,0x01,0x00,0x84,0x05]
+0x00,0x80,0x48,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_ushort v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x48,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x48,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_ushort v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x49,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x49,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_ushort v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x4a,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x4a,0xdc,0x01,0x00,0x84,0x05
+
 # CHECK: global_load_sshort v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x4c,0xdc,0x01,0x00,0x7f,0x05]
 0xff,0x9f,0x4c,0xdc,0x01,0x00,0x7f,0x05
 
@@ -1044,6 +1380,18 @@
 # CHECK: global_load_sshort v5, v[1:2], off      ; encoding: [0x00,0x80,0x4c,0xdc,0x01,0x00,0x7f,0x05]
 0x00,0x80,0x4c,0xdc,0x01,0x00,0x7f,0x05
 
+# CHECK: global_load_sshort v5, v1, s[4:5] nv    ; encoding: [0x00,0x80,0x4c,0xdc,0x01,0x00,0x84,0x05]
+0x00,0x80,0x4c,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_sshort v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x4c,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x4c,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_sshort v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x4d,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x4d,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_sshort v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x4e,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x4e,0xdc,0x01,0x00,0x84,0x05
+
 # CHECK: global_load_dword v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x50,0xdc,0x01,0x00,0x7f,0x05]
 0xff,0x9f,0x50,0xdc,0x01,0x00,0x7f,0x05
 
@@ -1053,6 +1401,18 @@
 # CHECK: global_load_dword v5, v[1:2], off       ; encoding: [0x00,0x80,0x50,0xdc,0x01,0x00,0x7f,0x05]
 0x00,0x80,0x50,0xdc,0x01,0x00,0x7f,0x05
 
+# CHECK: global_load_dword v5, v1, s[4:5] nv     ; encoding: [0x00,0x80,0x50,0xdc,0x01,0x00,0x84,0x05]
+0x00,0x80,0x50,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_dword v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x50,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x50,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_dword v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x51,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x51,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_dword v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x52,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x52,0xdc,0x01,0x00,0x84,0x05
+
 # CHECK: global_load_dwordx2 v[5:6], v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x54,0xdc,0x01,0x00,0x7f,0x05]
 0xff,0x9f,0x54,0xdc,0x01,0x00,0x7f,0x05
 
@@ -1062,6 +1422,18 @@
 # CHECK: global_load_dwordx2 v[5:6], v[1:2], off ; encoding: [0x00,0x80,0x54,0xdc,0x01,0x00,0x7f,0x05]
 0x00,0x80,0x54,0xdc,0x01,0x00,0x7f,0x05
 
+# CHECK: global_load_dwordx2 v[5:6], v1, s[4:5] nv ; encoding: [0x00,0x80,0x54,0xdc,0x01,0x00,0x84,0x05]
+0x00,0x80,0x54,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x54,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x54,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x55,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x55,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x56,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x56,0xdc,0x01,0x00,0x84,0x05
+
 # CHECK: global_load_dwordx3 v[5:7], v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x58,0xdc,0x01,0x00,0x7f,0x05]
 0xff,0x9f,0x58,0xdc,0x01,0x00,0x7f,0x05
 
@@ -1071,6 +1443,18 @@
 # CHECK: global_load_dwordx3 v[5:7], v[1:2], off ; encoding: [0x00,0x80,0x58,0xdc,0x01,0x00,0x7f,0x05]
 0x00,0x80,0x58,0xdc,0x01,0x00,0x7f,0x05
 
+# CHECK: global_load_dwordx3 v[5:7], v1, s[4:5] nv ; encoding: [0x00,0x80,0x58,0xdc,0x01,0x00,0x84,0x05]
+0x00,0x80,0x58,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x58,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x58,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x59,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x59,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x5a,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x5a,0xdc,0x01,0x00,0x84,0x05
+
 # CHECK: global_load_dwordx4 v[5:8], v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x5c,0xdc,0x01,0x00,0x7f,0x05]
 0xff,0x9f,0x5c,0xdc,0x01,0x00,0x7f,0x05
 
@@ -1080,6 +1464,18 @@
 # CHECK: global_load_dwordx4 v[5:8], v[1:2], off ; encoding: [0x00,0x80,0x5c,0xdc,0x01,0x00,0x7f,0x05]
 0x00,0x80,0x5c,0xdc,0x01,0x00,0x7f,0x05
 
+# CHECK: global_load_dwordx4 v[5:8], v1, s[4:5] nv ; encoding: [0x00,0x80,0x5c,0xdc,0x01,0x00,0x84,0x05]
+0x00,0x80,0x5c,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x5c,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x5c,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x5d,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x5d,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x5e,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x5e,0xdc,0x01,0x00,0x84,0x05
+
 # CHECK: global_store_byte v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x60,0xdc,0x01,0x02,0x7f,0x00]
 0xff,0x9f,0x60,0xdc,0x01,0x02,0x7f,0x00
 
@@ -1089,6 +1485,18 @@
 # CHECK: global_store_byte v[1:2], v2, off       ; encoding: [0x00,0x80,0x60,0xdc,0x01,0x02,0x7f,0x00]
 0x00,0x80,0x60,0xdc,0x01,0x02,0x7f,0x00
 
+# CHECK: global_store_byte v1, v2, s[6:7] nv     ; encoding: [0x00,0x80,0x60,0xdc,0x01,0x02,0x86,0x00]
+0x00,0x80,0x60,0xdc,0x01,0x02,0x86,0x00
+
+# CHECK: global_store_byte v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x60,0xdc,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x60,0xdc,0x01,0x02,0x86,0x00
+
+# CHECK: global_store_byte v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x61,0xdc,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x61,0xdc,0x01,0x02,0x86,0x00
+
+# CHECK: global_store_byte v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x62,0xdc,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x62,0xdc,0x01,0x02,0x86,0x00
+
 # CHECK: global_store_byte_d16_hi v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x64,0xdc,0x01,0x02,0x7f,0x00]
 0xff,0x9f,0x64,0xdc,0x01,0x02,0x7f,0x00
 
@@ -1098,6 +1506,18 @@
 # CHECK: global_store_byte_d16_hi v[1:2], v2, off ; encoding: [0x00,0x80,0x64,0xdc,0x01,0x02,0x7f,0x00]
 0x00,0x80,0x64,0xdc,0x01,0x02,0x7f,0x00
 
+# CHECK: global_store_byte_d16_hi v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x64,0xdc,0x01,0x02,0x86,0x00]
+0x00,0x80,0x64,0xdc,0x01,0x02,0x86,0x00
+
+# CHECK: global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x64,0xdc,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x64,0xdc,0x01,0x02,0x86,0x00
+
+# CHECK: global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x65,0xdc,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x65,0xdc,0x01,0x02,0x86,0x00
+
+# CHECK: global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x66,0xdc,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x66,0xdc,0x01,0x02,0x86,0x00
+
 # CHECK: global_store_short v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x68,0xdc,0x01,0x02,0x7f,0x00]
 0xff,0x9f,0x68,0xdc,0x01,0x02,0x7f,0x00
 
@@ -1107,6 +1527,18 @@
 # CHECK: global_store_short v[1:2], v2, off      ; encoding: [0x00,0x80,0x68,0xdc,0x01,0x02,0x7f,0x00]
 0x00,0x80,0x68,0xdc,0x01,0x02,0x7f,0x00
 
+# CHECK: global_store_short v1, v2, s[6:7] nv    ; encoding: [0x00,0x80,0x68,0xdc,0x01,0x02,0x86,0x00]
+0x00,0x80,0x68,0xdc,0x01,0x02,0x86,0x00
+
+# CHECK: global_store_short v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x68,0xdc,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x68,0xdc,0x01,0x02,0x86,0x00
+
+# CHECK: global_store_short v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x69,0xdc,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x69,0xdc,0x01,0x02,0x86,0x00
+
+# CHECK: global_store_short v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x6a,0xdc,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x6a,0xdc,0x01,0x02,0x86,0x00
+
 # CHECK: global_store_short_d16_hi v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x6c,0xdc,0x01,0x02,0x7f,0x00]
 0xff,0x9f,0x6c,0xdc,0x01,0x02,0x7f,0x00
 
@@ -1116,6 +1548,18 @@
 # CHECK: global_store_short_d16_hi v[1:2], v2, off ; encoding: [0x00,0x80,0x6c,0xdc,0x01,0x02,0x7f,0x00]
 0x00,0x80,0x6c,0xdc,0x01,0x02,0x7f,0x00
 
+# CHECK: global_store_short_d16_hi v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x6c,0xdc,0x01,0x02,0x86,0x00]
+0x00,0x80,0x6c,0xdc,0x01,0x02,0x86,0x00
+
+# CHECK: global_store_short_d16_hi v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x6c,0xdc,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x6c,0xdc,0x01,0x02,0x86,0x00
+
+# CHECK: global_store_short_d16_hi v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x6d,0xdc,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x6d,0xdc,0x01,0x02,0x86,0x00
+
+# CHECK: global_store_short_d16_hi v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x6e,0xdc,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x6e,0xdc,0x01,0x02,0x86,0x00
+
 # CHECK: global_store_dword v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x70,0xdc,0x01,0x02,0x7f,0x00]
 0xff,0x9f,0x70,0xdc,0x01,0x02,0x7f,0x00
 
@@ -1125,6 +1569,18 @@
 # CHECK: global_store_dword v[1:2], v2, off      ; encoding: [0x00,0x80,0x70,0xdc,0x01,0x02,0x7f,0x00]
 0x00,0x80,0x70,0xdc,0x01,0x02,0x7f,0x00
 
+# CHECK: global_store_dword v1, v2, s[6:7] nv    ; encoding: [0x00,0x80,0x70,0xdc,0x01,0x02,0x86,0x00]
+0x00,0x80,0x70,0xdc,0x01,0x02,0x86,0x00
+
+# CHECK: global_store_dword v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x70,0xdc,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x70,0xdc,0x01,0x02,0x86,0x00
+
+# CHECK: global_store_dword v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x71,0xdc,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x71,0xdc,0x01,0x02,0x86,0x00
+
+# CHECK: global_store_dword v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x72,0xdc,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x72,0xdc,0x01,0x02,0x86,0x00
+
 # CHECK: global_store_dwordx2 v[1:2], v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x74,0xdc,0x01,0x02,0x7f,0x00]
 0xff,0x9f,0x74,0xdc,0x01,0x02,0x7f,0x00
 
@@ -1134,6 +1590,18 @@
 # CHECK: global_store_dwordx2 v[1:2], v[2:3], off ; encoding: [0x00,0x80,0x74,0xdc,0x01,0x02,0x7f,0x00]
 0x00,0x80,0x74,0xdc,0x01,0x02,0x7f,0x00
 
+# CHECK: global_store_dwordx2 v1, v[2:3], s[6:7] nv ; encoding: [0x00,0x80,0x74,0xdc,0x01,0x02,0x86,0x00]
+0x00,0x80,0x74,0xdc,0x01,0x02,0x86,0x00
+
+# CHECK: global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x74,0xdc,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x74,0xdc,0x01,0x02,0x86,0x00
+
+# CHECK: global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x75,0xdc,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x75,0xdc,0x01,0x02,0x86,0x00
+
+# CHECK: global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x76,0xdc,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x76,0xdc,0x01,0x02,0x86,0x00
+
 # CHECK: global_store_dwordx3 v[1:2], v[2:4], off offset:-1 ; encoding: [0xff,0x9f,0x78,0xdc,0x01,0x02,0x7f,0x00]
 0xff,0x9f,0x78,0xdc,0x01,0x02,0x7f,0x00
 
@@ -1143,6 +1611,18 @@
 # CHECK: global_store_dwordx3 v[1:2], v[2:4], off ; encoding: [0x00,0x80,0x78,0xdc,0x01,0x02,0x7f,0x00]
 0x00,0x80,0x78,0xdc,0x01,0x02,0x7f,0x00
 
+# CHECK: global_store_dwordx3 v1, v[2:4], s[6:7] nv ; encoding: [0x00,0x80,0x78,0xdc,0x01,0x02,0x86,0x00]
+0x00,0x80,0x78,0xdc,0x01,0x02,0x86,0x00
+
+# CHECK: global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x78,0xdc,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x78,0xdc,0x01,0x02,0x86,0x00
+
+# CHECK: global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x79,0xdc,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x79,0xdc,0x01,0x02,0x86,0x00
+
+# CHECK: global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x7a,0xdc,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x7a,0xdc,0x01,0x02,0x86,0x00
+
 # CHECK: global_store_dwordx4 v[1:2], v[2:5], off offset:-1 ; encoding: [0xff,0x9f,0x7c,0xdc,0x01,0x02,0x7f,0x00]
 0xff,0x9f,0x7c,0xdc,0x01,0x02,0x7f,0x00
 
@@ -1152,6 +1632,18 @@
 # CHECK: global_store_dwordx4 v[1:2], v[2:5], off ; encoding: [0x00,0x80,0x7c,0xdc,0x01,0x02,0x7f,0x00]
 0x00,0x80,0x7c,0xdc,0x01,0x02,0x7f,0x00
 
+# CHECK: global_store_dwordx4 v1, v[2:5], s[6:7] nv ; encoding: [0x00,0x80,0x7c,0xdc,0x01,0x02,0x86,0x00]
+0x00,0x80,0x7c,0xdc,0x01,0x02,0x86,0x00
+
+# CHECK: global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x7c,0xdc,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x7c,0xdc,0x01,0x02,0x86,0x00
+
+# CHECK: global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x7d,0xdc,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x7d,0xdc,0x01,0x02,0x86,0x00
+
+# CHECK: global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x7e,0xdc,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x7e,0xdc,0x01,0x02,0x86,0x00
+
 # CHECK: global_load_ubyte_d16 v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x80,0xdc,0x01,0x00,0x7f,0x05]
 0xff,0x9f,0x80,0xdc,0x01,0x00,0x7f,0x05
 
@@ -1161,6 +1653,18 @@
 # CHECK: global_load_ubyte_d16 v5, v[1:2], off   ; encoding: [0x00,0x80,0x80,0xdc,0x01,0x00,0x7f,0x05]
 0x00,0x80,0x80,0xdc,0x01,0x00,0x7f,0x05
 
+# CHECK: global_load_ubyte_d16 v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x80,0xdc,0x01,0x00,0x84,0x05]
+0x00,0x80,0x80,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x80,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x80,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x81,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x81,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x82,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x82,0xdc,0x01,0x00,0x84,0x05
+
 # CHECK: global_load_ubyte_d16_hi v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x84,0xdc,0x01,0x00,0x7f,0x05]
 0xff,0x9f,0x84,0xdc,0x01,0x00,0x7f,0x05
 
@@ -1170,6 +1674,18 @@
 # CHECK: global_load_ubyte_d16_hi v5, v[1:2], off ; encoding: [0x00,0x80,0x84,0xdc,0x01,0x00,0x7f,0x05]
 0x00,0x80,0x84,0xdc,0x01,0x00,0x7f,0x05
 
+# CHECK: global_load_ubyte_d16_hi v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x84,0xdc,0x01,0x00,0x84,0x05]
+0x00,0x80,0x84,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x84,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x84,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x85,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x85,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x86,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x86,0xdc,0x01,0x00,0x84,0x05
+
 # CHECK: global_load_sbyte_d16 v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x88,0xdc,0x01,0x00,0x7f,0x05]
 0xff,0x9f,0x88,0xdc,0x01,0x00,0x7f,0x05
 
@@ -1179,6 +1695,18 @@
 # CHECK: global_load_sbyte_d16 v5, v[1:2], off   ; encoding: [0x00,0x80,0x88,0xdc,0x01,0x00,0x7f,0x05]
 0x00,0x80,0x88,0xdc,0x01,0x00,0x7f,0x05
 
+# CHECK: global_load_sbyte_d16 v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x88,0xdc,0x01,0x00,0x84,0x05]
+0x00,0x80,0x88,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x88,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x88,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x89,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x89,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x8a,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x8a,0xdc,0x01,0x00,0x84,0x05
+
 # CHECK: global_load_sbyte_d16_hi v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x8c,0xdc,0x01,0x00,0x7f,0x05]
 0xff,0x9f,0x8c,0xdc,0x01,0x00,0x7f,0x05
 
@@ -1188,6 +1716,18 @@
 # CHECK: global_load_sbyte_d16_hi v5, v[1:2], off ; encoding: [0x00,0x80,0x8c,0xdc,0x01,0x00,0x7f,0x05]
 0x00,0x80,0x8c,0xdc,0x01,0x00,0x7f,0x05
 
+# CHECK: global_load_sbyte_d16_hi v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x8c,0xdc,0x01,0x00,0x84,0x05]
+0x00,0x80,0x8c,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x8c,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x8c,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x8d,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x8d,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x8e,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x8e,0xdc,0x01,0x00,0x84,0x05
+
 # CHECK: global_load_short_d16 v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x90,0xdc,0x01,0x00,0x7f,0x05]
 0xff,0x9f,0x90,0xdc,0x01,0x00,0x7f,0x05
 
@@ -1197,6 +1737,18 @@
 # CHECK: global_load_short_d16 v5, v[1:2], off   ; encoding: [0x00,0x80,0x90,0xdc,0x01,0x00,0x7f,0x05]
 0x00,0x80,0x90,0xdc,0x01,0x00,0x7f,0x05
 
+# CHECK: global_load_short_d16 v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x90,0xdc,0x01,0x00,0x84,0x05]
+0x00,0x80,0x90,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_short_d16 v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x90,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x90,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_short_d16 v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x91,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x91,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_short_d16 v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x92,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x92,0xdc,0x01,0x00,0x84,0x05
+
 # CHECK: global_load_short_d16_hi v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x94,0xdc,0x01,0x00,0x7f,0x05]
 0xff,0x9f,0x94,0xdc,0x01,0x00,0x7f,0x05
 
@@ -1206,6 +1758,18 @@
 # CHECK: global_load_short_d16_hi v5, v[1:2], off ; encoding: [0x00,0x80,0x94,0xdc,0x01,0x00,0x7f,0x05]
 0x00,0x80,0x94,0xdc,0x01,0x00,0x7f,0x05
 
+# CHECK: global_load_short_d16_hi v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x94,0xdc,0x01,0x00,0x84,0x05]
+0x00,0x80,0x94,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_short_d16_hi v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x94,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x94,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_short_d16_hi v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x95,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x95,0xdc,0x01,0x00,0x84,0x05
+
+# CHECK: global_load_short_d16_hi v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x96,0xdc,0x01,0x00,0x84,0x05]
+0xff,0x9f,0x96,0xdc,0x01,0x00,0x84,0x05
+
 # CHECK: global_atomic_swap v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x00,0xdd,0x01,0x02,0x7f,0x00]
 0xff,0x9f,0x00,0xdd,0x01,0x02,0x7f,0x00
 
@@ -1215,6 +1779,18 @@
 # CHECK: global_atomic_swap v[1:2], v2, off      ; encoding: [0x00,0x80,0x00,0xdd,0x01,0x02,0x7f,0x00]
 0x00,0x80,0x00,0xdd,0x01,0x02,0x7f,0x00
 
+# CHECK: global_atomic_swap v1, v2, s[6:7] nv    ; encoding: [0x00,0x80,0x00,0xdd,0x01,0x02,0x86,0x00]
+0x00,0x80,0x00,0xdd,0x01,0x02,0x86,0x00
+
+# CHECK: global_atomic_swap v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x00,0xdd,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x00,0xdd,0x01,0x02,0x86,0x00
+
+# CHECK: global_atomic_swap v0, v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x01,0xdd,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x01,0xdd,0x01,0x02,0x86,0x00
+
+# CHECK: global_atomic_swap v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x02,0xdd,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x02,0xdd,0x01,0x02,0x86,0x00
+
 # CHECK: global_atomic_cmpswap v[1:2], v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x04,0xdd,0x01,0x02,0x7f,0x00]
 0xff,0x9f,0x04,0xdd,0x01,0x02,0x7f,0x00
 
@@ -1236,6 +1812,18 @@
 # CHECK: global_atomic_cmpswap v1, v[2:3], v[4:5], off glc ; encoding: [0x00,0x80,0x05,0xdd,0x02,0x04,0x7f,0x01]
 0x00,0x80,0x05,0xdd,0x02,0x04,0x7f,0x01
 
+# CHECK: global_atomic_cmpswap v1, v[2:3], s[6:7] nv ; encoding: [0x00,0x80,0x04,0xdd,0x01,0x02,0x86,0x00]
+0x00,0x80,0x04,0xdd,0x01,0x02,0x86,0x00
+
+# CHECK: global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x04,0xdd,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x04,0xdd,0x01,0x02,0x86,0x00
+
+# CHECK: global_atomic_cmpswap v0, v1, v[2:3], s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x05,0xdd,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x05,0xdd,0x01,0x02,0x86,0x00
+
+# CHECK: global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x06,0xdd,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x06,0xdd,0x01,0x02,0x86,0x00
+
 # CHECK: global_atomic_add v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x08,0xdd,0x01,0x02,0x7f,0x00]
 0xff,0x9f,0x08,0xdd,0x01,0x02,0x7f,0x00
 
@@ -1245,6 +1833,18 @@
 # CHECK: global_atomic_add v[1:2], v2, off       ; encoding: [0x00,0x80,0x08,0xdd,0x01,0x02,0x7f,0x00]
 0x00,0x80,0x08,0xdd,0x01,0x02,0x7f,0x00
 
+# CHECK: global_atomic_add v1, v2, s[6:7] nv     ; encoding: [0x00,0x80,0x08,0xdd,0x01,0x02,0x86,0x00]
+0x00,0x80,0x08,0xdd,0x01,0x02,0x86,0x00
+
+# CHECK: global_atomic_add v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x08,0xdd,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x08,0xdd,0x01,0x02,0x86,0x00
+
+# CHECK: global_atomic_add v0, v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x09,0xdd,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x09,0xdd,0x01,0x02,0x86,0x00
+
+# CHECK: global_atomic_add v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x0a,0xdd,0x01,0x02,0x86,0x00]
+0xff,0x9f,0x0a,0xdd,0x01,0x02,0x86,0x00
+
 # CHECK: global_atomic_sub v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x0c,0xdd,0x01,0x02,0x7f,0x00]
 0xff,0x9f,0x0c,0xdd,0x01,0x02,0x7f,0x00
 
@@ -1503,6 +2103,18 @@
 # CHECK: scratch_load_ubyte v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x42,0xdc,0x00,0x00,0x02,0x05
 
+# CHECK: scratch_load_ubyte v5, off, s2 nv       ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x82,0x05]
+0x00,0x40,0x40,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_ubyte v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x40,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x40,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_ubyte v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x41,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x41,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_ubyte v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x42,0xdc,0x00,0x00,0x82,0x05
+
 # CHECK: scratch_load_sbyte v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x44,0xdc,0x00,0x00,0x02,0x05
 
@@ -1542,6 +2154,18 @@
 # CHECK: scratch_load_sbyte v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x46,0xdc,0x00,0x00,0x02,0x05
 
+# CHECK: scratch_load_sbyte v5, off, s2 nv       ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x82,0x05]
+0x00,0x40,0x44,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_sbyte v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x44,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_sbyte v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x45,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x45,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_sbyte v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x46,0xdc,0x00,0x00,0x82,0x05
+
 # CHECK: scratch_load_ushort v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x48,0xdc,0x00,0x00,0x02,0x05
 
@@ -1581,6 +2205,18 @@
 # CHECK: scratch_load_ushort v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x4a,0xdc,0x00,0x00,0x02,0x05
 
+# CHECK: scratch_load_ushort v5, off, s2 nv      ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x82,0x05]
+0x00,0x40,0x48,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_ushort v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x48,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_ushort v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x49,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x49,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_ushort v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x4a,0xdc,0x00,0x00,0x82,0x05
+
 # CHECK: scratch_load_sshort v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x4c,0xdc,0x00,0x00,0x02,0x05
 
@@ -1620,6 +2256,18 @@
 # CHECK: scratch_load_sshort v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x4e,0xdc,0x00,0x00,0x02,0x05
 
+# CHECK: scratch_load_sshort v5, off, s2 nv      ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x82,0x05]
+0x00,0x40,0x4c,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_sshort v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x4c,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_sshort v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x4d,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x4d,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_sshort v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x4e,0xdc,0x00,0x00,0x82,0x05
+
 # CHECK: scratch_load_dword v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x50,0xdc,0x00,0x00,0x02,0x05
 
@@ -1659,6 +2307,18 @@
 # CHECK: scratch_load_dword v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x52,0xdc,0x00,0x00,0x02,0x05
 
+# CHECK: scratch_load_dword v5, off, s2 nv       ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x82,0x05]
+0x00,0x40,0x50,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_dword v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x50,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_dword v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x51,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x51,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_dword v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x52,0xdc,0x00,0x00,0x82,0x05
+
 # CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x54,0xdc,0x00,0x00,0x02,0x05
 
@@ -1698,6 +2358,18 @@
 # CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x56,0xdc,0x00,0x00,0x02,0x05
 
+# CHECK: scratch_load_dwordx2 v[5:6], off, s2 nv ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x82,0x05]
+0x00,0x40,0x54,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x54,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x55,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x55,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x56,0xdc,0x00,0x00,0x82,0x05
+
 # CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x58,0xdc,0x00,0x00,0x02,0x05
 
@@ -1737,6 +2409,18 @@
 # CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x5a,0xdc,0x00,0x00,0x02,0x05
 
+# CHECK: scratch_load_dwordx3 v[5:7], off, s2 nv ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x82,0x05]
+0x00,0x40,0x58,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x58,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x59,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x59,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x5a,0xdc,0x00,0x00,0x82,0x05
+
 # CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x5c,0xdc,0x00,0x00,0x02,0x05
 
@@ -1776,6 +2460,18 @@
 # CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x5e,0xdc,0x00,0x00,0x02,0x05
 
+# CHECK: scratch_load_dwordx4 v[5:8], off, s2 nv ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x82,0x05]
+0x00,0x40,0x5c,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x5c,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x5d,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x5d,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x5e,0xdc,0x00,0x00,0x82,0x05
+
 # CHECK: scratch_store_byte off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x03,0x00]
 0xff,0x5f,0x60,0xdc,0x00,0x02,0x03,0x00
 
@@ -1815,6 +2511,18 @@
 # CHECK: scratch_store_byte off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x03,0x00]
 0xff,0x5f,0x62,0xdc,0x00,0x02,0x03,0x00
 
+# CHECK: scratch_store_byte off, v2, s3 nv       ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x83,0x00]
+0x00,0x40,0x60,0xdc,0x00,0x02,0x83,0x00
+
+# CHECK: scratch_store_byte off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x83,0x00]
+0xff,0x5f,0x60,0xdc,0x00,0x02,0x83,0x00
+
+# CHECK: scratch_store_byte off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x61,0xdc,0x00,0x02,0x83,0x00]
+0xff,0x5f,0x61,0xdc,0x00,0x02,0x83,0x00
+
+# CHECK: scratch_store_byte off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x83,0x00]
+0xff,0x5f,0x62,0xdc,0x00,0x02,0x83,0x00
+
 # CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x03,0x00]
 0xff,0x5f,0x64,0xdc,0x00,0x02,0x03,0x00
 
@@ -1854,6 +2562,18 @@
 # CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x03,0x00]
 0xff,0x5f,0x66,0xdc,0x00,0x02,0x03,0x00
 
+# CHECK: scratch_store_byte_d16_hi off, v2, s3 nv ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x83,0x00]
+0x00,0x40,0x64,0xdc,0x00,0x02,0x83,0x00
+
+# CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x83,0x00]
+0xff,0x5f,0x64,0xdc,0x00,0x02,0x83,0x00
+
+# CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x65,0xdc,0x00,0x02,0x83,0x00]
+0xff,0x5f,0x65,0xdc,0x00,0x02,0x83,0x00
+
+# CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x83,0x00]
+0xff,0x5f,0x66,0xdc,0x00,0x02,0x83,0x00
+
 # CHECK: scratch_store_short off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x03,0x00]
 0xff,0x5f,0x68,0xdc,0x00,0x02,0x03,0x00
 
@@ -1893,6 +2613,18 @@
 # CHECK: scratch_store_short off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x03,0x00]
 0xff,0x5f,0x6a,0xdc,0x00,0x02,0x03,0x00
 
+# CHECK: scratch_store_short off, v2, s3 nv      ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x83,0x00]
+0x00,0x40,0x68,0xdc,0x00,0x02,0x83,0x00
+
+# CHECK: scratch_store_short off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x83,0x00]
+0xff,0x5f,0x68,0xdc,0x00,0x02,0x83,0x00
+
+# CHECK: scratch_store_short off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x69,0xdc,0x00,0x02,0x83,0x00]
+0xff,0x5f,0x69,0xdc,0x00,0x02,0x83,0x00
+
+# CHECK: scratch_store_short off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x83,0x00]
+0xff,0x5f,0x6a,0xdc,0x00,0x02,0x83,0x00
+
 # CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x03,0x00]
 0xff,0x5f,0x6c,0xdc,0x00,0x02,0x03,0x00
 
@@ -1932,6 +2664,18 @@
 # CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x03,0x00]
 0xff,0x5f,0x6e,0xdc,0x00,0x02,0x03,0x00
 
+# CHECK: scratch_store_short_d16_hi off, v2, s3 nv ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x83,0x00]
+0x00,0x40,0x6c,0xdc,0x00,0x02,0x83,0x00
+
+# CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x83,0x00]
+0xff,0x5f,0x6c,0xdc,0x00,0x02,0x83,0x00
+
+# CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x6d,0xdc,0x00,0x02,0x83,0x00]
+0xff,0x5f,0x6d,0xdc,0x00,0x02,0x83,0x00
+
+# CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x83,0x00]
+0xff,0x5f,0x6e,0xdc,0x00,0x02,0x83,0x00
+
 # CHECK: scratch_store_dword off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x03,0x00]
 0xff,0x5f,0x70,0xdc,0x00,0x02,0x03,0x00
 
@@ -1971,6 +2715,18 @@
 # CHECK: scratch_store_dword off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x03,0x00]
 0xff,0x5f,0x72,0xdc,0x00,0x02,0x03,0x00
 
+# CHECK: scratch_store_dword off, v2, s3 nv      ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x83,0x00]
+0x00,0x40,0x70,0xdc,0x00,0x02,0x83,0x00
+
+# CHECK: scratch_store_dword off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x83,0x00]
+0xff,0x5f,0x70,0xdc,0x00,0x02,0x83,0x00
+
+# CHECK: scratch_store_dword off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x71,0xdc,0x00,0x02,0x83,0x00]
+0xff,0x5f,0x71,0xdc,0x00,0x02,0x83,0x00
+
+# CHECK: scratch_store_dword off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x83,0x00]
+0xff,0x5f,0x72,0xdc,0x00,0x02,0x83,0x00
+
 # CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x03,0x00]
 0xff,0x5f,0x74,0xdc,0x00,0x02,0x03,0x00
 
@@ -2010,6 +2766,18 @@
 # CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 slc ; encoding: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x03,0x00]
 0xff,0x5f,0x76,0xdc,0x00,0x02,0x03,0x00
 
+# CHECK: scratch_store_dwordx2 off, v[2:3], s3 nv ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x83,0x00]
+0x00,0x40,0x74,0xdc,0x00,0x02,0x83,0x00
+
+# CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 nv ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x83,0x00]
+0xff,0x5f,0x74,0xdc,0x00,0x02,0x83,0x00
+
+# CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x75,0xdc,0x00,0x02,0x83,0x00]
+0xff,0x5f,0x75,0xdc,0x00,0x02,0x83,0x00
+
+# CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x83,0x00]
+0xff,0x5f,0x76,0xdc,0x00,0x02,0x83,0x00
+
 # CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x03,0x00]
 0xff,0x5f,0x78,0xdc,0x00,0x02,0x03,0x00
 
@@ -2049,6 +2817,18 @@
 # CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 slc ; encoding: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x03,0x00]
 0xff,0x5f,0x7a,0xdc,0x00,0x02,0x03,0x00
 
+# CHECK: scratch_store_dwordx3 off, v[2:4], s3 nv ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x83,0x00]
+0x00,0x40,0x78,0xdc,0x00,0x02,0x83,0x00
+
+# CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 nv ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x83,0x00]
+0xff,0x5f,0x78,0xdc,0x00,0x02,0x83,0x00
+
+# CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x79,0xdc,0x00,0x02,0x83,0x00]
+0xff,0x5f,0x79,0xdc,0x00,0x02,0x83,0x00
+
+# CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x83,0x00]
+0xff,0x5f,0x7a,0xdc,0x00,0x02,0x83,0x00
+
 # CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x03,0x00]
 0xff,0x5f,0x7c,0xdc,0x00,0x02,0x03,0x00
 
@@ -2088,6 +2868,18 @@
 # CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 slc ; encoding: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x03,0x00]
 0xff,0x5f,0x7e,0xdc,0x00,0x02,0x03,0x00
 
+# CHECK: scratch_store_dwordx4 off, v[2:5], s3 nv ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x83,0x00]
+0x00,0x40,0x7c,0xdc,0x00,0x02,0x83,0x00
+
+# CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 nv ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x83,0x00]
+0xff,0x5f,0x7c,0xdc,0x00,0x02,0x83,0x00
+
+# CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x7d,0xdc,0x00,0x02,0x83,0x00]
+0xff,0x5f,0x7d,0xdc,0x00,0x02,0x83,0x00
+
+# CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x83,0x00]
+0xff,0x5f,0x7e,0xdc,0x00,0x02,0x83,0x00
+
 # CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x80,0xdc,0x00,0x00,0x02,0x05
 
@@ -2127,6 +2919,18 @@
 # CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x82,0xdc,0x00,0x00,0x02,0x05
 
+# CHECK: scratch_load_ubyte_d16 v5, off, s2 nv   ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x82,0x05]
+0x00,0x40,0x80,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x80,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x81,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x81,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x82,0xdc,0x00,0x00,0x82,0x05
+
 # CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x84,0xdc,0x00,0x00,0x02,0x05
 
@@ -2166,6 +2970,18 @@
 # CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x86,0xdc,0x00,0x00,0x02,0x05
 
+# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 nv ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x82,0x05]
+0x00,0x40,0x84,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x84,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x85,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x85,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x86,0xdc,0x00,0x00,0x82,0x05
+
 # CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x88,0xdc,0x00,0x00,0x02,0x05
 
@@ -2205,6 +3021,18 @@
 # CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x8a,0xdc,0x00,0x00,0x02,0x05
 
+# CHECK: scratch_load_sbyte_d16 v5, off, s2 nv   ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x82,0x05]
+0x00,0x40,0x88,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x88,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x89,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x89,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x8a,0xdc,0x00,0x00,0x82,0x05
+
 # CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x8c,0xdc,0x00,0x00,0x02,0x05
 
@@ -2244,6 +3072,18 @@
 # CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x8e,0xdc,0x00,0x00,0x02,0x05
 
+# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 nv ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x82,0x05]
+0x00,0x40,0x8c,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x8c,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x8d,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x8d,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x8e,0xdc,0x00,0x00,0x82,0x05
+
 # CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x90,0xdc,0x00,0x00,0x02,0x05
 
@@ -2283,6 +3123,18 @@
 # CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x92,0xdc,0x00,0x00,0x02,0x05
 
+# CHECK: scratch_load_short_d16 v5, off, s2 nv   ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x82,0x05]
+0x00,0x40,0x90,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x90,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x91,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x91,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x92,0xdc,0x00,0x00,0x82,0x05
+
 # CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x94,0xdc,0x00,0x00,0x02,0x05
 
@@ -2322,6 +3174,18 @@
 # CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x02,0x05]
 0xff,0x5f,0x96,0xdc,0x00,0x00,0x02,0x05
 
+# CHECK: scratch_load_short_d16_hi v5, off, s2 nv ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x82,0x05]
+0x00,0x40,0x94,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x94,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x95,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x95,0xdc,0x00,0x00,0x82,0x05
+
+# CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x82,0x05]
+0xff,0x5f,0x96,0xdc,0x00,0x00,0x82,0x05
+
 # CHECK: global_load_dword v[2:3], off lds       ; encoding: [0x00,0xa0,0x50,0xdc,0x02,0x00,0x7f,0x00]
 0x00,0xa0,0x50,0xdc,0x02,0x00,0x7f,0x00
 

>From 87b1908f053183be115196fa47cf4733c01fb88a Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86 at yahoo.com>
Date: Tue, 26 Aug 2025 17:16:49 -0700
Subject: [PATCH 2/4] Define new real instructions in table-gen files so that
 there's no need to do the encoding/decoding in CPP code.

---
 llvm/lib/Target/AMDGPU/AMDGPU.td              |  10 ++
 .../Disassembler/AMDGPUDisassembler.cpp       |  13 --
 llvm/lib/Target/AMDGPU/FLATInstructions.td    | 116 ++++++++++++------
 .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp |   3 +-
 .../MCTargetDesc/AMDGPUMCCodeEmitter.cpp      |  10 --
 5 files changed, 88 insertions(+), 64 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 23339b2ad228e..3db1e72aaffa4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2329,6 +2329,16 @@ def isGFX8GFX9NotGFX90A :
             " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">,
   AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX90AInsts))>;
 
+def isGFX9NotGFX90A : // GFX9-generation targets that lack the GFX90A instructions.
+  Predicate<"!Subtarget->hasGFX90AInsts() &&"
+            " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
+  AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureGFX90AInsts), (not FeatureGFX10Insts))>;
+
+def isGFX8orGFX9After908 : // GFX8 (Volcanic Islands), or GFX9 with the GFX90A instructions.
+  Predicate<"(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) ||"
+            " ((Subtarget->getGeneration() == AMDGPUSubtarget::GFX9) && Subtarget->hasGFX90AInsts())">,
+  AssemblerPredicate <(any_of FeatureVolcanicIslands, FeatureGFX90AInsts)>;
+
 def isGFX90AOnly :
   Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX940Insts()">,
   AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX940Insts))>;
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 9103d1c41e879..d3db1b7394675 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -788,19 +788,6 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
     }
   }
 
-  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::FLAT) {
-    if (isGFX9() && !isGFX90A()) {
-      // Pre-GFX90A GFX9's use bit 55 as NV.
-      assert(Bytes_.size() >= 8);
-      if (Bytes_[6] & 0x80) { // check bit 55
-        int CPolIdx =
-            AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cpol);
-        MI.getOperand(CPolIdx).setImm(MI.getOperand(CPolIdx).getImm() |
-                                      AMDGPU::CPol::NV);
-      }
-    }
-  }
-
   if ((MCII->get(MI.getOpcode()).TSFlags &
        (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
       (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 7e5ae25ff30e6..9bd53be7b1ae6 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -125,7 +125,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
   bits<7> saddr;
   bits<10> vdst;
 
-  bits<5> cpol;
+  bits<6> cpol;
 
   // Only valid on gfx9
   bits<1> lds = ps.lds; // LDS DMA for global and scratch
@@ -2581,7 +2581,28 @@ class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
                   !subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands);
 }
 
+class FLAT_Real_gfx9 <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
+  FLAT_Real_vi <op, ps, has_sccb> { // Inherits FLAT_Real_vi; overrides only the fields set below.
+  let AssemblerPredicate = isGFX9NotGFX90A;
+  let Subtarget = SIEncodingFamily.GFX9;
+  let DecoderNamespace = "GFX9";
+  let Inst{55} = cpol{5}; // nv - GFX9 (pre-90A) uses bit 55 as the non-volatile bit.
+}
+
+multiclass FLAT_Real_mc_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> { // One pseudo -> a _vi real and a _gfx9 real.
+  let AssemblerPredicate = isGFX8orGFX9After908 in { // GFX8 or GFX9's starting with 90A
+    def _vi: FLAT_Real_vi<op, ps, has_sccb>;
+  }
+  def _gfx9: FLAT_Real_gfx9<op, ps, has_sccb>; // FLAT_Real_gfx9 sets its own AssemblerPredicate (isGFX9NotGFX90A).
+}
+
 multiclass FLAT_Real_AllAddr_vi<bits<7> op,
+  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
+  defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
+  defm _SADDR : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
+}
+
+multiclass FLAT_Real_AllAddr_vi_only<bits<7> op,
   bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
   def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
   def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
@@ -2591,19 +2612,18 @@ class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> :
   FLAT_Real <op, ps>,
   SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> {
   let AssemblerPredicate = isGFX940Plus;
-  let DecoderNamespace = "GFX9";
+  let DecoderNamespace = "GFX940";
   let Inst{13} = ps.sve;
   let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
 }
 
 multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> {
-  def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> {
-    let AssemblerPredicate = isGFX8GFX9NotGFX940;
-    let OtherPredicates = [isGFX8GFX9NotGFX940];
-  }
-  def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> {
-    let DecoderNamespace = "GFX9";
+  let OtherPredicates = [isGFX8GFX9NotGFX940] in {
+    defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME)>;
   }
+
+  defm _SADDR_vi : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
+
   let AssemblerPredicate = isGFX940Plus in {
     def _VE_gfx940  : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
     def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
@@ -2616,11 +2636,11 @@ multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx940_op,
   bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
 
   let OtherPredicates = [isGFX8GFX9NotGFX940] in {
-    def _vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> {
-      let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds";
+    let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" in {
+      defm "" : FLAT_Real_mc_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
     }
-    def _SADDR_vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> {
-      let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds";
+    let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" in {
+      defm _SADDR : FLAT_Real_mc_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
     }
   }
 
@@ -2636,32 +2656,41 @@ multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> {
   def _ST_gfx940  : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
 }
 
-def FLAT_LOAD_UBYTE_vi         : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
-def FLAT_LOAD_SBYTE_vi         : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
-def FLAT_LOAD_USHORT_vi        : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
-def FLAT_LOAD_SSHORT_vi        : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
-def FLAT_LOAD_DWORD_vi         : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
-def FLAT_LOAD_DWORDX2_vi       : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
-def FLAT_LOAD_DWORDX4_vi       : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
-def FLAT_LOAD_DWORDX3_vi       : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;
-
-def FLAT_STORE_BYTE_vi         : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
-def FLAT_STORE_BYTE_D16_HI_vi  : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
-def FLAT_STORE_SHORT_vi        : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
-def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
-def FLAT_STORE_DWORD_vi        : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
-def FLAT_STORE_DWORDX2_vi      : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
-def FLAT_STORE_DWORDX4_vi      : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
-def FLAT_STORE_DWORDX3_vi      : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;
-
-def FLAT_LOAD_UBYTE_D16_vi    : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
-def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
-def FLAT_LOAD_SBYTE_D16_vi    : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
-def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
-def FLAT_LOAD_SHORT_D16_vi    : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
-def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
+defm FLAT_LOAD_UBYTE_vi         : FLAT_Real_mc_vi <0x10, FLAT_LOAD_UBYTE>;
+defm FLAT_LOAD_SBYTE_vi         : FLAT_Real_mc_vi <0x11, FLAT_LOAD_SBYTE>;
+defm FLAT_LOAD_USHORT_vi        : FLAT_Real_mc_vi <0x12, FLAT_LOAD_USHORT>;
+defm FLAT_LOAD_SSHORT_vi        : FLAT_Real_mc_vi <0x13, FLAT_LOAD_SSHORT>;
+defm FLAT_LOAD_DWORD_vi         : FLAT_Real_mc_vi <0x14, FLAT_LOAD_DWORD>;
+defm FLAT_LOAD_DWORDX2_vi       : FLAT_Real_mc_vi <0x15, FLAT_LOAD_DWORDX2>;
+defm FLAT_LOAD_DWORDX4_vi       : FLAT_Real_mc_vi <0x17, FLAT_LOAD_DWORDX4>;
+defm FLAT_LOAD_DWORDX3_vi       : FLAT_Real_mc_vi <0x16, FLAT_LOAD_DWORDX3>;
+
+defm FLAT_STORE_BYTE_vi         : FLAT_Real_mc_vi <0x18, FLAT_STORE_BYTE>;
+defm FLAT_STORE_BYTE_D16_HI_vi  : FLAT_Real_mc_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
+defm FLAT_STORE_SHORT_vi        : FLAT_Real_mc_vi <0x1a, FLAT_STORE_SHORT>;
+defm FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
+defm FLAT_STORE_DWORD_vi        : FLAT_Real_mc_vi <0x1c, FLAT_STORE_DWORD>;
+defm FLAT_STORE_DWORDX2_vi      : FLAT_Real_mc_vi <0x1d, FLAT_STORE_DWORDX2>;
+defm FLAT_STORE_DWORDX4_vi      : FLAT_Real_mc_vi <0x1f, FLAT_STORE_DWORDX4>;
+defm FLAT_STORE_DWORDX3_vi      : FLAT_Real_mc_vi <0x1e, FLAT_STORE_DWORDX3>;
+
+defm FLAT_LOAD_UBYTE_D16_vi    : FLAT_Real_mc_vi <0x20, FLAT_LOAD_UBYTE_D16>;
+defm FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_mc_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
+defm FLAT_LOAD_SBYTE_D16_vi    : FLAT_Real_mc_vi <0x22, FLAT_LOAD_SBYTE_D16>;
+defm FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_mc_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
+defm FLAT_LOAD_SHORT_D16_vi    : FLAT_Real_mc_vi <0x24, FLAT_LOAD_SHORT_D16>;
+defm FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
 
 multiclass FLAT_Real_Atomics_vi <bits<7> op,
+  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
+  defvar ps = !cast<FLAT_Pseudo>(NAME);
+  defm ""     : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
+  defm _RTN : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
+}
+
+// FLAT_Real_mc_vi contains a vi variant and a gfx9 variant. In some cases, only the vi
+// variant is needed.
+multiclass FLAT_Real_Atomics_vi_only <bits<7> op,
   bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
   defvar ps = !cast<FLAT_Pseudo>(NAME);
   def _vi     : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
@@ -2672,6 +2701,13 @@ multiclass FLAT_Real_Atomics_vi <bits<7> op,
 multiclass FLAT_Global_Real_Atomics_vi<bits<7> op,
   bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
   FLAT_Real_AllAddr_vi<op, has_sccb> {
+  defm _RTN  : FLAT_Real_mc_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
+  defm _SADDR_RTN : FLAT_Real_mc_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
+}
+
+multiclass FLAT_Global_Real_Atomics_vi_only<bits<7> op,
+  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
+  FLAT_Real_AllAddr_vi_only<op, has_sccb> {
   def _RTN_vi  : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
   def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
 
@@ -2838,10 +2874,10 @@ let AssemblerPredicate = isGFX940Plus in {
   defm GLOBAL_ATOMIC_ADD_F64     : FLAT_Global_Real_Atomics_gfx940<0x4f>;
   defm GLOBAL_ATOMIC_MIN_F64     : FLAT_Global_Real_Atomics_gfx940<0x50>;
   defm GLOBAL_ATOMIC_MAX_F64     : FLAT_Global_Real_Atomics_gfx940<0x51>;
-  defm FLAT_ATOMIC_ADD_F32       : FLAT_Real_Atomics_vi<0x4d>;
-  defm FLAT_ATOMIC_PK_ADD_F16    : FLAT_Real_Atomics_vi<0x4e>;
-  defm FLAT_ATOMIC_PK_ADD_BF16   : FLAT_Real_Atomics_vi<0x52>;
-  defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;
+  defm FLAT_ATOMIC_ADD_F32       : FLAT_Real_Atomics_vi_only<0x4d>;
+  defm FLAT_ATOMIC_PK_ADD_F16    : FLAT_Real_Atomics_vi_only<0x4e>;
+  defm FLAT_ATOMIC_PK_ADD_BF16   : FLAT_Real_Atomics_vi_only<0x52>;
+  defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi_only<0x52>;
 } // End AssemblerPredicate = isGFX940Plus
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 4f4e6dc4e4136..aa31835337290 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -176,11 +176,12 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
     O << " dlc";
   if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI))
     O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc");
-  if (Imm & ~CPol::ALL_pregfx12)
+  if (Imm & ~CPol::ALL_pregfx12) {
     if ((Imm & CPol::NV) && AMDGPU::isGFX9(STI) && !AMDGPU::isGFX90A(STI))
       O << " nv";
     else
       O << " /* unexpected cache policy bit */";
+  }
 }
 
 void AMDGPUInstPrinter::printTH(const MCInst *MI, int64_t TH, int64_t Scope,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
index 63a89ec319a55..bf212bbca934c 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -394,16 +394,6 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI,
     Encoding |= getImplicitOpSelHiEncoding(Opcode);
   }
 
-  // For GFX90A+ targets, bit 55 of the FLAT instructions is the ACC bit
-  // indicating the use of AGPRs. However, pre-GFX90A, the same bit is for NV.
-  if ((Desc.TSFlags & SIInstrFlags::FLAT) && AMDGPU::isGFX9(STI) &&
-      !AMDGPU::isGFX90A(STI)) {
-    int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cpol);
-    unsigned Cpol = MI.getOperand(Idx).getImm();
-    if (Cpol & AMDGPU::CPol::NV)
-      Encoding |= (UINT64_C(1) << 55);
-  }
-
   // GFX10+ v_cmpx opcodes promoted to VOP3 have implied dst=EXEC.
   // Documentation requires dst to be encoded as EXEC (0x7E),
   // but it looks like the actual value encoded for dst operand

>From 9e8d903a8c5df01fb9478c1c3e8058e6a6b401b8 Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86 at yahoo.com>
Date: Fri, 19 Sep 2025 16:11:54 -0700
Subject: [PATCH 3/4] This commit: (1) improve .td file after resolving merge
 conflict (2) replace target check with a predicate (3) add negative tests.

---
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      |  8 ++-
 llvm/lib/Target/AMDGPU/FLATInstructions.td    | 52 +++++++++++--------
 llvm/test/MC/AMDGPU/gfx90a_err.s              | 43 +++++++++++++++
 llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s         | 52 +++++++++----------
 llvm/test/MC/AMDGPU/gfx942_err.s              | 28 ++++++++++
 5 files changed, 132 insertions(+), 51 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index e30b2c1347ee6..0d3d3568949a9 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1585,6 +1585,11 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
 
   bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
 
+  bool isFlatInstAndNVAllowed(const MCInst &Inst) const {
+    uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
+    return (TSFlags & SIInstrFlags::FLAT) && isGFX9() && !isGFX90A();
+  }
+
   AMDGPUTargetStreamer &getTargetStreamer() {
     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
     return static_cast<AMDGPUTargetStreamer &>(TS);
@@ -5295,8 +5300,7 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
       Error(S, "scale_offset is not supported on this GPU");
     }
-    if ((CPol & CPol::NV) && (!isGFX9() || isGFX90A())) {
-      // nv not supported on GFX90A+
+    if ((CPol & CPol::NV) && !isFlatInstAndNVAllowed(Inst)) {
       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
       StringRef CStr(S.getPointer());
       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 9bd53be7b1ae6..52bda9d5d7e96 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -2581,6 +2581,11 @@ class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
                   !subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands);
 }
 
+class FLAT_Real_vi_ex_gfx9 <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
+  FLAT_Real_vi <op, ps, has_sccb> {
+  let AssemblerPredicate = isGFX8orGFX9After908;
+}
+
 class FLAT_Real_gfx9 <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
   FLAT_Real_vi <op, ps, has_sccb> {
   let AssemblerPredicate = isGFX9NotGFX90A;
@@ -2590,9 +2595,7 @@ class FLAT_Real_gfx9 <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
 }
 
 multiclass FLAT_Real_mc_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> {
-  let AssemblerPredicate = isGFX8orGFX9After908 in { // GFX8 or GFX9's starting with 90A
-    def _vi: FLAT_Real_vi<op, ps, has_sccb>;
-  }
+  def _vi: FLAT_Real_vi_ex_gfx9<op, ps, has_sccb>;
   def _gfx9: FLAT_Real_gfx9<op, ps, has_sccb>;
 }
 
@@ -2602,10 +2605,10 @@ multiclass FLAT_Real_AllAddr_vi<bits<7> op,
   defm _SADDR : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
 }
 
-multiclass FLAT_Real_AllAddr_vi_only<bits<7> op,
+multiclass FLAT_Real_AllAddr_vi_ex_gfx9<bits<7> op,
   bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
-  def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
-  def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
+  def _vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
+  def _SADDR_vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
 }
 
 class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> :
@@ -2684,18 +2687,18 @@ defm FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
 multiclass FLAT_Real_Atomics_vi <bits<7> op,
   bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
   defvar ps = !cast<FLAT_Pseudo>(NAME);
-  defm ""     : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
+  defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
   defm _RTN : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
+  def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN_agpr"), has_sccb>;
 }
 
-// FLAT_Real_mc_vi contains a vi variant and a gfx9 variant. In some cases, only the vi
-// variant is needed.
-multiclass FLAT_Real_Atomics_vi_only <bits<7> op,
+multiclass FLAT_Real_Atomics_vi_ex_gfx9 <bits<7> op,
   bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
   defvar ps = !cast<FLAT_Pseudo>(NAME);
-  def _vi     : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
-  def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
-  def _RTN_agpr_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN_agpr"), has_sccb>;
+  def _vi     : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
+  def _RTN_vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
+
+  def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN_agpr"), has_sccb>;
 }
 
 multiclass FLAT_Global_Real_Atomics_vi<bits<7> op,
@@ -2703,16 +2706,19 @@ multiclass FLAT_Global_Real_Atomics_vi<bits<7> op,
   FLAT_Real_AllAddr_vi<op, has_sccb> {
   defm _RTN  : FLAT_Real_mc_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
   defm _SADDR_RTN : FLAT_Real_mc_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
+
+  def _RTN_agpr_vi  : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_RTN_agpr"), has_sccb>;
+  def _SADDR_RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN_agpr"), has_sccb>;
 }
 
-multiclass FLAT_Global_Real_Atomics_vi_only<bits<7> op,
+multiclass FLAT_Global_Real_Atomics_vi_ex_gfx9<bits<7> op,
   bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
-  FLAT_Real_AllAddr_vi_only<op, has_sccb> {
-  def _RTN_vi  : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
-  def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
+  FLAT_Real_AllAddr_vi_ex_gfx9<op, has_sccb> {
+  def _RTN_vi  : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
+  def _SADDR_RTN_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
 
-  def _RTN_agpr_vi  : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN_agpr"), has_sccb>;
-  def _SADDR_RTN_agpr_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN_agpr"), has_sccb>;
+  def _RTN_agpr_vi  : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_RTN_agpr"), has_sccb>;
+  def _SADDR_RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN_agpr"), has_sccb>;
 }
 
 defm FLAT_ATOMIC_SWAP       : FLAT_Real_Atomics_vi <0x40>;
@@ -2874,10 +2880,10 @@ let AssemblerPredicate = isGFX940Plus in {
   defm GLOBAL_ATOMIC_ADD_F64     : FLAT_Global_Real_Atomics_gfx940<0x4f>;
   defm GLOBAL_ATOMIC_MIN_F64     : FLAT_Global_Real_Atomics_gfx940<0x50>;
   defm GLOBAL_ATOMIC_MAX_F64     : FLAT_Global_Real_Atomics_gfx940<0x51>;
-  defm FLAT_ATOMIC_ADD_F32       : FLAT_Real_Atomics_vi_only<0x4d>;
-  defm FLAT_ATOMIC_PK_ADD_F16    : FLAT_Real_Atomics_vi_only<0x4e>;
-  defm FLAT_ATOMIC_PK_ADD_BF16   : FLAT_Real_Atomics_vi_only<0x52>;
-  defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi_only<0x52>;
+  defm FLAT_ATOMIC_ADD_F32       : FLAT_Real_Atomics_vi_ex_gfx9<0x4d>;
+  defm FLAT_ATOMIC_PK_ADD_F16    : FLAT_Real_Atomics_vi_ex_gfx9<0x4e>;
+  defm FLAT_ATOMIC_PK_ADD_BF16   : FLAT_Real_Atomics_vi_ex_gfx9<0x52>;
+  defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi_ex_gfx9<0x52>;
 } // End AssemblerPredicate = isGFX940Plus
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/AMDGPU/gfx90a_err.s b/llvm/test/MC/AMDGPU/gfx90a_err.s
index ff0dfb371bbbf..78e4f86ec1b90 100644
--- a/llvm/test/MC/AMDGPU/gfx90a_err.s
+++ b/llvm/test/MC/AMDGPU/gfx90a_err.s
@@ -674,3 +674,46 @@ v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
 v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
 // GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
 
+// nv bit in FLAT instructions
+flat_load_ubyte v5, v[2:3] offset:4095 nv
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
+
+flat_load_ubyte a5, v[2:3] offset:4095 nv
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
+
+flat_store_dword v[2:3], v5 offset:4095 nv
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
+
+flat_store_dword v[2:3], a5 offset:4095 nv
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
+
+flat_atomic_add_f64 v[0:1], v[2:3] offset:4095 nv
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
+
+global_load_ubyte v5, v[2:3], off offset:-1 nv
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
+
+global_store_byte v[2:3], v5, off offset:-1 nv
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
+
+global_atomic_add v[2:3], v5, off nv
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
+
+global_atomic_swap a1, v[2:3], a2, off glc nv
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
+
+global_atomic_swap_x2 v[2:3], v[4:5], off nv
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
+
+global_atomic_swap_x2 v[2:3], a[4:5], off nv
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
+
+scratch_load_ubyte v5, off, s2 offset:-1 nv
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
+
+scratch_load_ubyte a5, off, s2 offset:-1 nv
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
+
+scratch_store_dword v2, v3, off nv
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
+
diff --git a/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s b/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s
index 43673d1d49c79..f97f26e2d0430 100644
--- a/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s
+++ b/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s
@@ -706,107 +706,107 @@ flat_load_short_d16_hi a5, v[2:3] offset:4095 glc
 flat_load_short_d16_hi a5, v[2:3] offset:4095 slc
 
 // GFX90A: flat_atomic_swap a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x01,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_swap a0, v[2:3], a2 offset:4095 glc
 
 // GFX90A: flat_atomic_cmpswap a0, v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x05,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_cmpswap a0, v[2:3], a[2:3] offset:4095 glc
 
 // GFX90A: flat_atomic_add a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x09,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_add a0, v[2:3], a2 offset:4095 glc
 
 // GFX90A: flat_atomic_sub a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x0d,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_sub a0, v[2:3], a2 offset:4095 glc
 
 // GFX90A: flat_atomic_smin a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x11,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_smin a0, v[2:3], a2 offset:4095 glc
 
 // GFX90A: flat_atomic_umin a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x15,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_umin a0, v[2:3], a2 offset:4095 glc
 
 // GFX90A: flat_atomic_smax a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x19,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_smax a0, v[2:3], a2 offset:4095 glc
 
 // GFX90A: flat_atomic_umax a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x1d,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_umax a0, v[2:3], a2 offset:4095 glc
 
 // GFX90A: flat_atomic_and a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x21,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_and a0, v[2:3], a2 offset:4095 glc
 
 // GFX90A: flat_atomic_or a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x25,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_or a0, v[2:3], a2 offset:4095 glc
 
 // GFX90A: flat_atomic_xor a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x29,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_xor a0, v[2:3], a2 offset:4095 glc
 
 // GFX90A: flat_atomic_inc a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x2d,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_inc a0, v[2:3], a2 offset:4095 glc
 
 // GFX90A: flat_atomic_dec a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x31,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_dec a0, v[2:3], a2 offset:4095 glc
 
 // GFX90A: flat_atomic_swap_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x81,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_swap_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
 
 // GFX90A: flat_atomic_cmpswap_x2 a[0:1], v[2:3], a[2:5] offset:4095 glc ; encoding: [0xff,0x0f,0x85,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_cmpswap_x2 a[0:1], v[2:3], a[2:5] offset:4095 glc
 
 // GFX90A: flat_atomic_add_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x89,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_add_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
 
 // GFX90A: flat_atomic_sub_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x8d,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_sub_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
 
 // GFX90A: flat_atomic_smin_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x91,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_smin_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
 
 // GFX90A: flat_atomic_umin_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x95,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_umin_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
 
 // GFX90A: flat_atomic_smax_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x99,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_smax_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
 
 // GFX90A: flat_atomic_umax_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x9d,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_umax_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
 
 // GFX90A: flat_atomic_and_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xa1,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_and_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
 
 // GFX90A: flat_atomic_or_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xa5,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_or_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
 
 // GFX90A: flat_atomic_xor_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xa9,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_xor_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
 
 // GFX90A: flat_atomic_inc_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xad,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_inc_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
 
 // GFX90A: flat_atomic_dec_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xb1,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 flat_atomic_dec_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
 
 // GFX90A: flat_atomic_swap v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x00,0xdd,0x02,0x02,0x80,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx942_err.s b/llvm/test/MC/AMDGPU/gfx942_err.s
index fd59a01b34a04..dc51bab65aa04 100644
--- a/llvm/test/MC/AMDGPU/gfx942_err.s
+++ b/llvm/test/MC/AMDGPU/gfx942_err.s
@@ -125,3 +125,31 @@ global_load_dword v[2:3], off lds
 
 scratch_load_dword v2, off lds
 // GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+// nv bit in FLAT instructions
+flat_load_ubyte v5, v[2:3] offset:4095 nv
+// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
+
+flat_store_dword v[2:3], v5 offset:4095 nv
+// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
+
+flat_atomic_add_f32 v[2:3], v5 nv
+// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
+
+global_load_dword v2, v[2:3], off sc0 nv
+// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
+
+global_store_dword v[2:3], v5 off sc0 nv
+// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
+
+global_atomic_add_f64 v[0:1], v[2:3], off sc1 nv
+// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
+
+global_atomic_swap v0, v[2:3], v5 off sc0 nv
+// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
+
+scratch_load_lds_dword v2, off nv
+// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
+
+scratch_store_dword v2, v3, off nv
+// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU

>From 9f08a5e23f37a25f2ca63dd1c7d65733ea03d65f Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86 at yahoo.com>
Date: Thu, 25 Sep 2025 11:37:45 -0700
Subject: [PATCH 4/4] Minor change: use CPolBit.NV instead of a hard-coded bit index

---
 llvm/lib/Target/AMDGPU/FLATInstructions.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 52bda9d5d7e96..6ab9ecbc68233 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -2591,7 +2591,7 @@ class FLAT_Real_gfx9 <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
   let AssemblerPredicate = isGFX9NotGFX90A;
   let Subtarget = SIEncodingFamily.GFX9;
   let DecoderNamespace = "GFX9";
-  let Inst{55} = cpol{5}; // nv - GFX9 (pre-90A) uses bit 55 as the non-volatile bit.
+  let Inst{55} = cpol{CPolBit.NV}; // nv - GFX9 (pre-90A) uses bit 55 as the non-volatile bit.
 }
 
 multiclass FLAT_Real_mc_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> {



More information about the llvm-commits mailing list