[llvm] [AMDGPU] Add VDSDIR instructions for GFX12 (PR #75197)

Mirko Brkušanin via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 13 02:38:44 PST 2023


https://github.com/mbrkusanin updated https://github.com/llvm/llvm-project/pull/75197

>From e89c469c98a466cb52047700f5dd8ffcbf729a44 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Tue, 12 Dec 2023 16:13:09 +0100
Subject: [PATCH 1/4] [AMDGPU] Add VDSDIR instructions for GFX12

---
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      |  16 ++
 llvm/lib/Target/AMDGPU/LDSDIRInstructions.td  | 118 ++++++++--
 .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp |  20 ++
 .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h   |   4 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.td         |   2 +
 llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp    |   4 +-
 .../AMDGPU/llvm.amdgcn.lds.direct.load.ll     |  36 +--
 .../AMDGPU/llvm.amdgcn.lds.param.load.ll      |  39 ++--
 llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s        | 199 +++++++++++++++++
 .../Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt | 206 ++++++++++++++++++
 10 files changed, 587 insertions(+), 57 deletions(-)
 create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
 create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt

diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 92427335c0ad2f..cb2ea1f144e587 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -166,6 +166,8 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     ImmTyEndpgm,
     ImmTyWaitVDST,
     ImmTyWaitEXP,
+    ImmTyWaitVAVDst,
+    ImmTyWaitVMVSrc,
   };
 
   // Immediate operand kind.
@@ -908,6 +910,8 @@ class AMDGPUOperand : public MCParsedAsmOperand {
   bool isEndpgm() const;
   bool isWaitVDST() const;
   bool isWaitEXP() const;
+  bool isWaitVAVDst() const;
+  bool isWaitVMVSrc() const;
 
   auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
     return std::bind(P, *this);
@@ -1028,6 +1032,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
   }
 
   static void printImmTy(raw_ostream& OS, ImmTy Type) {
+    // clang-format off
     switch (Type) {
     case ImmTyNone: OS << "None"; break;
     case ImmTyGDS: OS << "GDS"; break;
@@ -1085,7 +1090,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     case ImmTyEndpgm: OS << "Endpgm"; break;
     case ImmTyWaitVDST: OS << "WaitVDST"; break;
     case ImmTyWaitEXP: OS << "WaitEXP"; break;
+    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
+    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
     }
+    // clang-format on
   }
 
   void print(raw_ostream &OS) const override {
@@ -9130,6 +9138,14 @@ bool AMDGPUOperand::isWaitVDST() const {
   return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
 }
 
+bool AMDGPUOperand::isWaitVAVDst() const {
+  return isImmTy(ImmTyWaitVAVDst) && isUInt<4>(getImm());
+}
+
+bool AMDGPUOperand::isWaitVMVSrc() const {
+  return isImmTy(ImmTyWaitVMVSrc) && isUInt<1>(getImm());
+}
+
 //===----------------------------------------------------------------------===//
 // VINTERP
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td b/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td
index 4956a158677495..572798ae10c2b7 100644
--- a/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td
+++ b/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td
@@ -1,4 +1,4 @@
-//===-- LDSDIRInstructions.td - LDS Direct Instruction Definitions --------===//
+//===-- LDSDIRInstructions.td - LDS/DS Direct Instruction Definitions -----===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
-// LDSDIR encoding
+// LDSDIR/VDSDIR encoding (LDSDIR is gfx11, VDSDIR is gfx12+)
 //===----------------------------------------------------------------------===//
 
 class LDSDIRe<bits<2> op, bit is_direct> : Enc32 {
@@ -27,8 +27,27 @@ class LDSDIRe<bits<2> op, bit is_direct> : Enc32 {
   let Inst{7-0} = vdst;
 }
 
+class VDSDIRe<bits<2> op, bit is_direct> : Enc32 {
+  // encoding fields
+  bits<2> attrchan;
+  bits<6> attr;
+  bits<4> waitvdst;
+  bits<8> vdst;
+  bits<1> waitvsrc;
+
+  // encoding
+  let Inst{31-24} = 0xce; // encoding
+  let Inst{23} = waitvsrc;
+  let Inst{22} = 0x0; // reserved
+  let Inst{21-20} = op;
+  let Inst{19-16} = waitvdst;
+  let Inst{15-10} = !if(is_direct, ?, attr);
+  let Inst{9-8} = !if(is_direct, ?, attrchan);
+  let Inst{7-0} = vdst;
+}
+
 //===----------------------------------------------------------------------===//
-// LDSDIR Classes
+// LDSDIR/VDSDIR Classes
 //===----------------------------------------------------------------------===//
 
 class LDSDIR_getIns<bit direct> {
@@ -38,10 +57,15 @@ class LDSDIR_getIns<bit direct> {
   );
 }
 
-class LDSDIR_Common<string opName, string asm = "", bit direct> : InstSI<
-    (outs VGPR_32:$vdst),
-    LDSDIR_getIns<direct>.ret,
-    asm> {
+class VDSDIR_getIns<bit direct> {
+  dag ret = !if(direct,
+    (ins WaitVAVDst:$waitvdst, WaitVMVSrc:$waitvsrc),
+    (ins InterpAttr:$attr, InterpAttrChan:$attrchan, WaitVAVDst:$waitvdst, WaitVMVSrc:$waitvsrc)
+  );
+}
+
+class DSDIR_Common<string opName, string asm = "", dag ins, bit direct> : InstSI<
+    (outs VGPR_32:$vdst), ins, asm> {
   let LDSDIR = 1;
   let EXP_CNT = 1;
 
@@ -60,8 +84,8 @@ class LDSDIR_Common<string opName, string asm = "", bit direct> : InstSI<
   let is_direct = direct;
 }
 
-class LDSDIR_Pseudo<string opName, bit direct> :
-  LDSDIR_Common<opName, "", direct>,
+class DSDIR_Pseudo<string opName, dag ins, bit direct> :
+  DSDIR_Common<opName, "", ins, direct>,
   SIMCInstr<opName, SIEncodingFamily.NONE> {
   let isPseudo = 1;
   let isCodeGenOnly = 1;
@@ -74,22 +98,31 @@ class LDSDIR_getAsm<bit direct> {
   );
 }
 
-class LDSDIR_Real<bits<2> op, LDSDIR_Pseudo lds, int subtarget> :
-  LDSDIR_Common<lds.Mnemonic,
-                lds.Mnemonic # LDSDIR_getAsm<lds.is_direct>.ret,
-                lds.is_direct>,
-  SIMCInstr <lds.Mnemonic, subtarget>,
-  LDSDIRe<op, lds.is_direct> {
+class VDSDIR_getAsm<bit direct> {
+  string ret = !if(direct,
+    " $vdst$waitvdst$waitvsrc",
+    " $vdst, $attr$attrchan$waitvdst$waitvsrc"
+  );
+}
+
+class DSDIR_Real<DSDIR_Pseudo lds, dag ins, string asm, int subtarget> :
+  DSDIR_Common<lds.Mnemonic,
+               lds.Mnemonic # asm,
+               ins,
+               lds.is_direct>,
+  SIMCInstr <lds.Mnemonic, subtarget> {
   let isPseudo = 0;
   let isCodeGenOnly = 0;
 }
 
 //===----------------------------------------------------------------------===//
-// LDS Direct Instructions
+// LDS/DS Direct Instructions
 //===----------------------------------------------------------------------===//
 
-def LDS_DIRECT_LOAD : LDSDIR_Pseudo<"lds_direct_load", 1>;
-def LDS_PARAM_LOAD : LDSDIR_Pseudo<"lds_param_load", 0>;
+let SubtargetPredicate = isGFX11Only in {
+
+def LDS_DIRECT_LOAD : DSDIR_Pseudo<"lds_direct_load", LDSDIR_getIns<1>.ret, 1>;
+def LDS_PARAM_LOAD : DSDIR_Pseudo<"lds_param_load", LDSDIR_getIns<0>.ret, 0>;
 
 def : GCNPat <
   (f32 (int_amdgcn_lds_direct_load M0)),
@@ -101,16 +134,53 @@ def : GCNPat <
   (LDS_PARAM_LOAD timm:$attr, timm:$attrchan, 0)
 >;
 
+} // End SubtargetPredicate = isGFX11Only
+
+let SubtargetPredicate = isGFX12Plus in {
+
+def DS_DIRECT_LOAD : DSDIR_Pseudo<"ds_direct_load", VDSDIR_getIns<1>.ret, 1>;
+def DS_PARAM_LOAD : DSDIR_Pseudo<"ds_param_load", VDSDIR_getIns<0>.ret, 0>;
+
+def : GCNPat <
+  (f32 (int_amdgcn_lds_direct_load M0)),
+  (DS_DIRECT_LOAD 0, 1)
+>;
+
+def : GCNPat <
+  (f32 (int_amdgcn_lds_param_load timm:$attrchan, timm:$attr, M0)),
+  (DS_PARAM_LOAD timm:$attr, timm:$attrchan, 0, 1)
+>;
+
+} // End SubtargetPredicate = isGFX12Plus
+
 //===----------------------------------------------------------------------===//
-// GFX11+
+// GFX11
 //===----------------------------------------------------------------------===//
 
-multiclass LDSDIR_Real_gfx11<bits<2> op, LDSDIR_Pseudo lds = !cast<LDSDIR_Pseudo>(NAME)> {
-  def _gfx11 : LDSDIR_Real<op, lds, SIEncodingFamily.GFX11> {
-    let AssemblerPredicate = isGFX11Plus;
+multiclass DSDIR_Real_gfx11<bits<2> op, DSDIR_Pseudo lds = !cast<DSDIR_Pseudo>(NAME)> {
+  def _gfx11 : DSDIR_Real<lds, lds.InOperandList,
+                           LDSDIR_getAsm<lds.is_direct>.ret, SIEncodingFamily.GFX11>,
+               LDSDIRe<op, lds.is_direct> {
+    let AssemblerPredicate = isGFX11Only;
     let DecoderNamespace = "GFX11";
   }
 }
 
-defm LDS_PARAM_LOAD : LDSDIR_Real_gfx11<0x0>;
-defm LDS_DIRECT_LOAD : LDSDIR_Real_gfx11<0x1>;
+defm LDS_PARAM_LOAD : DSDIR_Real_gfx11<0x0>;
+defm LDS_DIRECT_LOAD : DSDIR_Real_gfx11<0x1>;
+
+//===----------------------------------------------------------------------===//
+// GFX12+
+//===----------------------------------------------------------------------===//
+
+multiclass DSDIR_Real_gfx12<bits<2> op, DSDIR_Pseudo lds = !cast<DSDIR_Pseudo>(NAME)> {
+  def _gfx12 : DSDIR_Real<lds, lds.InOperandList,
+                           VDSDIR_getAsm<lds.is_direct>.ret, SIEncodingFamily.GFX12>,
+               VDSDIRe<op, lds.is_direct> {
+    let AssemblerPredicate = isGFX12Plus;
+    let DecoderNamespace = "GFX12";
+  }
+}
+
+defm DS_PARAM_LOAD : DSDIR_Real_gfx12<0x0>;
+defm DS_DIRECT_LOAD : DSDIR_Real_gfx12<0x1>;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 57f74ae08b35c4..d99d343f81c40b 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -631,6 +631,26 @@ void AMDGPUInstPrinter::printWaitVDST(const MCInst *MI, unsigned OpNo,
   printU4ImmDecOperand(MI, OpNo, O);
 }
 
+void AMDGPUInstPrinter::printWaitVAVDst(const MCInst *MI, unsigned OpNo,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O) {
+  uint8_t Imm = MI->getOperand(OpNo).getImm();
+  if (Imm != 0) {
+    O << " wait_va_vdst:";
+    printU4ImmDecOperand(MI, OpNo, O);
+  }
+}
+
+void AMDGPUInstPrinter::printWaitVMVSrc(const MCInst *MI, unsigned OpNo,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O) {
+  uint8_t Imm = MI->getOperand(OpNo).getImm();
+  if (Imm != 0) {
+    O << " wait_vm_vsrc:";
+    printU4ImmDecOperand(MI, OpNo, O);
+  }
+}
+
 void AMDGPUInstPrinter::printWaitEXP(const MCInst *MI, unsigned OpNo,
                                     const MCSubtargetInfo &STI,
                                     raw_ostream &O) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index 95c26de6299ef5..f2f985fa5b1a87 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -161,6 +161,10 @@ class AMDGPUInstPrinter : public MCInstPrinter {
                     raw_ostream &O);
   void printWaitEXP(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                     raw_ostream &O);
+  void printWaitVAVDst(const MCInst *MI, unsigned OpNo,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
+  void printWaitVMVSrc(const MCInst *MI, unsigned OpNo,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
 
   void printExpSrcN(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                     raw_ostream &O, unsigned N);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 9e60bdda5ef3af..e9a5beb99498b9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1129,6 +1129,8 @@ def exp_tgt : CustomOperand<i32, 0, "ExpTgt">;
 
 def wait_vdst : NamedIntOperand<i8, "wait_vdst", "WaitVDST">;
 def wait_exp : NamedIntOperand<i8, "wait_exp", "WaitEXP">;
+def WaitVAVDst : NamedIntOperand<i8, "wait_va_vdst">;
+def WaitVMVSrc : NamedIntOperand<i8, "wait_vm_vsrc">;
 
 class KImmFPOperand<ValueType vt> : ImmOperand<vt> {
   let OperandNamespace = "AMDGPU";
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 59d6ccf513bb9e..5e6c34992930be 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -553,7 +553,9 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
         }
         continue;
       } else if (Opcode == AMDGPU::LDS_PARAM_LOAD ||
-                 Opcode == AMDGPU::LDS_DIRECT_LOAD) {
+                 Opcode == AMDGPU::DS_PARAM_LOAD ||
+                 Opcode == AMDGPU::LDS_DIRECT_LOAD ||
+                 Opcode == AMDGPU::DS_DIRECT_LOAD) {
         // Mark these STRICTWQM, but only for the instruction, not its operands.
         // This avoid unnecessarily marking M0 as requiring WQM.
         InstrInfo &II = Instructions[&MI];
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
index 313bd8525c6fd4..195c5dabb4d461 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
@@ -1,23 +1,27 @@
-; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
-; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
 
-; GFX11-LABEL: {{^}}lds_direct_load:
-; GFX11: s_mov_b32 m0
+; GCN-LABEL: {{^}}lds_direct_load:
+; GCN: s_mov_b32 m0
 ; GFX11: lds_direct_load v{{[0-9]+}}
-; GFX11: s_mov_b32 m0
+; GFX12: ds_direct_load v{{[0-9]+}}
+; GCN: s_mov_b32 m0
 ; GFX11: lds_direct_load v{{[0-9]+}}
-; GFX11: s_mov_b32 m0
+; GFX12: ds_direct_load v{{[0-9]+}}
+; GCN: s_mov_b32 m0
 ; GFX11: lds_direct_load v{{[0-9]+}}
-; GFX11: s_waitcnt expcnt(2)
-; GFX11: v_add_f32
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(1)
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(0)
-; GFX11: buffer_store_b32
-; GFX11: buffer_store_b32
-; GFX11: buffer_store_b32
-; GFX11: buffer_store_b32
+; GCN: s_waitcnt expcnt(2)
+; GCN: v_add_f32
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(1)
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(0)
+; GCN: buffer_store_b32
+; GCN: buffer_store_b32
+; GCN: buffer_store_b32
+; GCN: buffer_store_b32
 define amdgpu_ps void @lds_direct_load(ptr addrspace(8) inreg %buf, i32 inreg %arg0,
                                        i32 inreg %arg1, i32 inreg %arg2) #0 {
 main_body:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
index 5a8b03e50e2ee6..1ab753d75fe031 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
@@ -1,25 +1,32 @@
-; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
-; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
 
-; GFX11-LABEL: {{^}}lds_param_load:
-; GFX11: s_mov_b32 m0
+; GCN-LABEL: {{^}}lds_param_load:
+; GCN: s_mov_b32 m0
 ; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.x
 ; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.y
 ; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.z
 ; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.w
 ; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr1.x
-; GFX11: s_waitcnt expcnt(4)
-; GFX11: v_add_f32
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(3)
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(2)
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(1)
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(0)
-; GFX11: buffer_store_b32
-; GFX11: buffer_store_b32
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.x
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.y
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.z
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.w
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr1.x
+; GCN: s_waitcnt expcnt(4)
+; GCN: v_add_f32
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(3)
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(2)
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(1)
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(0)
+; GCN: buffer_store_b32
+; GCN: buffer_store_b32
 define amdgpu_ps void @lds_param_load(ptr addrspace(8) inreg %buf, i32 inreg %arg) #0 {
 main_body:
   %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %arg)
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
new file mode 100644
index 00000000000000..4d40eae1c7e7fc
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
@@ -0,0 +1,199 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck -check-prefix=GFX12 %s
+
+ds_direct_load v1 wait_va_vdst:15
+// GFX12: ds_direct_load v1 wait_va_vdst:15  ; encoding: [0x01,0x00,0x1f,0xce]
+
+ds_direct_load v2 wait_va_vdst:14
+// GFX12: ds_direct_load v2 wait_va_vdst:14  ; encoding: [0x02,0x00,0x1e,0xce]
+
+ds_direct_load v3 wait_va_vdst:13
+// GFX12: ds_direct_load v3 wait_va_vdst:13  ; encoding: [0x03,0x00,0x1d,0xce]
+
+ds_direct_load v4 wait_va_vdst:12
+// GFX12: ds_direct_load v4 wait_va_vdst:12  ; encoding: [0x04,0x00,0x1c,0xce]
+
+ds_direct_load v5 wait_va_vdst:11
+// GFX12: ds_direct_load v5 wait_va_vdst:11  ; encoding: [0x05,0x00,0x1b,0xce]
+
+ds_direct_load v6 wait_va_vdst:10
+// GFX12: ds_direct_load v6 wait_va_vdst:10  ; encoding: [0x06,0x00,0x1a,0xce]
+
+ds_direct_load v7 wait_va_vdst:9
+// GFX12: ds_direct_load v7 wait_va_vdst:9   ; encoding: [0x07,0x00,0x19,0xce]
+
+ds_direct_load v8 wait_va_vdst:8
+// GFX12: ds_direct_load v8 wait_va_vdst:8   ; encoding: [0x08,0x00,0x18,0xce]
+
+ds_direct_load v9 wait_va_vdst:7
+// GFX12: ds_direct_load v9 wait_va_vdst:7   ; encoding: [0x09,0x00,0x17,0xce]
+
+ds_direct_load v10 wait_va_vdst:6
+// GFX12: ds_direct_load v10 wait_va_vdst:6  ; encoding: [0x0a,0x00,0x16,0xce]
+
+ds_direct_load v11 wait_va_vdst:5
+// GFX12: ds_direct_load v11 wait_va_vdst:5  ; encoding: [0x0b,0x00,0x15,0xce]
+
+ds_direct_load v12 wait_va_vdst:4
+// GFX12: ds_direct_load v12 wait_va_vdst:4  ; encoding: [0x0c,0x00,0x14,0xce]
+
+ds_direct_load v13 wait_va_vdst:3
+// GFX12: ds_direct_load v13 wait_va_vdst:3  ; encoding: [0x0d,0x00,0x13,0xce]
+
+ds_direct_load v14 wait_va_vdst:2
+// GFX12: ds_direct_load v14 wait_va_vdst:2  ; encoding: [0x0e,0x00,0x12,0xce]
+
+ds_direct_load v15 wait_va_vdst:1
+// GFX12: ds_direct_load v15 wait_va_vdst:1  ; encoding: [0x0f,0x00,0x11,0xce]
+
+ds_direct_load v16 wait_va_vdst:0
+// GFX12: ds_direct_load v16  ; encoding: [0x10,0x00,0x10,0xce]
+
+ds_direct_load v17
+// GFX12: ds_direct_load v17  ; encoding: [0x11,0x00,0x10,0xce]
+
+ds_param_load v1, attr0.x wait_va_vdst:15
+// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15   ; encoding: [0x01,0x00,0x0f,0xce]
+
+ds_param_load v2, attr0.y wait_va_vdst:14
+// GFX12: ds_param_load v2, attr0.y wait_va_vdst:14   ; encoding: [0x02,0x01,0x0e,0xce]
+
+ds_param_load v3, attr0.z wait_va_vdst:13
+// GFX12: ds_param_load v3, attr0.z wait_va_vdst:13   ; encoding: [0x03,0x02,0x0d,0xce]
+
+ds_param_load v4, attr0.w wait_va_vdst:12
+// GFX12: ds_param_load v4, attr0.w wait_va_vdst:12   ; encoding: [0x04,0x03,0x0c,0xce]
+
+ds_param_load v5, attr0.x wait_va_vdst:11
+// GFX12: ds_param_load v5, attr0.x wait_va_vdst:11   ; encoding: [0x05,0x00,0x0b,0xce]
+
+ds_param_load v6, attr1.x wait_va_vdst:10
+// GFX12: ds_param_load v6, attr1.x wait_va_vdst:10   ; encoding: [0x06,0x04,0x0a,0xce]
+
+ds_param_load v7, attr2.y wait_va_vdst:9
+// GFX12: ds_param_load v7, attr2.y wait_va_vdst:9    ; encoding: [0x07,0x09,0x09,0xce]
+
+ds_param_load v8, attr3.z wait_va_vdst:8
+// GFX12: ds_param_load v8, attr3.z wait_va_vdst:8    ; encoding: [0x08,0x0e,0x08,0xce]
+
+ds_param_load v9, attr4.w wait_va_vdst:7
+// GFX12: ds_param_load v9, attr4.w wait_va_vdst:7    ; encoding: [0x09,0x13,0x07,0xce]
+
+ds_param_load v10, attr11.x wait_va_vdst:6
+// GFX12: ds_param_load v10, attr11.x wait_va_vdst:6  ; encoding: [0x0a,0x2c,0x06,0xce]
+
+ds_param_load v11, attr22.y wait_va_vdst:5
+// GFX12: ds_param_load v11, attr22.y wait_va_vdst:5  ; encoding: [0x0b,0x59,0x05,0xce]
+
+ds_param_load v13, attr32.x wait_va_vdst:3
+// GFX12: ds_param_load v13, attr32.x wait_va_vdst:3  ; encoding: [0x0d,0x80,0x03,0xce]
+
+ds_param_load v14, attr32.y wait_va_vdst:2
+// GFX12: ds_param_load v14, attr32.y wait_va_vdst:2  ; encoding: [0x0e,0x81,0x02,0xce]
+
+ds_param_load v15, attr32.z wait_va_vdst:1
+// GFX12: ds_param_load v15, attr32.z wait_va_vdst:1  ; encoding: [0x0f,0x82,0x01,0xce]
+
+ds_param_load v16, attr32.w wait_va_vdst:0
+// GFX12: ds_param_load v16, attr32.w  ; encoding: [0x10,0x83,0x00,0xce]
+
+ds_param_load v17, attr32.w
+// GFX12: ds_param_load v17, attr32.w  ; encoding: [0x11,0x83,0x00,0xce]
+
+ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1
+// GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1  ; encoding: [0x01,0x00,0x9f,0xce]
+
+ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1
+// GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1  ; encoding: [0x02,0x00,0x9e,0xce]
+
+ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1
+// GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1  ; encoding: [0x03,0x00,0x9d,0xce]
+
+ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1
+// GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1  ; encoding: [0x04,0x00,0x9c,0xce]
+
+ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1
+// GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1  ; encoding: [0x05,0x00,0x9b,0xce]
+
+ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1
+// GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1  ; encoding: [0x06,0x00,0x9a,0xce]
+
+ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1
+// GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1   ; encoding: [0x07,0x00,0x99,0xce]
+
+ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1
+// GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1   ; encoding: [0x08,0x00,0x98,0xce]
+
+ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1
+// GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1   ; encoding: [0x09,0x00,0x97,0xce]
+
+ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1
+// GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1  ; encoding: [0x0a,0x00,0x96,0xce]
+
+ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1
+// GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1  ; encoding: [0x0b,0x00,0x95,0xce]
+
+ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1
+// GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1  ; encoding: [0x0c,0x00,0x94,0xce]
+
+ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1
+// GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1  ; encoding: [0x0d,0x00,0x93,0xce]
+
+ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1
+// GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1  ; encoding: [0x0e,0x00,0x92,0xce]
+
+ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1
+// GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1  ; encoding: [0x0f,0x00,0x91,0xce]
+
+ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:1
+// GFX12: ds_direct_load v16 wait_vm_vsrc:1  ; encoding: [0x10,0x00,0x90,0xce]
+
+ds_direct_load v17 wait_vm_vsrc:1
+// GFX12: ds_direct_load v17 wait_vm_vsrc:1  ; encoding: [0x11,0x00,0x90,0xce]
+
+ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1
+// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1   ; encoding: [0x01,0x00,0x8f,0xce]
+
+ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1
+// GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1   ; encoding: [0x02,0x01,0x8e,0xce]
+
+ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1
+// GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1   ; encoding: [0x03,0x02,0x8d,0xce]
+
+ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1
+// GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1   ; encoding: [0x04,0x03,0x8c,0xce]
+
+ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1
+// GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1   ; encoding: [0x05,0x00,0x8b,0xce]
+
+ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1
+// GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1   ; encoding: [0x06,0x04,0x8a,0xce]
+
+ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1
+// GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1    ; encoding: [0x07,0x09,0x89,0xce]
+
+ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1
+// GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1    ; encoding: [0x08,0x0e,0x88,0xce]
+
+ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1
+// GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1    ; encoding: [0x09,0x13,0x87,0xce]
+
+ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:1
+// GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:1  ; encoding: [0x0a,0x2c,0x86,0xce]
+
+ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:1
+// GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:1  ; encoding: [0x0b,0x59,0x85,0xce]
+
+ds_param_load v13, attr32.x wait_va_vdst:3 wait_vm_vsrc:1
+// GFX12: ds_param_load v13, attr32.x wait_va_vdst:3 wait_vm_vsrc:1  ; encoding: [0x0d,0x80,0x83,0xce]
+
+ds_param_load v14, attr32.y wait_va_vdst:2 wait_vm_vsrc:1
+// GFX12: ds_param_load v14, attr32.y wait_va_vdst:2 wait_vm_vsrc:1  ; encoding: [0x0e,0x81,0x82,0xce]
+
+ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:1
+// GFX12: ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:1  ; encoding: [0x0f,0x82,0x81,0xce]
+
+ds_param_load v16, attr32.w wait_va_vdst:0 wait_vm_vsrc:1
+// GFX12: ds_param_load v16, attr32.w wait_vm_vsrc:1  ; encoding: [0x10,0x83,0x80,0xce]
+
+ds_param_load v17, attr32.w wait_vm_vsrc:1
+// GFX12: ds_param_load v17, attr32.w wait_vm_vsrc:1  ; encoding: [0x11,0x83,0x80,0xce]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt
new file mode 100644
index 00000000000000..47a3ee614dd9fc
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt
@@ -0,0 +1,206 @@
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX12 %s
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX12 %s
+
+# GFX12: ds_direct_load v10 wait_va_vdst:6          ; encoding: [0x0a,0x00,0x16,0xce]
+0x0a,0x00,0x16,0xce
+
+# GFX12: ds_direct_load v11 wait_va_vdst:5          ; encoding: [0x0b,0x00,0x15,0xce]
+0x0b,0x00,0x15,0xce
+
+# GFX12: ds_direct_load v12 wait_va_vdst:4          ; encoding: [0x0c,0x00,0x14,0xce]
+0x0c,0x00,0x14,0xce
+
+# GFX12: ds_direct_load v13 wait_va_vdst:3          ; encoding: [0x0d,0x00,0x13,0xce]
+0x0d,0x00,0x13,0xce
+
+# GFX12: ds_direct_load v14 wait_va_vdst:2          ; encoding: [0x0e,0x00,0x12,0xce]
+0x0e,0x00,0x12,0xce
+
+# GFX12: ds_direct_load v15 wait_va_vdst:1          ; encoding: [0x0f,0x00,0x11,0xce]
+0x0f,0x00,0x11,0xce
+
+# GFX12: ds_direct_load v16                         ; encoding: [0x10,0x00,0x10,0xce]
+0x10,0x00,0x10,0xce
+
+# GFX12: ds_direct_load v17                         ; encoding: [0x11,0x00,0x10,0xce]
+0x11,0x00,0x10,0xce
+
+# GFX12: ds_direct_load v1 wait_va_vdst:15          ; encoding: [0x01,0x00,0x1f,0xce]
+0x01,0x00,0x1f,0xce
+
+# GFX12: ds_direct_load v2 wait_va_vdst:14          ; encoding: [0x02,0x00,0x1e,0xce]
+0x02,0x00,0x1e,0xce
+
+# GFX12: ds_direct_load v3 wait_va_vdst:13          ; encoding: [0x03,0x00,0x1d,0xce]
+0x03,0x00,0x1d,0xce
+
+# GFX12: ds_direct_load v4 wait_va_vdst:12          ; encoding: [0x04,0x00,0x1c,0xce]
+0x04,0x00,0x1c,0xce
+
+# GFX12: ds_direct_load v5 wait_va_vdst:11          ; encoding: [0x05,0x00,0x1b,0xce]
+0x05,0x00,0x1b,0xce
+
+# GFX12: ds_direct_load v6 wait_va_vdst:10          ; encoding: [0x06,0x00,0x1a,0xce]
+0x06,0x00,0x1a,0xce
+
+# GFX12: ds_direct_load v7 wait_va_vdst:9           ; encoding: [0x07,0x00,0x19,0xce]
+0x07,0x00,0x19,0xce
+
+# GFX12: ds_direct_load v8 wait_va_vdst:8           ; encoding: [0x08,0x00,0x18,0xce]
+0x08,0x00,0x18,0xce
+
+# GFX12: ds_direct_load v9 wait_va_vdst:7           ; encoding: [0x09,0x00,0x17,0xce]
+0x09,0x00,0x17,0xce
+
+# GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 ; encoding: [0x0a,0x2c,0x06,0xce]
+0x0a,0x2c,0x06,0xce
+
+# GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 ; encoding: [0x0b,0x59,0x05,0xce]
+0x0b,0x59,0x05,0xce
+
+# GFX12: ds_param_load v12, attr33.z wait_va_vdst:4 ; encoding: [0x0c,0x86,0x04,0xce]
+0x0c,0x86,0x04,0xce
+
+# GFX12: ds_param_load v13, attr63.x wait_va_vdst:3 ; encoding: [0x0d,0xfc,0x03,0xce]
+0x0d,0xfc,0x03,0xce
+
+# GFX12: ds_param_load v14, attr63.y wait_va_vdst:2 ; encoding: [0x0e,0xfd,0x02,0xce]
+0x0e,0xfd,0x02,0xce
+
+# GFX12: ds_param_load v15, attr63.z wait_va_vdst:1 ; encoding: [0x0f,0xfe,0x01,0xce]
+0x0f,0xfe,0x01,0xce
+
+# GFX12: ds_param_load v16, attr63.w                ; encoding: [0x10,0xff,0x00,0xce]
+0x10,0xff,0x00,0xce
+
+# GFX12: ds_param_load v17, attr63.w                ; encoding: [0x11,0xff,0x00,0xce]
+0x11,0xff,0x00,0xce
+
+# GFX12: ds_param_load v1, attr0.x wait_va_vdst:15  ; encoding: [0x01,0x00,0x0f,0xce]
+0x01,0x00,0x0f,0xce
+
+# GFX12: ds_param_load v2, attr0.y wait_va_vdst:14  ; encoding: [0x02,0x01,0x0e,0xce]
+0x02,0x01,0x0e,0xce
+
+# GFX12: ds_param_load v3, attr0.z wait_va_vdst:13  ; encoding: [0x03,0x02,0x0d,0xce]
+0x03,0x02,0x0d,0xce
+
+# GFX12: ds_param_load v4, attr0.w wait_va_vdst:12  ; encoding: [0x04,0x03,0x0c,0xce]
+0x04,0x03,0x0c,0xce
+
+# GFX12: ds_param_load v5, attr0.x wait_va_vdst:11  ; encoding: [0x05,0x00,0x0b,0xce]
+0x05,0x00,0x0b,0xce
+
+# GFX12: ds_param_load v6, attr1.x wait_va_vdst:10  ; encoding: [0x06,0x04,0x0a,0xce]
+0x06,0x04,0x0a,0xce
+
+# GFX12: ds_param_load v7, attr2.y wait_va_vdst:9   ; encoding: [0x07,0x09,0x09,0xce]
+0x07,0x09,0x09,0xce
+
+# GFX12: ds_param_load v8, attr3.z wait_va_vdst:8   ; encoding: [0x08,0x0e,0x08,0xce]
+0x08,0x0e,0x08,0xce
+
+# GFX12: ds_param_load v9, attr4.w wait_va_vdst:7   ; encoding: [0x09,0x13,0x07,0xce]
+0x09,0x13,0x07,0xce
+
+# GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1          ; encoding: [0x0a,0x00,0x96,0xce]
+0x0a,0x00,0x96,0xce
+
+# GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1          ; encoding: [0x0b,0x00,0x95,0xce]
+0x0b,0x00,0x95,0xce
+
+# GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1          ; encoding: [0x0c,0x00,0x94,0xce]
+0x0c,0x00,0x94,0xce
+
+# GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1          ; encoding: [0x0d,0x00,0x93,0xce]
+0x0d,0x00,0x93,0xce
+
+# GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1          ; encoding: [0x0e,0x00,0x92,0xce]
+0x0e,0x00,0x92,0xce
+
+# GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1          ; encoding: [0x0f,0x00,0x91,0xce]
+0x0f,0x00,0x91,0xce
+
+# GFX12: ds_direct_load v16 wait_vm_vsrc:1                         ; encoding: [0x10,0x00,0x90,0xce]
+0x10,0x00,0x90,0xce
+
+# GFX12: ds_direct_load v17 wait_vm_vsrc:1                         ; encoding: [0x11,0x00,0x90,0xce]
+0x11,0x00,0x90,0xce
+
+# GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1          ; encoding: [0x01,0x00,0x9f,0xce]
+0x01,0x00,0x9f,0xce
+
+# GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1          ; encoding: [0x02,0x00,0x9e,0xce]
+0x02,0x00,0x9e,0xce
+
+# GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1          ; encoding: [0x03,0x00,0x9d,0xce]
+0x03,0x00,0x9d,0xce
+
+# GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1          ; encoding: [0x04,0x00,0x9c,0xce]
+0x04,0x00,0x9c,0xce
+
+# GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1          ; encoding: [0x05,0x00,0x9b,0xce]
+0x05,0x00,0x9b,0xce
+
+# GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1          ; encoding: [0x06,0x00,0x9a,0xce]
+0x06,0x00,0x9a,0xce
+
+# GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1           ; encoding: [0x07,0x00,0x99,0xce]
+0x07,0x00,0x99,0xce
+
+# GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1           ; encoding: [0x08,0x00,0x98,0xce]
+0x08,0x00,0x98,0xce
+
+# GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1           ; encoding: [0x09,0x00,0x97,0xce]
+0x09,0x00,0x97,0xce
+
+# GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x2c,0x86,0xce]
+0x0a,0x2c,0x86,0xce
+
+# GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x59,0x85,0xce]
+0x0b,0x59,0x85,0xce
+
+# GFX12: ds_param_load v12, attr33.z wait_va_vdst:4 wait_vm_vsrc:1 ; encoding: [0x0c,0x86,0x84,0xce]
+0x0c,0x86,0x84,0xce
+
+# GFX12: ds_param_load v13, attr63.x wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0xfc,0x83,0xce]
+0x0d,0xfc,0x83,0xce
+
+# GFX12: ds_param_load v14, attr63.y wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0xfd,0x82,0xce]
+0x0e,0xfd,0x82,0xce
+
+# GFX12: ds_param_load v15, attr63.z wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0xfe,0x81,0xce]
+0x0f,0xfe,0x81,0xce
+
+# GFX12: ds_param_load v16, attr63.w wait_vm_vsrc:1                ; encoding: [0x10,0xff,0x80,0xce]
+0x10,0xff,0x80,0xce
+
+# GFX12: ds_param_load v17, attr63.w wait_vm_vsrc:1                ; encoding: [0x11,0xff,0x80,0xce]
+0x11,0xff,0x80,0xce
+
+# GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1  ; encoding: [0x01,0x00,0x8f,0xce]
+0x01,0x00,0x8f,0xce
+
+# GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1  ; encoding: [0x02,0x01,0x8e,0xce]
+0x02,0x01,0x8e,0xce
+
+# GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1  ; encoding: [0x03,0x02,0x8d,0xce]
+0x03,0x02,0x8d,0xce
+
+# GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1  ; encoding: [0x04,0x03,0x8c,0xce]
+0x04,0x03,0x8c,0xce
+
+# GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1  ; encoding: [0x05,0x00,0x8b,0xce]
+0x05,0x00,0x8b,0xce
+
+# GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1  ; encoding: [0x06,0x04,0x8a,0xce]
+0x06,0x04,0x8a,0xce
+
+# GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1   ; encoding: [0x07,0x09,0x89,0xce]
+0x07,0x09,0x89,0xce
+
+# GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1   ; encoding: [0x08,0x0e,0x88,0xce]
+0x08,0x0e,0x88,0xce
+
+# GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1   ; encoding: [0x09,0x13,0x87,0xce]
+0x09,0x13,0x87,0xce

>From accc21c0cba0ea91fcee31fa0817c6e61fc65895 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Wed, 13 Dec 2023 10:23:32 +0100
Subject: [PATCH 2/4] always print wait_va_vdst and wait_vm_vsrc

---
 .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp |  14 +-
 llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s        |  74 +++++------
 .../Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt | 124 +++++++++---------
 3 files changed, 103 insertions(+), 109 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index d99d343f81c40b..b6698b0865d191 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -634,21 +634,15 @@ void AMDGPUInstPrinter::printWaitVDST(const MCInst *MI, unsigned OpNo,
 void AMDGPUInstPrinter::printWaitVAVDst(const MCInst *MI, unsigned OpNo,
                                         const MCSubtargetInfo &STI,
                                         raw_ostream &O) {
-  uint8_t Imm = MI->getOperand(OpNo).getImm();
-  if (Imm != 0) {
-    O << " wait_va_vdst:";
-    printU4ImmDecOperand(MI, OpNo, O);
-  }
+  O << " wait_va_vdst:";
+  printU4ImmDecOperand(MI, OpNo, O);
 }
 
 void AMDGPUInstPrinter::printWaitVMVSrc(const MCInst *MI, unsigned OpNo,
                                         const MCSubtargetInfo &STI,
                                         raw_ostream &O) {
-  uint8_t Imm = MI->getOperand(OpNo).getImm();
-  if (Imm != 0) {
-    O << " wait_vm_vsrc:";
-    printU4ImmDecOperand(MI, OpNo, O);
-  }
+  O << " wait_vm_vsrc:";
+  printU4ImmDecOperand(MI, OpNo, O);
 }
 
 void AMDGPUInstPrinter::printWaitEXP(const MCInst *MI, unsigned OpNo,
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
index 4d40eae1c7e7fc..85f6d0d17ca75f 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
@@ -1,103 +1,103 @@
 // RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck -check-prefix=GFX12 %s
 
 ds_direct_load v1 wait_va_vdst:15
-// GFX12: ds_direct_load v1 wait_va_vdst:15  ; encoding: [0x01,0x00,0x1f,0xce]
+// GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x1f,0xce]
 
 ds_direct_load v2 wait_va_vdst:14
-// GFX12: ds_direct_load v2 wait_va_vdst:14  ; encoding: [0x02,0x00,0x1e,0xce]
+// GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:0 ; encoding: [0x02,0x00,0x1e,0xce]
 
 ds_direct_load v3 wait_va_vdst:13
-// GFX12: ds_direct_load v3 wait_va_vdst:13  ; encoding: [0x03,0x00,0x1d,0xce]
+// GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:0 ; encoding: [0x03,0x00,0x1d,0xce]
 
 ds_direct_load v4 wait_va_vdst:12
-// GFX12: ds_direct_load v4 wait_va_vdst:12  ; encoding: [0x04,0x00,0x1c,0xce]
+// GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:0 ; encoding: [0x04,0x00,0x1c,0xce]
 
 ds_direct_load v5 wait_va_vdst:11
-// GFX12: ds_direct_load v5 wait_va_vdst:11  ; encoding: [0x05,0x00,0x1b,0xce]
+// GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:0 ; encoding: [0x05,0x00,0x1b,0xce]
 
 ds_direct_load v6 wait_va_vdst:10
-// GFX12: ds_direct_load v6 wait_va_vdst:10  ; encoding: [0x06,0x00,0x1a,0xce]
+// GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:0 ; encoding: [0x06,0x00,0x1a,0xce]
 
 ds_direct_load v7 wait_va_vdst:9
-// GFX12: ds_direct_load v7 wait_va_vdst:9   ; encoding: [0x07,0x00,0x19,0xce]
+// GFX12: ds_direct_load v7 wait_va_vdst:9  wait_vm_vsrc:0 ; encoding: [0x07,0x00,0x19,0xce]
 
 ds_direct_load v8 wait_va_vdst:8
-// GFX12: ds_direct_load v8 wait_va_vdst:8   ; encoding: [0x08,0x00,0x18,0xce]
+// GFX12: ds_direct_load v8 wait_va_vdst:8  wait_vm_vsrc:0 ; encoding: [0x08,0x00,0x18,0xce]
 
 ds_direct_load v9 wait_va_vdst:7
-// GFX12: ds_direct_load v9 wait_va_vdst:7   ; encoding: [0x09,0x00,0x17,0xce]
+// GFX12: ds_direct_load v9 wait_va_vdst:7  wait_vm_vsrc:0 ; encoding: [0x09,0x00,0x17,0xce]
 
 ds_direct_load v10 wait_va_vdst:6
-// GFX12: ds_direct_load v10 wait_va_vdst:6  ; encoding: [0x0a,0x00,0x16,0xce]
+// GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x00,0x16,0xce]
 
 ds_direct_load v11 wait_va_vdst:5
-// GFX12: ds_direct_load v11 wait_va_vdst:5  ; encoding: [0x0b,0x00,0x15,0xce]
+// GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:0 ; encoding: [0x0b,0x00,0x15,0xce]
 
 ds_direct_load v12 wait_va_vdst:4
-// GFX12: ds_direct_load v12 wait_va_vdst:4  ; encoding: [0x0c,0x00,0x14,0xce]
+// GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:0 ; encoding: [0x0c,0x00,0x14,0xce]
 
 ds_direct_load v13 wait_va_vdst:3
-// GFX12: ds_direct_load v13 wait_va_vdst:3  ; encoding: [0x0d,0x00,0x13,0xce]
+// GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:0 ; encoding: [0x0d,0x00,0x13,0xce]
 
 ds_direct_load v14 wait_va_vdst:2
-// GFX12: ds_direct_load v14 wait_va_vdst:2  ; encoding: [0x0e,0x00,0x12,0xce]
+// GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:0 ; encoding: [0x0e,0x00,0x12,0xce]
 
 ds_direct_load v15 wait_va_vdst:1
-// GFX12: ds_direct_load v15 wait_va_vdst:1  ; encoding: [0x0f,0x00,0x11,0xce]
+// GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:0 ; encoding: [0x0f,0x00,0x11,0xce]
 
 ds_direct_load v16 wait_va_vdst:0
-// GFX12: ds_direct_load v16  ; encoding: [0x10,0x00,0x10,0xce]
+// GFX12: ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x10,0x00,0x10,0xce]
 
 ds_direct_load v17
-// GFX12: ds_direct_load v17  ; encoding: [0x11,0x00,0x10,0xce]
+// GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x11,0x00,0x10,0xce]
 
 ds_param_load v1, attr0.x wait_va_vdst:15
-// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15   ; encoding: [0x01,0x00,0x0f,0xce]
+// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15  wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x0f,0xce]
 
 ds_param_load v2, attr0.y wait_va_vdst:14
-// GFX12: ds_param_load v2, attr0.y wait_va_vdst:14   ; encoding: [0x02,0x01,0x0e,0xce]
+// GFX12: ds_param_load v2, attr0.y wait_va_vdst:14  wait_vm_vsrc:0 ; encoding: [0x02,0x01,0x0e,0xce]
 
 ds_param_load v3, attr0.z wait_va_vdst:13
-// GFX12: ds_param_load v3, attr0.z wait_va_vdst:13   ; encoding: [0x03,0x02,0x0d,0xce]
+// GFX12: ds_param_load v3, attr0.z wait_va_vdst:13  wait_vm_vsrc:0 ; encoding: [0x03,0x02,0x0d,0xce]
 
 ds_param_load v4, attr0.w wait_va_vdst:12
-// GFX12: ds_param_load v4, attr0.w wait_va_vdst:12   ; encoding: [0x04,0x03,0x0c,0xce]
+// GFX12: ds_param_load v4, attr0.w wait_va_vdst:12  wait_vm_vsrc:0 ; encoding: [0x04,0x03,0x0c,0xce]
 
 ds_param_load v5, attr0.x wait_va_vdst:11
-// GFX12: ds_param_load v5, attr0.x wait_va_vdst:11   ; encoding: [0x05,0x00,0x0b,0xce]
+// GFX12: ds_param_load v5, attr0.x wait_va_vdst:11  wait_vm_vsrc:0 ; encoding: [0x05,0x00,0x0b,0xce]
 
 ds_param_load v6, attr1.x wait_va_vdst:10
-// GFX12: ds_param_load v6, attr1.x wait_va_vdst:10   ; encoding: [0x06,0x04,0x0a,0xce]
+// GFX12: ds_param_load v6, attr1.x wait_va_vdst:10  wait_vm_vsrc:0 ; encoding: [0x06,0x04,0x0a,0xce]
 
 ds_param_load v7, attr2.y wait_va_vdst:9
-// GFX12: ds_param_load v7, attr2.y wait_va_vdst:9    ; encoding: [0x07,0x09,0x09,0xce]
+// GFX12: ds_param_load v7, attr2.y wait_va_vdst:9   wait_vm_vsrc:0 ; encoding: [0x07,0x09,0x09,0xce]
 
 ds_param_load v8, attr3.z wait_va_vdst:8
-// GFX12: ds_param_load v8, attr3.z wait_va_vdst:8    ; encoding: [0x08,0x0e,0x08,0xce]
+// GFX12: ds_param_load v8, attr3.z wait_va_vdst:8   wait_vm_vsrc:0 ; encoding: [0x08,0x0e,0x08,0xce]
 
 ds_param_load v9, attr4.w wait_va_vdst:7
-// GFX12: ds_param_load v9, attr4.w wait_va_vdst:7    ; encoding: [0x09,0x13,0x07,0xce]
+// GFX12: ds_param_load v9, attr4.w wait_va_vdst:7   wait_vm_vsrc:0 ; encoding: [0x09,0x13,0x07,0xce]
 
 ds_param_load v10, attr11.x wait_va_vdst:6
-// GFX12: ds_param_load v10, attr11.x wait_va_vdst:6  ; encoding: [0x0a,0x2c,0x06,0xce]
+// GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x2c,0x06,0xce]
 
 ds_param_load v11, attr22.y wait_va_vdst:5
-// GFX12: ds_param_load v11, attr22.y wait_va_vdst:5  ; encoding: [0x0b,0x59,0x05,0xce]
+// GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:0 ; encoding: [0x0b,0x59,0x05,0xce]
 
 ds_param_load v13, attr32.x wait_va_vdst:3
-// GFX12: ds_param_load v13, attr32.x wait_va_vdst:3  ; encoding: [0x0d,0x80,0x03,0xce]
+// GFX12: ds_param_load v13, attr32.x wait_va_vdst:3 wait_vm_vsrc:0 ; encoding: [0x0d,0x80,0x03,0xce]
 
 ds_param_load v14, attr32.y wait_va_vdst:2
-// GFX12: ds_param_load v14, attr32.y wait_va_vdst:2  ; encoding: [0x0e,0x81,0x02,0xce]
+// GFX12: ds_param_load v14, attr32.y wait_va_vdst:2 wait_vm_vsrc:0 ; encoding: [0x0e,0x81,0x02,0xce]
 
 ds_param_load v15, attr32.z wait_va_vdst:1
-// GFX12: ds_param_load v15, attr32.z wait_va_vdst:1  ; encoding: [0x0f,0x82,0x01,0xce]
+// GFX12: ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:0 ; encoding: [0x0f,0x82,0x01,0xce]
 
 ds_param_load v16, attr32.w wait_va_vdst:0
-// GFX12: ds_param_load v16, attr32.w  ; encoding: [0x10,0x83,0x00,0xce]
+// GFX12: ds_param_load v16, attr32.w wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x10,0x83,0x00,0xce]
 
 ds_param_load v17, attr32.w
-// GFX12: ds_param_load v17, attr32.w  ; encoding: [0x11,0x83,0x00,0xce]
+// GFX12: ds_param_load v17, attr32.w wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x11,0x83,0x00,0xce]
 
 ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1
 // GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1  ; encoding: [0x01,0x00,0x9f,0xce]
@@ -145,10 +145,10 @@ ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1
 // GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1  ; encoding: [0x0f,0x00,0x91,0xce]
 
 ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:1
-// GFX12: ds_direct_load v16 wait_vm_vsrc:1  ; encoding: [0x10,0x00,0x90,0xce]
+// GFX12: ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:1  ; encoding: [0x10,0x00,0x90,0xce]
 
 ds_direct_load v17 wait_vm_vsrc:1
-// GFX12: ds_direct_load v17 wait_vm_vsrc:1  ; encoding: [0x11,0x00,0x90,0xce]
+// GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:1  ; encoding: [0x11,0x00,0x90,0xce]
 
 ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1
 // GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1   ; encoding: [0x01,0x00,0x8f,0xce]
@@ -193,7 +193,7 @@ ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:1
 // GFX12: ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:1  ; encoding: [0x0f,0x82,0x81,0xce]
 
 ds_param_load v16, attr32.w wait_va_vdst:0 wait_vm_vsrc:1
-// GFX12: ds_param_load v16, attr32.w wait_vm_vsrc:1  ; encoding: [0x10,0x83,0x80,0xce]
+// GFX12: ds_param_load v16, attr32.w wait_va_vdst:0 wait_vm_vsrc:1  ; encoding: [0x10,0x83,0x80,0xce]
 
 ds_param_load v17, attr32.w wait_vm_vsrc:1
-// GFX12: ds_param_load v17, attr32.w wait_vm_vsrc:1  ; encoding: [0x11,0x83,0x80,0xce]
+// GFX12: ds_param_load v17, attr32.w wait_va_vdst:0 wait_vm_vsrc:1  ; encoding: [0x11,0x83,0x80,0xce]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt
index 47a3ee614dd9fc..121afb05ff6cf0 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt
@@ -1,157 +1,157 @@
 # RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX12 %s
 # RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX12 %s
 
-# GFX12: ds_direct_load v10 wait_va_vdst:6          ; encoding: [0x0a,0x00,0x16,0xce]
+# GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x00,0x16,0xce]
 0x0a,0x00,0x16,0xce
 
-# GFX12: ds_direct_load v11 wait_va_vdst:5          ; encoding: [0x0b,0x00,0x15,0xce]
+# GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:0 ; encoding: [0x0b,0x00,0x15,0xce]
 0x0b,0x00,0x15,0xce
 
-# GFX12: ds_direct_load v12 wait_va_vdst:4          ; encoding: [0x0c,0x00,0x14,0xce]
+# GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:0 ; encoding: [0x0c,0x00,0x14,0xce]
 0x0c,0x00,0x14,0xce
 
-# GFX12: ds_direct_load v13 wait_va_vdst:3          ; encoding: [0x0d,0x00,0x13,0xce]
+# GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:0 ; encoding: [0x0d,0x00,0x13,0xce]
 0x0d,0x00,0x13,0xce
 
-# GFX12: ds_direct_load v14 wait_va_vdst:2          ; encoding: [0x0e,0x00,0x12,0xce]
+# GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:0 ; encoding: [0x0e,0x00,0x12,0xce]
 0x0e,0x00,0x12,0xce
 
-# GFX12: ds_direct_load v15 wait_va_vdst:1          ; encoding: [0x0f,0x00,0x11,0xce]
+# GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:0 ; encoding: [0x0f,0x00,0x11,0xce]
 0x0f,0x00,0x11,0xce
 
-# GFX12: ds_direct_load v16                         ; encoding: [0x10,0x00,0x10,0xce]
+# GFX12: ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x10,0x00,0x10,0xce]
 0x10,0x00,0x10,0xce
 
-# GFX12: ds_direct_load v17                         ; encoding: [0x11,0x00,0x10,0xce]
+# GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x11,0x00,0x10,0xce]
 0x11,0x00,0x10,0xce
 
-# GFX12: ds_direct_load v1 wait_va_vdst:15          ; encoding: [0x01,0x00,0x1f,0xce]
+# GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x1f,0xce]
 0x01,0x00,0x1f,0xce
 
-# GFX12: ds_direct_load v2 wait_va_vdst:14          ; encoding: [0x02,0x00,0x1e,0xce]
+# GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:0 ; encoding: [0x02,0x00,0x1e,0xce]
 0x02,0x00,0x1e,0xce
 
-# GFX12: ds_direct_load v3 wait_va_vdst:13          ; encoding: [0x03,0x00,0x1d,0xce]
+# GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:0 ; encoding: [0x03,0x00,0x1d,0xce]
 0x03,0x00,0x1d,0xce
 
-# GFX12: ds_direct_load v4 wait_va_vdst:12          ; encoding: [0x04,0x00,0x1c,0xce]
+# GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:0 ; encoding: [0x04,0x00,0x1c,0xce]
 0x04,0x00,0x1c,0xce
 
-# GFX12: ds_direct_load v5 wait_va_vdst:11          ; encoding: [0x05,0x00,0x1b,0xce]
+# GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:0 ; encoding: [0x05,0x00,0x1b,0xce]
 0x05,0x00,0x1b,0xce
 
-# GFX12: ds_direct_load v6 wait_va_vdst:10          ; encoding: [0x06,0x00,0x1a,0xce]
+# GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:0 ; encoding: [0x06,0x00,0x1a,0xce]
 0x06,0x00,0x1a,0xce
 
-# GFX12: ds_direct_load v7 wait_va_vdst:9           ; encoding: [0x07,0x00,0x19,0xce]
+# GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:0 ; encoding: [0x07,0x00,0x19,0xce]
 0x07,0x00,0x19,0xce
 
-# GFX12: ds_direct_load v8 wait_va_vdst:8           ; encoding: [0x08,0x00,0x18,0xce]
+# GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:0 ; encoding: [0x08,0x00,0x18,0xce]
 0x08,0x00,0x18,0xce
 
-# GFX12: ds_direct_load v9 wait_va_vdst:7           ; encoding: [0x09,0x00,0x17,0xce]
+# GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:0 ; encoding: [0x09,0x00,0x17,0xce]
 0x09,0x00,0x17,0xce
 
-# GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 ; encoding: [0x0a,0x2c,0x06,0xce]
+# GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x2c,0x06,0xce]
 0x0a,0x2c,0x06,0xce
 
-# GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 ; encoding: [0x0b,0x59,0x05,0xce]
+# GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:0 ; encoding: [0x0b,0x59,0x05,0xce]
 0x0b,0x59,0x05,0xce
 
-# GFX12: ds_param_load v12, attr33.z wait_va_vdst:4 ; encoding: [0x0c,0x86,0x04,0xce]
+# GFX12: ds_param_load v12, attr33.z wait_va_vdst:4 wait_vm_vsrc:0 ; encoding: [0x0c,0x86,0x04,0xce]
 0x0c,0x86,0x04,0xce
 
-# GFX12: ds_param_load v13, attr63.x wait_va_vdst:3 ; encoding: [0x0d,0xfc,0x03,0xce]
+# GFX12: ds_param_load v13, attr63.x wait_va_vdst:3 wait_vm_vsrc:0 ; encoding: [0x0d,0xfc,0x03,0xce]
 0x0d,0xfc,0x03,0xce
 
-# GFX12: ds_param_load v14, attr63.y wait_va_vdst:2 ; encoding: [0x0e,0xfd,0x02,0xce]
+# GFX12: ds_param_load v14, attr63.y wait_va_vdst:2 wait_vm_vsrc:0 ; encoding: [0x0e,0xfd,0x02,0xce]
 0x0e,0xfd,0x02,0xce
 
-# GFX12: ds_param_load v15, attr63.z wait_va_vdst:1 ; encoding: [0x0f,0xfe,0x01,0xce]
+# GFX12: ds_param_load v15, attr63.z wait_va_vdst:1 wait_vm_vsrc:0 ; encoding: [0x0f,0xfe,0x01,0xce]
 0x0f,0xfe,0x01,0xce
 
-# GFX12: ds_param_load v16, attr63.w                ; encoding: [0x10,0xff,0x00,0xce]
+# GFX12: ds_param_load v16, attr63.w wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x10,0xff,0x00,0xce]
 0x10,0xff,0x00,0xce
 
-# GFX12: ds_param_load v17, attr63.w                ; encoding: [0x11,0xff,0x00,0xce]
+# GFX12: ds_param_load v17, attr63.w wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x11,0xff,0x00,0xce]
 0x11,0xff,0x00,0xce
 
-# GFX12: ds_param_load v1, attr0.x wait_va_vdst:15  ; encoding: [0x01,0x00,0x0f,0xce]
+# GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x0f,0xce]
 0x01,0x00,0x0f,0xce
 
-# GFX12: ds_param_load v2, attr0.y wait_va_vdst:14  ; encoding: [0x02,0x01,0x0e,0xce]
+# GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:0 ; encoding: [0x02,0x01,0x0e,0xce]
 0x02,0x01,0x0e,0xce
 
-# GFX12: ds_param_load v3, attr0.z wait_va_vdst:13  ; encoding: [0x03,0x02,0x0d,0xce]
+# GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:0 ; encoding: [0x03,0x02,0x0d,0xce]
 0x03,0x02,0x0d,0xce
 
-# GFX12: ds_param_load v4, attr0.w wait_va_vdst:12  ; encoding: [0x04,0x03,0x0c,0xce]
+# GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:0 ; encoding: [0x04,0x03,0x0c,0xce]
 0x04,0x03,0x0c,0xce
 
-# GFX12: ds_param_load v5, attr0.x wait_va_vdst:11  ; encoding: [0x05,0x00,0x0b,0xce]
+# GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:0 ; encoding: [0x05,0x00,0x0b,0xce]
 0x05,0x00,0x0b,0xce
 
-# GFX12: ds_param_load v6, attr1.x wait_va_vdst:10  ; encoding: [0x06,0x04,0x0a,0xce]
+# GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:0 ; encoding: [0x06,0x04,0x0a,0xce]
 0x06,0x04,0x0a,0xce
 
-# GFX12: ds_param_load v7, attr2.y wait_va_vdst:9   ; encoding: [0x07,0x09,0x09,0xce]
+# GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:0 ; encoding: [0x07,0x09,0x09,0xce]
 0x07,0x09,0x09,0xce
 
-# GFX12: ds_param_load v8, attr3.z wait_va_vdst:8   ; encoding: [0x08,0x0e,0x08,0xce]
+# GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:0 ; encoding: [0x08,0x0e,0x08,0xce]
 0x08,0x0e,0x08,0xce
 
-# GFX12: ds_param_load v9, attr4.w wait_va_vdst:7   ; encoding: [0x09,0x13,0x07,0xce]
+# GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:0 ; encoding: [0x09,0x13,0x07,0xce]
 0x09,0x13,0x07,0xce
 
-# GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1          ; encoding: [0x0a,0x00,0x96,0xce]
+# GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x00,0x96,0xce]
 0x0a,0x00,0x96,0xce
 
-# GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1          ; encoding: [0x0b,0x00,0x95,0xce]
+# GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x00,0x95,0xce]
 0x0b,0x00,0x95,0xce
 
-# GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1          ; encoding: [0x0c,0x00,0x94,0xce]
+# GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1 ; encoding: [0x0c,0x00,0x94,0xce]
 0x0c,0x00,0x94,0xce
 
-# GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1          ; encoding: [0x0d,0x00,0x93,0xce]
+# GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0x00,0x93,0xce]
 0x0d,0x00,0x93,0xce
 
-# GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1          ; encoding: [0x0e,0x00,0x92,0xce]
+# GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0x00,0x92,0xce]
 0x0e,0x00,0x92,0xce
 
-# GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1          ; encoding: [0x0f,0x00,0x91,0xce]
+# GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0x00,0x91,0xce]
 0x0f,0x00,0x91,0xce
 
-# GFX12: ds_direct_load v16 wait_vm_vsrc:1                         ; encoding: [0x10,0x00,0x90,0xce]
+# GFX12: ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x10,0x00,0x90,0xce]
 0x10,0x00,0x90,0xce
 
-# GFX12: ds_direct_load v17 wait_vm_vsrc:1                         ; encoding: [0x11,0x00,0x90,0xce]
+# GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x11,0x00,0x90,0xce]
 0x11,0x00,0x90,0xce
 
-# GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1          ; encoding: [0x01,0x00,0x9f,0xce]
+# GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x9f,0xce]
 0x01,0x00,0x9f,0xce
 
-# GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1          ; encoding: [0x02,0x00,0x9e,0xce]
+# GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x00,0x9e,0xce]
 0x02,0x00,0x9e,0xce
 
-# GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1          ; encoding: [0x03,0x00,0x9d,0xce]
+# GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x00,0x9d,0xce]
 0x03,0x00,0x9d,0xce
 
-# GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1          ; encoding: [0x04,0x00,0x9c,0xce]
+# GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x00,0x9c,0xce]
 0x04,0x00,0x9c,0xce
 
-# GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1          ; encoding: [0x05,0x00,0x9b,0xce]
+# GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x9b,0xce]
 0x05,0x00,0x9b,0xce
 
-# GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1          ; encoding: [0x06,0x00,0x9a,0xce]
+# GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x00,0x9a,0xce]
 0x06,0x00,0x9a,0xce
 
-# GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1           ; encoding: [0x07,0x00,0x99,0xce]
+# GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x00,0x99,0xce]
 0x07,0x00,0x99,0xce
 
-# GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1           ; encoding: [0x08,0x00,0x98,0xce]
+# GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x00,0x98,0xce]
 0x08,0x00,0x98,0xce
 
-# GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1           ; encoding: [0x09,0x00,0x97,0xce]
+# GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x00,0x97,0xce]
 0x09,0x00,0x97,0xce
 
 # GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x2c,0x86,0xce]
@@ -172,35 +172,35 @@
 # GFX12: ds_param_load v15, attr63.z wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0xfe,0x81,0xce]
 0x0f,0xfe,0x81,0xce
 
-# GFX12: ds_param_load v16, attr63.w wait_vm_vsrc:1                ; encoding: [0x10,0xff,0x80,0xce]
+# GFX12: ds_param_load v16, attr63.w wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x10,0xff,0x80,0xce]
 0x10,0xff,0x80,0xce
 
-# GFX12: ds_param_load v17, attr63.w wait_vm_vsrc:1                ; encoding: [0x11,0xff,0x80,0xce]
+# GFX12: ds_param_load v17, attr63.w wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x11,0xff,0x80,0xce]
 0x11,0xff,0x80,0xce
 
-# GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1  ; encoding: [0x01,0x00,0x8f,0xce]
+# GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x8f,0xce]
 0x01,0x00,0x8f,0xce
 
-# GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1  ; encoding: [0x02,0x01,0x8e,0xce]
+# GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x01,0x8e,0xce]
 0x02,0x01,0x8e,0xce
 
-# GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1  ; encoding: [0x03,0x02,0x8d,0xce]
+# GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x02,0x8d,0xce]
 0x03,0x02,0x8d,0xce
 
-# GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1  ; encoding: [0x04,0x03,0x8c,0xce]
+# GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x03,0x8c,0xce]
 0x04,0x03,0x8c,0xce
 
-# GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1  ; encoding: [0x05,0x00,0x8b,0xce]
+# GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x8b,0xce]
 0x05,0x00,0x8b,0xce
 
-# GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1  ; encoding: [0x06,0x04,0x8a,0xce]
+# GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x04,0x8a,0xce]
 0x06,0x04,0x8a,0xce
 
-# GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1   ; encoding: [0x07,0x09,0x89,0xce]
+# GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x09,0x89,0xce]
 0x07,0x09,0x89,0xce
 
-# GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1   ; encoding: [0x08,0x0e,0x88,0xce]
+# GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x0e,0x88,0xce]
 0x08,0x0e,0x88,0xce
 
-# GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1   ; encoding: [0x09,0x13,0x87,0xce]
+# GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x13,0x87,0xce]
 0x09,0x13,0x87,0xce

>From 0a060874343055b736e003c15c6a6ffd74e7c625 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Wed, 13 Dec 2023 11:36:56 +0100
Subject: [PATCH 3/4] Adjust spacing in tests; Add --strict-whitespace check

---
 llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s        | 92 +++++++++----------
 .../Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt |  4 +-
 2 files changed, 48 insertions(+), 48 deletions(-)

diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
index 85f6d0d17ca75f..7acccfe6a30d2e 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck -check-prefix=GFX12 %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck --strict-whitespace -check-prefix=GFX12 %s 
 
 ds_direct_load v1 wait_va_vdst:15
 // GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x1f,0xce]
@@ -19,13 +19,13 @@ ds_direct_load v6 wait_va_vdst:10
 // GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:0 ; encoding: [0x06,0x00,0x1a,0xce]
 
 ds_direct_load v7 wait_va_vdst:9
-// GFX12: ds_direct_load v7 wait_va_vdst:9  wait_vm_vsrc:0 ; encoding: [0x07,0x00,0x19,0xce]
+// GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:0 ; encoding: [0x07,0x00,0x19,0xce]
 
 ds_direct_load v8 wait_va_vdst:8
-// GFX12: ds_direct_load v8 wait_va_vdst:8  wait_vm_vsrc:0 ; encoding: [0x08,0x00,0x18,0xce]
+// GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:0 ; encoding: [0x08,0x00,0x18,0xce]
 
 ds_direct_load v9 wait_va_vdst:7
-// GFX12: ds_direct_load v9 wait_va_vdst:7  wait_vm_vsrc:0 ; encoding: [0x09,0x00,0x17,0xce]
+// GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:0 ; encoding: [0x09,0x00,0x17,0xce]
 
 ds_direct_load v10 wait_va_vdst:6
 // GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x00,0x16,0xce]
@@ -52,31 +52,31 @@ ds_direct_load v17
 // GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x11,0x00,0x10,0xce]
 
 ds_param_load v1, attr0.x wait_va_vdst:15
-// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15  wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x0f,0xce]
+// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x0f,0xce]
 
 ds_param_load v2, attr0.y wait_va_vdst:14
-// GFX12: ds_param_load v2, attr0.y wait_va_vdst:14  wait_vm_vsrc:0 ; encoding: [0x02,0x01,0x0e,0xce]
+// GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:0 ; encoding: [0x02,0x01,0x0e,0xce]
 
 ds_param_load v3, attr0.z wait_va_vdst:13
-// GFX12: ds_param_load v3, attr0.z wait_va_vdst:13  wait_vm_vsrc:0 ; encoding: [0x03,0x02,0x0d,0xce]
+// GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:0 ; encoding: [0x03,0x02,0x0d,0xce]
 
 ds_param_load v4, attr0.w wait_va_vdst:12
-// GFX12: ds_param_load v4, attr0.w wait_va_vdst:12  wait_vm_vsrc:0 ; encoding: [0x04,0x03,0x0c,0xce]
+// GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:0 ; encoding: [0x04,0x03,0x0c,0xce]
 
 ds_param_load v5, attr0.x wait_va_vdst:11
-// GFX12: ds_param_load v5, attr0.x wait_va_vdst:11  wait_vm_vsrc:0 ; encoding: [0x05,0x00,0x0b,0xce]
+// GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:0 ; encoding: [0x05,0x00,0x0b,0xce]
 
 ds_param_load v6, attr1.x wait_va_vdst:10
-// GFX12: ds_param_load v6, attr1.x wait_va_vdst:10  wait_vm_vsrc:0 ; encoding: [0x06,0x04,0x0a,0xce]
+// GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:0 ; encoding: [0x06,0x04,0x0a,0xce]
 
 ds_param_load v7, attr2.y wait_va_vdst:9
-// GFX12: ds_param_load v7, attr2.y wait_va_vdst:9   wait_vm_vsrc:0 ; encoding: [0x07,0x09,0x09,0xce]
+// GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:0 ; encoding: [0x07,0x09,0x09,0xce]
 
 ds_param_load v8, attr3.z wait_va_vdst:8
-// GFX12: ds_param_load v8, attr3.z wait_va_vdst:8   wait_vm_vsrc:0 ; encoding: [0x08,0x0e,0x08,0xce]
+// GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:0 ; encoding: [0x08,0x0e,0x08,0xce]
 
 ds_param_load v9, attr4.w wait_va_vdst:7
-// GFX12: ds_param_load v9, attr4.w wait_va_vdst:7   wait_vm_vsrc:0 ; encoding: [0x09,0x13,0x07,0xce]
+// GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:0 ; encoding: [0x09,0x13,0x07,0xce]
 
 ds_param_load v10, attr11.x wait_va_vdst:6
 // GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x2c,0x06,0xce]
@@ -100,100 +100,100 @@ ds_param_load v17, attr32.w
 // GFX12: ds_param_load v17, attr32.w wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x11,0x83,0x00,0xce]
 
 ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1
-// GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1  ; encoding: [0x01,0x00,0x9f,0xce]
+// GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x9f,0xce]
 
 ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1
-// GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1  ; encoding: [0x02,0x00,0x9e,0xce]
+// GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x00,0x9e,0xce]
 
 ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1
-// GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1  ; encoding: [0x03,0x00,0x9d,0xce]
+// GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x00,0x9d,0xce]
 
 ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1
-// GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1  ; encoding: [0x04,0x00,0x9c,0xce]
+// GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x00,0x9c,0xce]
 
 ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1
-// GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1  ; encoding: [0x05,0x00,0x9b,0xce]
+// GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x9b,0xce]
 
 ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1
-// GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1  ; encoding: [0x06,0x00,0x9a,0xce]
+// GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x00,0x9a,0xce]
 
 ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1
-// GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1   ; encoding: [0x07,0x00,0x99,0xce]
+// GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x00,0x99,0xce]
 
 ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1
-// GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1   ; encoding: [0x08,0x00,0x98,0xce]
+// GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x00,0x98,0xce]
 
 ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1
-// GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1   ; encoding: [0x09,0x00,0x97,0xce]
+// GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x00,0x97,0xce]
 
 ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1
-// GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1  ; encoding: [0x0a,0x00,0x96,0xce]
+// GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x00,0x96,0xce]
 
 ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1
-// GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1  ; encoding: [0x0b,0x00,0x95,0xce]
+// GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x00,0x95,0xce]
 
 ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1
-// GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1  ; encoding: [0x0c,0x00,0x94,0xce]
+// GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1 ; encoding: [0x0c,0x00,0x94,0xce]
 
 ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1
-// GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1  ; encoding: [0x0d,0x00,0x93,0xce]
+// GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0x00,0x93,0xce]
 
 ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1
-// GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1  ; encoding: [0x0e,0x00,0x92,0xce]
+// GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0x00,0x92,0xce]
 
 ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1
-// GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1  ; encoding: [0x0f,0x00,0x91,0xce]
+// GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0x00,0x91,0xce]
 
 ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:1
-// GFX12: ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:1  ; encoding: [0x10,0x00,0x90,0xce]
+// GFX12: ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x10,0x00,0x90,0xce]
 
 ds_direct_load v17 wait_vm_vsrc:1
-// GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:1  ; encoding: [0x11,0x00,0x90,0xce]
+// GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x11,0x00,0x90,0xce]
 
 ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1
-// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1   ; encoding: [0x01,0x00,0x8f,0xce]
+// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x8f,0xce]
 
 ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1
-// GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1   ; encoding: [0x02,0x01,0x8e,0xce]
+// GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x01,0x8e,0xce]
 
 ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1
-// GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1   ; encoding: [0x03,0x02,0x8d,0xce]
+// GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x02,0x8d,0xce]
 
 ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1
-// GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1   ; encoding: [0x04,0x03,0x8c,0xce]
+// GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x03,0x8c,0xce]
 
 ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1
-// GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1   ; encoding: [0x05,0x00,0x8b,0xce]
+// GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x8b,0xce]
 
 ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1
-// GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1   ; encoding: [0x06,0x04,0x8a,0xce]
+// GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x04,0x8a,0xce]
 
 ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1
-// GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1    ; encoding: [0x07,0x09,0x89,0xce]
+// GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x09,0x89,0xce]
 
 ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1
-// GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1    ; encoding: [0x08,0x0e,0x88,0xce]
+// GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x0e,0x88,0xce]
 
 ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1
-// GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1    ; encoding: [0x09,0x13,0x87,0xce]
+// GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x13,0x87,0xce]
 
 ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:1
-// GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:1  ; encoding: [0x0a,0x2c,0x86,0xce]
+// GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x2c,0x86,0xce]
 
 ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:1
-// GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:1  ; encoding: [0x0b,0x59,0x85,0xce]
+// GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x59,0x85,0xce]
 
 ds_param_load v13, attr32.x wait_va_vdst:3 wait_vm_vsrc:1
-// GFX12: ds_param_load v13, attr32.x wait_va_vdst:3 wait_vm_vsrc:1  ; encoding: [0x0d,0x80,0x83,0xce]
+// GFX12: ds_param_load v13, attr32.x wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0x80,0x83,0xce]
 
 ds_param_load v14, attr32.y wait_va_vdst:2 wait_vm_vsrc:1
-// GFX12: ds_param_load v14, attr32.y wait_va_vdst:2 wait_vm_vsrc:1  ; encoding: [0x0e,0x81,0x82,0xce]
+// GFX12: ds_param_load v14, attr32.y wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0x81,0x82,0xce]
 
 ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:1
-// GFX12: ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:1  ; encoding: [0x0f,0x82,0x81,0xce]
+// GFX12: ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0x82,0x81,0xce]
 
 ds_param_load v16, attr32.w wait_va_vdst:0 wait_vm_vsrc:1
-// GFX12: ds_param_load v16, attr32.w wait_va_vdst:0 wait_vm_vsrc:1  ; encoding: [0x10,0x83,0x80,0xce]
+// GFX12: ds_param_load v16, attr32.w wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x10,0x83,0x80,0xce]
 
 ds_param_load v17, attr32.w wait_vm_vsrc:1
-// GFX12: ds_param_load v17, attr32.w wait_va_vdst:0 wait_vm_vsrc:1  ; encoding: [0x11,0x83,0x80,0xce]
+// GFX12: ds_param_load v17, attr32.w wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x11,0x83,0x80,0xce]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt
index 121afb05ff6cf0..b7c0394429dc3b 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt
@@ -1,5 +1,5 @@
-# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX12 %s
-# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX12 %s
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck --strict-whitespace -check-prefix=GFX12 %s
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck --strict-whitespace -check-prefix=GFX12 %s
 
 # GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x00,0x16,0xce]
 0x0a,0x00,0x16,0xce

>From b70e60fda8653695d024c44c6b031964e6f8e4a8 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Wed, 13 Dec 2023 11:38:24 +0100
Subject: [PATCH 4/4] trailing space removed

---
 llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
index 7acccfe6a30d2e..dbd732f9999227 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck --strict-whitespace -check-prefix=GFX12 %s 
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck --strict-whitespace -check-prefix=GFX12 %s
 
 ds_direct_load v1 wait_va_vdst:15
 // GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x1f,0xce]



More information about the llvm-commits mailing list