[llvm] 82e33d6 - [AMDGPU] Add VDSDIR instructions for GFX12 (#75197)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 3 07:32:05 PST 2024


Author: Mirko BrkuĊĦanin
Date: 2024-01-03T16:32:00+01:00
New Revision: 82e33d6203f62c0be16f8da8d4178e7fa9e790ea

URL: https://github.com/llvm/llvm-project/commit/82e33d6203f62c0be16f8da8d4178e7fa9e790ea
DIFF: https://github.com/llvm/llvm-project/commit/82e33d6203f62c0be16f8da8d4178e7fa9e790ea.diff

LOG: [AMDGPU] Add VDSDIR instructions for GFX12 (#75197)

Added: 
    llvm/lib/Target/AMDGPU/DSDIRInstructions.td
    llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
    llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt

Modified: 
    llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
    llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
    llvm/lib/Target/AMDGPU/SIInstrInfo.td
    llvm/lib/Target/AMDGPU/SIInstructions.td
    llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll

Removed: 
    llvm/lib/Target/AMDGPU/LDSDIRInstructions.td


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index abd7e911beef3f..5f2b7c0b4b4ef1 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -166,6 +166,8 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     ImmTyEndpgm,
     ImmTyWaitVDST,
     ImmTyWaitEXP,
+    ImmTyWaitVAVDst,
+    ImmTyWaitVMVSrc,
   };
 
   // Immediate operand kind.
@@ -909,6 +911,8 @@ class AMDGPUOperand : public MCParsedAsmOperand {
   bool isEndpgm() const;
   bool isWaitVDST() const;
   bool isWaitEXP() const;
+  bool isWaitVAVDst() const;
+  bool isWaitVMVSrc() const;
 
   auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
     return std::bind(P, *this);
@@ -1029,6 +1033,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
   }
 
   static void printImmTy(raw_ostream& OS, ImmTy Type) {
+    // clang-format off
     switch (Type) {
     case ImmTyNone: OS << "None"; break;
     case ImmTyGDS: OS << "GDS"; break;
@@ -1086,7 +1091,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     case ImmTyEndpgm: OS << "Endpgm"; break;
     case ImmTyWaitVDST: OS << "WaitVDST"; break;
     case ImmTyWaitEXP: OS << "WaitEXP"; break;
+    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
+    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
     }
+    // clang-format on
   }
 
   void print(raw_ostream &OS) const override {
@@ -9192,6 +9200,14 @@ bool AMDGPUOperand::isWaitVDST() const {
   return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
 }
 
+bool AMDGPUOperand::isWaitVAVDst() const {
+  return isImmTy(ImmTyWaitVAVDst) && isUInt<4>(getImm());
+}
+
+bool AMDGPUOperand::isWaitVMVSrc() const {
+  return isImmTy(ImmTyWaitVMVSrc) && isUInt<1>(getImm());
+}
+
 //===----------------------------------------------------------------------===//
 // VINTERP
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/lib/Target/AMDGPU/DSDIRInstructions.td b/llvm/lib/Target/AMDGPU/DSDIRInstructions.td
new file mode 100644
index 00000000000000..54ef785cc608cd
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/DSDIRInstructions.td
@@ -0,0 +1,191 @@
+//===-- DSDIRInstructions.td - LDS/VDS Direct Instruction Definitions -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LDSDIR/VDSDIR encoding (LDSDIR is gfx11, VDSDIR is gfx12+)
+//===----------------------------------------------------------------------===//
+
+class LDSDIRe<bits<2> op, bit is_direct> : Enc32 {
+  // encoding fields
+  bits<2> attrchan;
+  bits<6> attr;
+  bits<4> waitvdst;
+  bits<8> vdst;
+
+  // encoding
+  let Inst{31-24} = 0xce; // encoding
+  let Inst{23-22} = 0x0; // reserved
+  let Inst{21-20} = op;
+  let Inst{19-16} = waitvdst;
+  let Inst{15-10} = !if(is_direct, ?, attr);
+  let Inst{9-8} = !if(is_direct, ?, attrchan);
+  let Inst{7-0} = vdst;
+}
+
+class VDSDIRe<bits<2> op, bit is_direct> : Enc32 {
+  // encoding fields
+  bits<2> attrchan;
+  bits<6> attr;
+  bits<4> waitvdst;
+  bits<8> vdst;
+  bits<1> waitvsrc;
+
+  // encoding
+  let Inst{31-24} = 0xce; // encoding
+  let Inst{23} = waitvsrc;
+  let Inst{22} = 0x0; // reserved
+  let Inst{21-20} = op;
+  let Inst{19-16} = waitvdst;
+  let Inst{15-10} = !if(is_direct, ?, attr);
+  let Inst{9-8} = !if(is_direct, ?, attrchan);
+  let Inst{7-0} = vdst;
+}
+
+//===----------------------------------------------------------------------===//
+// LDSDIR/VDSDIR Classes
+//===----------------------------------------------------------------------===//
+
+class LDSDIR_getIns<bit direct> {
+  dag ret = !if(direct,
+    (ins wait_vdst:$waitvdst),
+    (ins InterpAttr:$attr, InterpAttrChan:$attrchan, wait_vdst:$waitvdst)
+  );
+}
+
+class VDSDIR_getIns<bit direct> {
+  dag ret = !if(direct,
+    (ins wait_va_vdst:$waitvdst, wait_va_vsrc:$waitvsrc),
+    (ins InterpAttr:$attr, InterpAttrChan:$attrchan, wait_va_vdst:$waitvdst,
+         wait_va_vsrc:$waitvsrc)
+  );
+}
+
+class DSDIR_Common<string opName, string asm = "", dag ins, bit direct> :
+  InstSI<(outs VGPR_32:$vdst), ins, asm> {
+  let LDSDIR = 1;
+  let EXP_CNT = 1;
+
+  let hasSideEffects = 0;
+  let mayLoad = 1;
+  let mayStore = 0;
+
+  string Mnemonic = opName;
+  let UseNamedOperandTable = 1;
+
+  let Uses = [M0, EXEC];
+  let DisableWQM = 0;
+  let SchedRW = [WriteLDS];
+
+  bit is_direct;
+  let is_direct = direct;
+}
+
+class DSDIR_Pseudo<string opName, dag ins, bit direct> :
+  DSDIR_Common<opName, "", ins, direct>,
+  SIMCInstr<opName, SIEncodingFamily.NONE> {
+  let isPseudo = 1;
+  let isCodeGenOnly = 1;
+}
+
+class LDSDIR_getAsm<bit direct> {
+  string ret = !if(direct,
+    " $vdst$waitvdst",
+    " $vdst, $attr$attrchan$waitvdst"
+  );
+}
+
+class VDSDIR_getAsm<bit direct> {
+  string ret = !if(direct,
+    " $vdst$waitvdst$waitvsrc",
+    " $vdst, $attr$attrchan$waitvdst$waitvsrc"
+  );
+}
+
+class DSDIR_Real<DSDIR_Pseudo lds, dag ins, string asm, int subtarget> :
+  DSDIR_Common<lds.Mnemonic,
+               lds.Mnemonic # asm,
+               ins,
+               lds.is_direct>,
+  SIMCInstr <lds.Mnemonic, subtarget> {
+  let isPseudo = 0;
+  let isCodeGenOnly = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// LDS/VDS Direct Instructions
+//===----------------------------------------------------------------------===//
+
+let SubtargetPredicate = isGFX11Only in {
+
+def LDS_DIRECT_LOAD : DSDIR_Pseudo<"lds_direct_load", LDSDIR_getIns<1>.ret, 1>;
+def LDS_PARAM_LOAD : DSDIR_Pseudo<"lds_param_load", LDSDIR_getIns<0>.ret, 0>;
+
+def : GCNPat <
+  (f32 (int_amdgcn_lds_direct_load M0)),
+  (LDS_DIRECT_LOAD 0)
+>;
+
+def : GCNPat <
+  (f32 (int_amdgcn_lds_param_load timm:$attrchan, timm:$attr, M0)),
+  (LDS_PARAM_LOAD timm:$attr, timm:$attrchan, 0)
+>;
+
+} // End SubtargetPredicate = isGFX11Only
+
+let SubtargetPredicate = isGFX12Plus in {
+
+def DS_DIRECT_LOAD : DSDIR_Pseudo<"ds_direct_load", VDSDIR_getIns<1>.ret, 1>;
+def DS_PARAM_LOAD : DSDIR_Pseudo<"ds_param_load", VDSDIR_getIns<0>.ret, 0>;
+
+def : GCNPat <
+  (f32 (int_amdgcn_lds_direct_load M0)),
+  (DS_DIRECT_LOAD 0, 1)
+>;
+
+def : GCNPat <
+  (f32 (int_amdgcn_lds_param_load timm:$attrchan, timm:$attr, M0)),
+  (DS_PARAM_LOAD timm:$attr, timm:$attrchan, 0, 1)
+>;
+
+} // End SubtargetPredicate = isGFX12Only
+
+//===----------------------------------------------------------------------===//
+// GFX11
+//===----------------------------------------------------------------------===//
+
+multiclass DSDIR_Real_gfx11<bits<2> op,
+                            DSDIR_Pseudo lds = !cast<DSDIR_Pseudo>(NAME)> {
+  def _gfx11 : DSDIR_Real<lds, lds.InOperandList,
+                          LDSDIR_getAsm<lds.is_direct>.ret,
+                          SIEncodingFamily.GFX11>,
+               LDSDIRe<op, lds.is_direct> {
+    let AssemblerPredicate = isGFX11Only;
+    let DecoderNamespace = "GFX11";
+  }
+}
+
+defm LDS_PARAM_LOAD : DSDIR_Real_gfx11<0x0>;
+defm LDS_DIRECT_LOAD : DSDIR_Real_gfx11<0x1>;
+
+//===----------------------------------------------------------------------===//
+// GFX12+
+//===----------------------------------------------------------------------===//
+
+multiclass DSDIR_Real_gfx12<bits<2> op,
+                            DSDIR_Pseudo lds = !cast<DSDIR_Pseudo>(NAME)> {
+  def _gfx12 : DSDIR_Real<lds, lds.InOperandList,
+                          VDSDIR_getAsm<lds.is_direct>.ret,
+                          SIEncodingFamily.GFX12>,
+               VDSDIRe<op, lds.is_direct> {
+    let AssemblerPredicate = isGFX12Plus;
+    let DecoderNamespace = "GFX12";
+  }
+}
+
+defm DS_PARAM_LOAD : DSDIR_Real_gfx12<0x0>;
+defm DS_DIRECT_LOAD : DSDIR_Real_gfx12<0x1>;

diff  --git a/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td b/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td
deleted file mode 100644
index 4956a158677495..00000000000000
--- a/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td
+++ /dev/null
@@ -1,116 +0,0 @@
-//===-- LDSDIRInstructions.td - LDS Direct Instruction Definitions --------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// LDSDIR encoding
-//===----------------------------------------------------------------------===//
-
-class LDSDIRe<bits<2> op, bit is_direct> : Enc32 {
-  // encoding fields
-  bits<2> attrchan;
-  bits<6> attr;
-  bits<4> waitvdst;
-  bits<8> vdst;
-
-  // encoding
-  let Inst{31-24} = 0xce; // encoding
-  let Inst{23-22} = 0x0; // reserved
-  let Inst{21-20} = op;
-  let Inst{19-16} = waitvdst;
-  let Inst{15-10} = !if(is_direct, ?, attr);
-  let Inst{9-8} = !if(is_direct, ?, attrchan);
-  let Inst{7-0} = vdst;
-}
-
-//===----------------------------------------------------------------------===//
-// LDSDIR Classes
-//===----------------------------------------------------------------------===//
-
-class LDSDIR_getIns<bit direct> {
-  dag ret = !if(direct,
-    (ins wait_vdst:$waitvdst),
-    (ins InterpAttr:$attr, InterpAttrChan:$attrchan, wait_vdst:$waitvdst)
-  );
-}
-
-class LDSDIR_Common<string opName, string asm = "", bit direct> : InstSI<
-    (outs VGPR_32:$vdst),
-    LDSDIR_getIns<direct>.ret,
-    asm> {
-  let LDSDIR = 1;
-  let EXP_CNT = 1;
-
-  let hasSideEffects = 0;
-  let mayLoad = 1;
-  let mayStore = 0;
-
-  string Mnemonic = opName;
-  let UseNamedOperandTable = 1;
-
-  let Uses = [M0, EXEC];
-  let DisableWQM = 0;
-  let SchedRW = [WriteLDS];
-
-  bit is_direct;
-  let is_direct = direct;
-}
-
-class LDSDIR_Pseudo<string opName, bit direct> :
-  LDSDIR_Common<opName, "", direct>,
-  SIMCInstr<opName, SIEncodingFamily.NONE> {
-  let isPseudo = 1;
-  let isCodeGenOnly = 1;
-}
-
-class LDSDIR_getAsm<bit direct> {
-  string ret = !if(direct,
-    " $vdst$waitvdst",
-    " $vdst, $attr$attrchan$waitvdst"
-  );
-}
-
-class LDSDIR_Real<bits<2> op, LDSDIR_Pseudo lds, int subtarget> :
-  LDSDIR_Common<lds.Mnemonic,
-                lds.Mnemonic # LDSDIR_getAsm<lds.is_direct>.ret,
-                lds.is_direct>,
-  SIMCInstr <lds.Mnemonic, subtarget>,
-  LDSDIRe<op, lds.is_direct> {
-  let isPseudo = 0;
-  let isCodeGenOnly = 0;
-}
-
-//===----------------------------------------------------------------------===//
-// LDS Direct Instructions
-//===----------------------------------------------------------------------===//
-
-def LDS_DIRECT_LOAD : LDSDIR_Pseudo<"lds_direct_load", 1>;
-def LDS_PARAM_LOAD : LDSDIR_Pseudo<"lds_param_load", 0>;
-
-def : GCNPat <
-  (f32 (int_amdgcn_lds_direct_load M0)),
-  (LDS_DIRECT_LOAD 0)
->;
-
-def : GCNPat <
-  (f32 (int_amdgcn_lds_param_load timm:$attrchan, timm:$attr, M0)),
-  (LDS_PARAM_LOAD timm:$attr, timm:$attrchan, 0)
->;
-
-//===----------------------------------------------------------------------===//
-// GFX11+
-//===----------------------------------------------------------------------===//
-
-multiclass LDSDIR_Real_gfx11<bits<2> op, LDSDIR_Pseudo lds = !cast<LDSDIR_Pseudo>(NAME)> {
-  def _gfx11 : LDSDIR_Real<op, lds, SIEncodingFamily.GFX11> {
-    let AssemblerPredicate = isGFX11Plus;
-    let DecoderNamespace = "GFX11";
-  }
-}
-
-defm LDS_PARAM_LOAD : LDSDIR_Real_gfx11<0x0>;
-defm LDS_DIRECT_LOAD : LDSDIR_Real_gfx11<0x1>;

diff  --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index edc244db613d86..ef1b85f58d0eef 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -639,6 +639,20 @@ void AMDGPUInstPrinter::printWaitVDST(const MCInst *MI, unsigned OpNo,
   printU4ImmDecOperand(MI, OpNo, O);
 }
 
+void AMDGPUInstPrinter::printWaitVAVDst(const MCInst *MI, unsigned OpNo,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O) {
+  O << " wait_va_vdst:";
+  printU4ImmDecOperand(MI, OpNo, O);
+}
+
+void AMDGPUInstPrinter::printWaitVMVSrc(const MCInst *MI, unsigned OpNo,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O) {
+  O << " wait_vm_vsrc:";
+  printU4ImmDecOperand(MI, OpNo, O);
+}
+
 void AMDGPUInstPrinter::printWaitEXP(const MCInst *MI, unsigned OpNo,
                                     const MCSubtargetInfo &STI,
                                     raw_ostream &O) {

diff  --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index 95c26de6299ef5..f2f985fa5b1a87 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -161,6 +161,10 @@ class AMDGPUInstPrinter : public MCInstPrinter {
                     raw_ostream &O);
   void printWaitEXP(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                     raw_ostream &O);
+  void printWaitVAVDst(const MCInst *MI, unsigned OpNo,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
+  void printWaitVMVSrc(const MCInst *MI, unsigned OpNo,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
 
   void printExpSrcN(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                     raw_ostream &O, unsigned N);

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 173c877b8d29ef..50724fd3c1cf10 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1144,6 +1144,8 @@ def exp_tgt : CustomOperand<i32, 0, "ExpTgt">;
 
 def wait_vdst : NamedIntOperand<i8, "wait_vdst", "WaitVDST">;
 def wait_exp : NamedIntOperand<i8, "wait_exp", "WaitEXP">;
+def wait_va_vdst : NamedIntOperand<i8, "wait_va_vdst", "WaitVAVDst">;
+def wait_va_vsrc : NamedIntOperand<i8, "wait_vm_vsrc", "WaitVMVSrc">;
 
 class KImmFPOperand<ValueType vt> : ImmOperand<vt> {
   let OperandNamespace = "AMDGPU";

diff  --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 8310c6b57dad58..0f127276cfd1be 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -30,7 +30,7 @@ include "SMInstructions.td"
 include "FLATInstructions.td"
 include "BUFInstructions.td"
 include "EXPInstructions.td"
-include "LDSDIRInstructions.td"
+include "DSDIRInstructions.td"
 include "VINTERPInstructions.td"
 
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 59d6ccf513bb9e..5e6c34992930be 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -553,7 +553,9 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
         }
         continue;
       } else if (Opcode == AMDGPU::LDS_PARAM_LOAD ||
-                 Opcode == AMDGPU::LDS_DIRECT_LOAD) {
+                 Opcode == AMDGPU::DS_PARAM_LOAD ||
+                 Opcode == AMDGPU::LDS_DIRECT_LOAD ||
+                 Opcode == AMDGPU::DS_DIRECT_LOAD) {
         // Mark these STRICTWQM, but only for the instruction, not its operands.
         // This avoid unnecessarily marking M0 as requiring WQM.
         InstrInfo &II = Instructions[&MI];

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
index 313bd8525c6fd4..195c5dabb4d461 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
@@ -1,23 +1,27 @@
-; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
-; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
 
-; GFX11-LABEL: {{^}}lds_direct_load:
-; GFX11: s_mov_b32 m0
+; GCN-LABEL: {{^}}lds_direct_load:
+; GCN: s_mov_b32 m0
 ; GFX11: lds_direct_load v{{[0-9]+}}
-; GFX11: s_mov_b32 m0
+; GFX12: ds_direct_load v{{[0-9]+}}
+; GCN: s_mov_b32 m0
 ; GFX11: lds_direct_load v{{[0-9]+}}
-; GFX11: s_mov_b32 m0
+; GFX12: ds_direct_load v{{[0-9]+}}
+; GCN: s_mov_b32 m0
 ; GFX11: lds_direct_load v{{[0-9]+}}
-; GFX11: s_waitcnt expcnt(2)
-; GFX11: v_add_f32
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(1)
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(0)
-; GFX11: buffer_store_b32
-; GFX11: buffer_store_b32
-; GFX11: buffer_store_b32
-; GFX11: buffer_store_b32
+; GCN: s_waitcnt expcnt(2)
+; GCN: v_add_f32
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(1)
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(0)
+; GCN: buffer_store_b32
+; GCN: buffer_store_b32
+; GCN: buffer_store_b32
+; GCN: buffer_store_b32
 define amdgpu_ps void @lds_direct_load(ptr addrspace(8) inreg %buf, i32 inreg %arg0,
                                        i32 inreg %arg1, i32 inreg %arg2) #0 {
 main_body:

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
index 5a8b03e50e2ee6..1ab753d75fe031 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
@@ -1,25 +1,32 @@
-; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
-; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
 
-; GFX11-LABEL: {{^}}lds_param_load:
-; GFX11: s_mov_b32 m0
+; GCN-LABEL: {{^}}lds_param_load:
+; GCN: s_mov_b32 m0
 ; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.x
 ; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.y
 ; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.z
 ; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.w
 ; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr1.x
-; GFX11: s_waitcnt expcnt(4)
-; GFX11: v_add_f32
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(3)
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(2)
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(1)
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(0)
-; GFX11: buffer_store_b32
-; GFX11: buffer_store_b32
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.x
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.y
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.z
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.w
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr1.x
+; GCN: s_waitcnt expcnt(4)
+; GCN: v_add_f32
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(3)
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(2)
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(1)
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(0)
+; GCN: buffer_store_b32
+; GCN: buffer_store_b32
 define amdgpu_ps void @lds_param_load(ptr addrspace(8) inreg %buf, i32 inreg %arg) #0 {
 main_body:
   %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %arg)

diff  --git a/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
new file mode 100644
index 00000000000000..dbd732f9999227
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
@@ -0,0 +1,199 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck --strict-whitespace -check-prefix=GFX12 %s
+
+ds_direct_load v1 wait_va_vdst:15
+// GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x1f,0xce]
+
+ds_direct_load v2 wait_va_vdst:14
+// GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:0 ; encoding: [0x02,0x00,0x1e,0xce]
+
+ds_direct_load v3 wait_va_vdst:13
+// GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:0 ; encoding: [0x03,0x00,0x1d,0xce]
+
+ds_direct_load v4 wait_va_vdst:12
+// GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:0 ; encoding: [0x04,0x00,0x1c,0xce]
+
+ds_direct_load v5 wait_va_vdst:11
+// GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:0 ; encoding: [0x05,0x00,0x1b,0xce]
+
+ds_direct_load v6 wait_va_vdst:10
+// GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:0 ; encoding: [0x06,0x00,0x1a,0xce]
+
+ds_direct_load v7 wait_va_vdst:9
+// GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:0 ; encoding: [0x07,0x00,0x19,0xce]
+
+ds_direct_load v8 wait_va_vdst:8
+// GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:0 ; encoding: [0x08,0x00,0x18,0xce]
+
+ds_direct_load v9 wait_va_vdst:7
+// GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:0 ; encoding: [0x09,0x00,0x17,0xce]
+
+ds_direct_load v10 wait_va_vdst:6
+// GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x00,0x16,0xce]
+
+ds_direct_load v11 wait_va_vdst:5
+// GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:0 ; encoding: [0x0b,0x00,0x15,0xce]
+
+ds_direct_load v12 wait_va_vdst:4
+// GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:0 ; encoding: [0x0c,0x00,0x14,0xce]
+
+ds_direct_load v13 wait_va_vdst:3
+// GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:0 ; encoding: [0x0d,0x00,0x13,0xce]
+
+ds_direct_load v14 wait_va_vdst:2
+// GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:0 ; encoding: [0x0e,0x00,0x12,0xce]
+
+ds_direct_load v15 wait_va_vdst:1
+// GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:0 ; encoding: [0x0f,0x00,0x11,0xce]
+
+ds_direct_load v16 wait_va_vdst:0
+// GFX12: ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x10,0x00,0x10,0xce]
+
+ds_direct_load v17
+// GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x11,0x00,0x10,0xce]
+
+ds_param_load v1, attr0.x wait_va_vdst:15
+// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x0f,0xce]
+
+ds_param_load v2, attr0.y wait_va_vdst:14
+// GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:0 ; encoding: [0x02,0x01,0x0e,0xce]
+
+ds_param_load v3, attr0.z wait_va_vdst:13
+// GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:0 ; encoding: [0x03,0x02,0x0d,0xce]
+
+ds_param_load v4, attr0.w wait_va_vdst:12
+// GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:0 ; encoding: [0x04,0x03,0x0c,0xce]
+
+ds_param_load v5, attr0.x wait_va_vdst:11
+// GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:0 ; encoding: [0x05,0x00,0x0b,0xce]
+
+ds_param_load v6, attr1.x wait_va_vdst:10
+// GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:0 ; encoding: [0x06,0x04,0x0a,0xce]
+
+ds_param_load v7, attr2.y wait_va_vdst:9
+// GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:0 ; encoding: [0x07,0x09,0x09,0xce]
+
+ds_param_load v8, attr3.z wait_va_vdst:8
+// GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:0 ; encoding: [0x08,0x0e,0x08,0xce]
+
+ds_param_load v9, attr4.w wait_va_vdst:7
+// GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:0 ; encoding: [0x09,0x13,0x07,0xce]
+
+ds_param_load v10, attr11.x wait_va_vdst:6
+// GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x2c,0x06,0xce]
+
+ds_param_load v11, attr22.y wait_va_vdst:5
+// GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:0 ; encoding: [0x0b,0x59,0x05,0xce]
+
+ds_param_load v13, attr32.x wait_va_vdst:3
+// GFX12: ds_param_load v13, attr32.x wait_va_vdst:3 wait_vm_vsrc:0 ; encoding: [0x0d,0x80,0x03,0xce]
+
+ds_param_load v14, attr32.y wait_va_vdst:2
+// GFX12: ds_param_load v14, attr32.y wait_va_vdst:2 wait_vm_vsrc:0 ; encoding: [0x0e,0x81,0x02,0xce]
+
+ds_param_load v15, attr32.z wait_va_vdst:1
+// GFX12: ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:0 ; encoding: [0x0f,0x82,0x01,0xce]
+
+ds_param_load v16, attr32.w wait_va_vdst:0
+// GFX12: ds_param_load v16, attr32.w wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x10,0x83,0x00,0xce]
+
+ds_param_load v17, attr32.w
+// GFX12: ds_param_load v17, attr32.w wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x11,0x83,0x00,0xce]
+
+ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1
+// GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x9f,0xce]
+
+ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1
+// GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x00,0x9e,0xce]
+
+ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1
+// GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x00,0x9d,0xce]
+
+ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1
+// GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x00,0x9c,0xce]
+
+ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1
+// GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x9b,0xce]
+
+ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1
+// GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x00,0x9a,0xce]
+
+ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1
+// GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x00,0x99,0xce]
+
+ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1
+// GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x00,0x98,0xce]
+
+ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1
+// GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x00,0x97,0xce]
+
+ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1
+// GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x00,0x96,0xce]
+
+ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1
+// GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x00,0x95,0xce]
+
+ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1
+// GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1 ; encoding: [0x0c,0x00,0x94,0xce]
+
+ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1
+// GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0x00,0x93,0xce]
+
+ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1
+// GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0x00,0x92,0xce]
+
+ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1
+// GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0x00,0x91,0xce]
+
+ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:1
+// GFX12: ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x10,0x00,0x90,0xce]
+
+ds_direct_load v17 wait_vm_vsrc:1
+// GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x11,0x00,0x90,0xce]
+
+ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1
+// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x8f,0xce]
+
+ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1
+// GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x01,0x8e,0xce]
+
+ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1
+// GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x02,0x8d,0xce]
+
+ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1
+// GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x03,0x8c,0xce]
+
+ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1
+// GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x8b,0xce]
+
+ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1
+// GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x04,0x8a,0xce]
+
+ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1
+// GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x09,0x89,0xce]
+
+ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1
+// GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x0e,0x88,0xce]
+
+ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1
+// GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x13,0x87,0xce]
+
+ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:1
+// GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x2c,0x86,0xce]
+
+ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:1
+// GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x59,0x85,0xce]
+
+ds_param_load v13, attr32.x wait_va_vdst:3 wait_vm_vsrc:1
+// GFX12: ds_param_load v13, attr32.x wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0x80,0x83,0xce]
+
+ds_param_load v14, attr32.y wait_va_vdst:2 wait_vm_vsrc:1
+// GFX12: ds_param_load v14, attr32.y wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0x81,0x82,0xce]
+
+ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:1
+// GFX12: ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0x82,0x81,0xce]
+
+ds_param_load v16, attr32.w wait_va_vdst:0 wait_vm_vsrc:1
+// GFX12: ds_param_load v16, attr32.w wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x10,0x83,0x80,0xce]
+
+ds_param_load v17, attr32.w wait_vm_vsrc:1
+// GFX12: ds_param_load v17, attr32.w wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x11,0x83,0x80,0xce]

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt
new file mode 100644
index 00000000000000..b7c0394429dc3b
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt
@@ -0,0 +1,206 @@
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck --strict-whitespace -check-prefix=GFX12 %s
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck --strict-whitespace -check-prefix=GFX12 %s
+
+# GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x00,0x16,0xce]
+0x0a,0x00,0x16,0xce
+
+# GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:0 ; encoding: [0x0b,0x00,0x15,0xce]
+0x0b,0x00,0x15,0xce
+
+# GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:0 ; encoding: [0x0c,0x00,0x14,0xce]
+0x0c,0x00,0x14,0xce
+
+# GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:0 ; encoding: [0x0d,0x00,0x13,0xce]
+0x0d,0x00,0x13,0xce
+
+# GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:0 ; encoding: [0x0e,0x00,0x12,0xce]
+0x0e,0x00,0x12,0xce
+
+# GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:0 ; encoding: [0x0f,0x00,0x11,0xce]
+0x0f,0x00,0x11,0xce
+
+# GFX12: ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x10,0x00,0x10,0xce]
+0x10,0x00,0x10,0xce
+
+# GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x11,0x00,0x10,0xce]
+0x11,0x00,0x10,0xce
+
+# GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x1f,0xce]
+0x01,0x00,0x1f,0xce
+
+# GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:0 ; encoding: [0x02,0x00,0x1e,0xce]
+0x02,0x00,0x1e,0xce
+
+# GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:0 ; encoding: [0x03,0x00,0x1d,0xce]
+0x03,0x00,0x1d,0xce
+
+# GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:0 ; encoding: [0x04,0x00,0x1c,0xce]
+0x04,0x00,0x1c,0xce
+
+# GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:0 ; encoding: [0x05,0x00,0x1b,0xce]
+0x05,0x00,0x1b,0xce
+
+# GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:0 ; encoding: [0x06,0x00,0x1a,0xce]
+0x06,0x00,0x1a,0xce
+
+# GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:0 ; encoding: [0x07,0x00,0x19,0xce]
+0x07,0x00,0x19,0xce
+
+# GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:0 ; encoding: [0x08,0x00,0x18,0xce]
+0x08,0x00,0x18,0xce
+
+# GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:0 ; encoding: [0x09,0x00,0x17,0xce]
+0x09,0x00,0x17,0xce
+
+# GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x2c,0x06,0xce]
+0x0a,0x2c,0x06,0xce
+
+# GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:0 ; encoding: [0x0b,0x59,0x05,0xce]
+0x0b,0x59,0x05,0xce
+
+# GFX12: ds_param_load v12, attr33.z wait_va_vdst:4 wait_vm_vsrc:0 ; encoding: [0x0c,0x86,0x04,0xce]
+0x0c,0x86,0x04,0xce
+
+# GFX12: ds_param_load v13, attr63.x wait_va_vdst:3 wait_vm_vsrc:0 ; encoding: [0x0d,0xfc,0x03,0xce]
+0x0d,0xfc,0x03,0xce
+
+# GFX12: ds_param_load v14, attr63.y wait_va_vdst:2 wait_vm_vsrc:0 ; encoding: [0x0e,0xfd,0x02,0xce]
+0x0e,0xfd,0x02,0xce
+
+# GFX12: ds_param_load v15, attr63.z wait_va_vdst:1 wait_vm_vsrc:0 ; encoding: [0x0f,0xfe,0x01,0xce]
+0x0f,0xfe,0x01,0xce
+
+# GFX12: ds_param_load v16, attr63.w wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x10,0xff,0x00,0xce]
+0x10,0xff,0x00,0xce
+
+# GFX12: ds_param_load v17, attr63.w wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x11,0xff,0x00,0xce]
+0x11,0xff,0x00,0xce
+
+# GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x0f,0xce]
+0x01,0x00,0x0f,0xce
+
+# GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:0 ; encoding: [0x02,0x01,0x0e,0xce]
+0x02,0x01,0x0e,0xce
+
+# GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:0 ; encoding: [0x03,0x02,0x0d,0xce]
+0x03,0x02,0x0d,0xce
+
+# GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:0 ; encoding: [0x04,0x03,0x0c,0xce]
+0x04,0x03,0x0c,0xce
+
+# GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:0 ; encoding: [0x05,0x00,0x0b,0xce]
+0x05,0x00,0x0b,0xce
+
+# GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:0 ; encoding: [0x06,0x04,0x0a,0xce]
+0x06,0x04,0x0a,0xce
+
+# GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:0 ; encoding: [0x07,0x09,0x09,0xce]
+0x07,0x09,0x09,0xce
+
+# GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:0 ; encoding: [0x08,0x0e,0x08,0xce]
+0x08,0x0e,0x08,0xce
+
+# GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:0 ; encoding: [0x09,0x13,0x07,0xce]
+0x09,0x13,0x07,0xce
+
+# GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x00,0x96,0xce]
+0x0a,0x00,0x96,0xce
+
+# GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x00,0x95,0xce]
+0x0b,0x00,0x95,0xce
+
+# GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1 ; encoding: [0x0c,0x00,0x94,0xce]
+0x0c,0x00,0x94,0xce
+
+# GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0x00,0x93,0xce]
+0x0d,0x00,0x93,0xce
+
+# GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0x00,0x92,0xce]
+0x0e,0x00,0x92,0xce
+
+# GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0x00,0x91,0xce]
+0x0f,0x00,0x91,0xce
+
+# GFX12: ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x10,0x00,0x90,0xce]
+0x10,0x00,0x90,0xce
+
+# GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x11,0x00,0x90,0xce]
+0x11,0x00,0x90,0xce
+
+# GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x9f,0xce]
+0x01,0x00,0x9f,0xce
+
+# GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x00,0x9e,0xce]
+0x02,0x00,0x9e,0xce
+
+# GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x00,0x9d,0xce]
+0x03,0x00,0x9d,0xce
+
+# GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x00,0x9c,0xce]
+0x04,0x00,0x9c,0xce
+
+# GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x9b,0xce]
+0x05,0x00,0x9b,0xce
+
+# GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x00,0x9a,0xce]
+0x06,0x00,0x9a,0xce
+
+# GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x00,0x99,0xce]
+0x07,0x00,0x99,0xce
+
+# GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x00,0x98,0xce]
+0x08,0x00,0x98,0xce
+
+# GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x00,0x97,0xce]
+0x09,0x00,0x97,0xce
+
+# GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x2c,0x86,0xce]
+0x0a,0x2c,0x86,0xce
+
+# GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x59,0x85,0xce]
+0x0b,0x59,0x85,0xce
+
+# GFX12: ds_param_load v12, attr33.z wait_va_vdst:4 wait_vm_vsrc:1 ; encoding: [0x0c,0x86,0x84,0xce]
+0x0c,0x86,0x84,0xce
+
+# GFX12: ds_param_load v13, attr63.x wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0xfc,0x83,0xce]
+0x0d,0xfc,0x83,0xce
+
+# GFX12: ds_param_load v14, attr63.y wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0xfd,0x82,0xce]
+0x0e,0xfd,0x82,0xce
+
+# GFX12: ds_param_load v15, attr63.z wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0xfe,0x81,0xce]
+0x0f,0xfe,0x81,0xce
+
+# GFX12: ds_param_load v16, attr63.w wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x10,0xff,0x80,0xce]
+0x10,0xff,0x80,0xce
+
+# GFX12: ds_param_load v17, attr63.w wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x11,0xff,0x80,0xce]
+0x11,0xff,0x80,0xce
+
+# GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x8f,0xce]
+0x01,0x00,0x8f,0xce
+
+# GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x01,0x8e,0xce]
+0x02,0x01,0x8e,0xce
+
+# GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x02,0x8d,0xce]
+0x03,0x02,0x8d,0xce
+
+# GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x03,0x8c,0xce]
+0x04,0x03,0x8c,0xce
+
+# GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x8b,0xce]
+0x05,0x00,0x8b,0xce
+
+# GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x04,0x8a,0xce]
+0x06,0x04,0x8a,0xce
+
+# GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x09,0x89,0xce]
+0x07,0x09,0x89,0xce
+
+# GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x0e,0x88,0xce]
+0x08,0x0e,0x88,0xce
+
+# GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x13,0x87,0xce]
+0x09,0x13,0x87,0xce


        


More information about the llvm-commits mailing list