[llvm] [AMDGPU] Add VDSDIR instructions for GFX12 (PR #75197)
Mirko Brkušanin via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 13 02:38:44 PST 2023
https://github.com/mbrkusanin updated https://github.com/llvm/llvm-project/pull/75197
>From e89c469c98a466cb52047700f5dd8ffcbf729a44 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Tue, 12 Dec 2023 16:13:09 +0100
Subject: [PATCH 1/4] [AMDGPU] Add VDSDIR instructions for GFX12
---
.../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 16 ++
llvm/lib/Target/AMDGPU/LDSDIRInstructions.td | 118 ++++++++--
.../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 20 ++
.../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h | 4 +
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 +
llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp | 4 +-
.../AMDGPU/llvm.amdgcn.lds.direct.load.ll | 36 +--
.../AMDGPU/llvm.amdgcn.lds.param.load.ll | 39 ++--
llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s | 199 +++++++++++++++++
.../Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt | 206 ++++++++++++++++++
10 files changed, 587 insertions(+), 57 deletions(-)
create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 92427335c0ad2f..cb2ea1f144e587 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -166,6 +166,8 @@ class AMDGPUOperand : public MCParsedAsmOperand {
ImmTyEndpgm,
ImmTyWaitVDST,
ImmTyWaitEXP,
+ ImmTyWaitVAVDst,
+ ImmTyWaitVMVSrc,
};
// Immediate operand kind.
@@ -908,6 +910,8 @@ class AMDGPUOperand : public MCParsedAsmOperand {
bool isEndpgm() const;
bool isWaitVDST() const;
bool isWaitEXP() const;
+ bool isWaitVAVDst() const;
+ bool isWaitVMVSrc() const;
auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
return std::bind(P, *this);
@@ -1028,6 +1032,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
}
static void printImmTy(raw_ostream& OS, ImmTy Type) {
+ // clang-format off
switch (Type) {
case ImmTyNone: OS << "None"; break;
case ImmTyGDS: OS << "GDS"; break;
@@ -1085,7 +1090,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
case ImmTyEndpgm: OS << "Endpgm"; break;
case ImmTyWaitVDST: OS << "WaitVDST"; break;
case ImmTyWaitEXP: OS << "WaitEXP"; break;
+ case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
+ case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
}
+ // clang-format on
}
void print(raw_ostream &OS) const override {
@@ -9130,6 +9138,14 @@ bool AMDGPUOperand::isWaitVDST() const {
return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
}
+bool AMDGPUOperand::isWaitVAVDst() const {
+ return isImmTy(ImmTyWaitVAVDst) && isUInt<4>(getImm());
+}
+
+bool AMDGPUOperand::isWaitVMVSrc() const {
+ return isImmTy(ImmTyWaitVMVSrc) && isUInt<1>(getImm());
+}
+
//===----------------------------------------------------------------------===//
// VINTERP
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td b/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td
index 4956a158677495..572798ae10c2b7 100644
--- a/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td
+++ b/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td
@@ -1,4 +1,4 @@
-//===-- LDSDIRInstructions.td - LDS Direct Instruction Definitions --------===//
+//===-- LDSDIRInstructions.td - LDS/DS Direct Instruction Definitions -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// LDSDIR encoding
+// LDSDIR/VDSDIR encoding (LDSDIR is gfx11, VDSDIR is gfx12+)
//===----------------------------------------------------------------------===//
class LDSDIRe<bits<2> op, bit is_direct> : Enc32 {
@@ -27,8 +27,27 @@ class LDSDIRe<bits<2> op, bit is_direct> : Enc32 {
let Inst{7-0} = vdst;
}
+class VDSDIRe<bits<2> op, bit is_direct> : Enc32 {
+ // encoding fields
+ bits<2> attrchan;
+ bits<6> attr;
+ bits<4> waitvdst;
+ bits<8> vdst;
+ bits<1> waitvsrc;
+
+ // encoding
+ let Inst{31-24} = 0xce; // encoding
+ let Inst{23} = waitvsrc;
+ let Inst{22} = 0x0; // reserved
+ let Inst{21-20} = op;
+ let Inst{19-16} = waitvdst;
+ let Inst{15-10} = !if(is_direct, ?, attr);
+ let Inst{9-8} = !if(is_direct, ?, attrchan);
+ let Inst{7-0} = vdst;
+}
+
//===----------------------------------------------------------------------===//
-// LDSDIR Classes
+// LDSDIR/VDSDIR Classes
//===----------------------------------------------------------------------===//
class LDSDIR_getIns<bit direct> {
@@ -38,10 +57,15 @@ class LDSDIR_getIns<bit direct> {
);
}
-class LDSDIR_Common<string opName, string asm = "", bit direct> : InstSI<
- (outs VGPR_32:$vdst),
- LDSDIR_getIns<direct>.ret,
- asm> {
+class VDSDIR_getIns<bit direct> {
+ dag ret = !if(direct,
+ (ins WaitVAVDst:$waitvdst, WaitVMVSrc:$waitvsrc),
+ (ins InterpAttr:$attr, InterpAttrChan:$attrchan, WaitVAVDst:$waitvdst, WaitVMVSrc:$waitvsrc)
+ );
+}
+
+class DSDIR_Common<string opName, string asm = "", dag ins, bit direct> : InstSI<
+ (outs VGPR_32:$vdst), ins, asm> {
let LDSDIR = 1;
let EXP_CNT = 1;
@@ -60,8 +84,8 @@ class LDSDIR_Common<string opName, string asm = "", bit direct> : InstSI<
let is_direct = direct;
}
-class LDSDIR_Pseudo<string opName, bit direct> :
- LDSDIR_Common<opName, "", direct>,
+class DSDIR_Pseudo<string opName, dag ins, bit direct> :
+ DSDIR_Common<opName, "", ins, direct>,
SIMCInstr<opName, SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
@@ -74,22 +98,31 @@ class LDSDIR_getAsm<bit direct> {
);
}
-class LDSDIR_Real<bits<2> op, LDSDIR_Pseudo lds, int subtarget> :
- LDSDIR_Common<lds.Mnemonic,
- lds.Mnemonic # LDSDIR_getAsm<lds.is_direct>.ret,
- lds.is_direct>,
- SIMCInstr <lds.Mnemonic, subtarget>,
- LDSDIRe<op, lds.is_direct> {
+class VDSDIR_getAsm<bit direct> {
+ string ret = !if(direct,
+ " $vdst$waitvdst$waitvsrc",
+ " $vdst, $attr$attrchan$waitvdst$waitvsrc"
+ );
+}
+
+class DSDIR_Real<DSDIR_Pseudo lds, dag ins, string asm, int subtarget> :
+ DSDIR_Common<lds.Mnemonic,
+ lds.Mnemonic # asm,
+ ins,
+ lds.is_direct>,
+ SIMCInstr <lds.Mnemonic, subtarget> {
let isPseudo = 0;
let isCodeGenOnly = 0;
}
//===----------------------------------------------------------------------===//
-// LDS Direct Instructions
+// LDS/DS Direct Instructions
//===----------------------------------------------------------------------===//
-def LDS_DIRECT_LOAD : LDSDIR_Pseudo<"lds_direct_load", 1>;
-def LDS_PARAM_LOAD : LDSDIR_Pseudo<"lds_param_load", 0>;
+let SubtargetPredicate = isGFX11Only in {
+
+def LDS_DIRECT_LOAD : DSDIR_Pseudo<"lds_direct_load", LDSDIR_getIns<1>.ret, 1>;
+def LDS_PARAM_LOAD : DSDIR_Pseudo<"lds_param_load", LDSDIR_getIns<0>.ret, 0>;
def : GCNPat <
(f32 (int_amdgcn_lds_direct_load M0)),
@@ -101,16 +134,53 @@ def : GCNPat <
(LDS_PARAM_LOAD timm:$attr, timm:$attrchan, 0)
>;
+} // End SubtargetPredicate = isGFX11Only
+
+let SubtargetPredicate = isGFX12Plus in {
+
+def DS_DIRECT_LOAD : DSDIR_Pseudo<"ds_direct_load", VDSDIR_getIns<1>.ret, 1>;
+def DS_PARAM_LOAD : DSDIR_Pseudo<"ds_param_load", VDSDIR_getIns<0>.ret, 0>;
+
+def : GCNPat <
+ (f32 (int_amdgcn_lds_direct_load M0)),
+ (DS_DIRECT_LOAD 0, 1)
+>;
+
+def : GCNPat <
+ (f32 (int_amdgcn_lds_param_load timm:$attrchan, timm:$attr, M0)),
+ (DS_PARAM_LOAD timm:$attr, timm:$attrchan, 0, 1)
+>;
+
+} // End SubtargetPredicate = isGFX12Plus
+
//===----------------------------------------------------------------------===//
-// GFX11+
+// GFX11
//===----------------------------------------------------------------------===//
-multiclass LDSDIR_Real_gfx11<bits<2> op, LDSDIR_Pseudo lds = !cast<LDSDIR_Pseudo>(NAME)> {
- def _gfx11 : LDSDIR_Real<op, lds, SIEncodingFamily.GFX11> {
- let AssemblerPredicate = isGFX11Plus;
+multiclass DSDIR_Real_gfx11<bits<2> op, DSDIR_Pseudo lds = !cast<DSDIR_Pseudo>(NAME)> {
+ def _gfx11 : DSDIR_Real<lds, lds.InOperandList,
+ LDSDIR_getAsm<lds.is_direct>.ret, SIEncodingFamily.GFX11>,
+ LDSDIRe<op, lds.is_direct> {
+ let AssemblerPredicate = isGFX11Only;
let DecoderNamespace = "GFX11";
}
}
-defm LDS_PARAM_LOAD : LDSDIR_Real_gfx11<0x0>;
-defm LDS_DIRECT_LOAD : LDSDIR_Real_gfx11<0x1>;
+defm LDS_PARAM_LOAD : DSDIR_Real_gfx11<0x0>;
+defm LDS_DIRECT_LOAD : DSDIR_Real_gfx11<0x1>;
+
+//===----------------------------------------------------------------------===//
+// GFX12+
+//===----------------------------------------------------------------------===//
+
+multiclass DSDIR_Real_gfx12<bits<2> op, DSDIR_Pseudo lds = !cast<DSDIR_Pseudo>(NAME)> {
+ def _gfx12 : DSDIR_Real<lds, lds.InOperandList,
+ VDSDIR_getAsm<lds.is_direct>.ret, SIEncodingFamily.GFX12>,
+ VDSDIRe<op, lds.is_direct> {
+ let AssemblerPredicate = isGFX12Plus;
+ let DecoderNamespace = "GFX12";
+ }
+}
+
+defm DS_PARAM_LOAD : DSDIR_Real_gfx12<0x0>;
+defm DS_DIRECT_LOAD : DSDIR_Real_gfx12<0x1>;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 57f74ae08b35c4..d99d343f81c40b 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -631,6 +631,26 @@ void AMDGPUInstPrinter::printWaitVDST(const MCInst *MI, unsigned OpNo,
printU4ImmDecOperand(MI, OpNo, O);
}
+void AMDGPUInstPrinter::printWaitVAVDst(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ uint8_t Imm = MI->getOperand(OpNo).getImm();
+ if (Imm != 0) {
+ O << " wait_va_vdst:";
+ printU4ImmDecOperand(MI, OpNo, O);
+ }
+}
+
+void AMDGPUInstPrinter::printWaitVMVSrc(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ uint8_t Imm = MI->getOperand(OpNo).getImm();
+ if (Imm != 0) {
+ O << " wait_vm_vsrc:";
+ printU4ImmDecOperand(MI, OpNo, O);
+ }
+}
+
void AMDGPUInstPrinter::printWaitEXP(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index 95c26de6299ef5..f2f985fa5b1a87 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -161,6 +161,10 @@ class AMDGPUInstPrinter : public MCInstPrinter {
raw_ostream &O);
void printWaitEXP(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printWaitVAVDst(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printWaitVMVSrc(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printExpSrcN(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O, unsigned N);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 9e60bdda5ef3af..e9a5beb99498b9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1129,6 +1129,8 @@ def exp_tgt : CustomOperand<i32, 0, "ExpTgt">;
def wait_vdst : NamedIntOperand<i8, "wait_vdst", "WaitVDST">;
def wait_exp : NamedIntOperand<i8, "wait_exp", "WaitEXP">;
+def WaitVAVDst : NamedIntOperand<i8, "wait_va_vdst">;
+def WaitVMVSrc : NamedIntOperand<i8, "wait_vm_vsrc">;
class KImmFPOperand<ValueType vt> : ImmOperand<vt> {
let OperandNamespace = "AMDGPU";
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 59d6ccf513bb9e..5e6c34992930be 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -553,7 +553,9 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
}
continue;
} else if (Opcode == AMDGPU::LDS_PARAM_LOAD ||
- Opcode == AMDGPU::LDS_DIRECT_LOAD) {
+ Opcode == AMDGPU::DS_PARAM_LOAD ||
+ Opcode == AMDGPU::LDS_DIRECT_LOAD ||
+ Opcode == AMDGPU::DS_DIRECT_LOAD) {
// Mark these STRICTWQM, but only for the instruction, not its operands.
// This avoid unnecessarily marking M0 as requiring WQM.
InstrInfo &II = Instructions[&MI];
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
index 313bd8525c6fd4..195c5dabb4d461 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
@@ -1,23 +1,27 @@
-; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
-; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
-; GFX11-LABEL: {{^}}lds_direct_load:
-; GFX11: s_mov_b32 m0
+; GCN-LABEL: {{^}}lds_direct_load:
+; GCN: s_mov_b32 m0
; GFX11: lds_direct_load v{{[0-9]+}}
-; GFX11: s_mov_b32 m0
+; GFX12: ds_direct_load v{{[0-9]+}}
+; GCN: s_mov_b32 m0
; GFX11: lds_direct_load v{{[0-9]+}}
-; GFX11: s_mov_b32 m0
+; GFX12: ds_direct_load v{{[0-9]+}}
+; GCN: s_mov_b32 m0
; GFX11: lds_direct_load v{{[0-9]+}}
-; GFX11: s_waitcnt expcnt(2)
-; GFX11: v_add_f32
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(1)
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(0)
-; GFX11: buffer_store_b32
-; GFX11: buffer_store_b32
-; GFX11: buffer_store_b32
-; GFX11: buffer_store_b32
+; GCN: s_waitcnt expcnt(2)
+; GCN: v_add_f32
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(1)
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(0)
+; GCN: buffer_store_b32
+; GCN: buffer_store_b32
+; GCN: buffer_store_b32
+; GCN: buffer_store_b32
define amdgpu_ps void @lds_direct_load(ptr addrspace(8) inreg %buf, i32 inreg %arg0,
i32 inreg %arg1, i32 inreg %arg2) #0 {
main_body:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
index 5a8b03e50e2ee6..1ab753d75fe031 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
@@ -1,25 +1,32 @@
-; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
-; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
-; GFX11-LABEL: {{^}}lds_param_load:
-; GFX11: s_mov_b32 m0
+; GCN-LABEL: {{^}}lds_param_load:
+; GCN: s_mov_b32 m0
; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.x
; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.y
; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.z
; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.w
; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr1.x
-; GFX11: s_waitcnt expcnt(4)
-; GFX11: v_add_f32
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(3)
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(2)
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(1)
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(0)
-; GFX11: buffer_store_b32
-; GFX11: buffer_store_b32
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.x
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.y
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.z
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.w
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr1.x
+; GCN: s_waitcnt expcnt(4)
+; GCN: v_add_f32
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(3)
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(2)
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(1)
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(0)
+; GCN: buffer_store_b32
+; GCN: buffer_store_b32
define amdgpu_ps void @lds_param_load(ptr addrspace(8) inreg %buf, i32 inreg %arg) #0 {
main_body:
%p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %arg)
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
new file mode 100644
index 00000000000000..4d40eae1c7e7fc
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
@@ -0,0 +1,199 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck -check-prefix=GFX12 %s
+
+ds_direct_load v1 wait_va_vdst:15
+// GFX12: ds_direct_load v1 wait_va_vdst:15 ; encoding: [0x01,0x00,0x1f,0xce]
+
+ds_direct_load v2 wait_va_vdst:14
+// GFX12: ds_direct_load v2 wait_va_vdst:14 ; encoding: [0x02,0x00,0x1e,0xce]
+
+ds_direct_load v3 wait_va_vdst:13
+// GFX12: ds_direct_load v3 wait_va_vdst:13 ; encoding: [0x03,0x00,0x1d,0xce]
+
+ds_direct_load v4 wait_va_vdst:12
+// GFX12: ds_direct_load v4 wait_va_vdst:12 ; encoding: [0x04,0x00,0x1c,0xce]
+
+ds_direct_load v5 wait_va_vdst:11
+// GFX12: ds_direct_load v5 wait_va_vdst:11 ; encoding: [0x05,0x00,0x1b,0xce]
+
+ds_direct_load v6 wait_va_vdst:10
+// GFX12: ds_direct_load v6 wait_va_vdst:10 ; encoding: [0x06,0x00,0x1a,0xce]
+
+ds_direct_load v7 wait_va_vdst:9
+// GFX12: ds_direct_load v7 wait_va_vdst:9 ; encoding: [0x07,0x00,0x19,0xce]
+
+ds_direct_load v8 wait_va_vdst:8
+// GFX12: ds_direct_load v8 wait_va_vdst:8 ; encoding: [0x08,0x00,0x18,0xce]
+
+ds_direct_load v9 wait_va_vdst:7
+// GFX12: ds_direct_load v9 wait_va_vdst:7 ; encoding: [0x09,0x00,0x17,0xce]
+
+ds_direct_load v10 wait_va_vdst:6
+// GFX12: ds_direct_load v10 wait_va_vdst:6 ; encoding: [0x0a,0x00,0x16,0xce]
+
+ds_direct_load v11 wait_va_vdst:5
+// GFX12: ds_direct_load v11 wait_va_vdst:5 ; encoding: [0x0b,0x00,0x15,0xce]
+
+ds_direct_load v12 wait_va_vdst:4
+// GFX12: ds_direct_load v12 wait_va_vdst:4 ; encoding: [0x0c,0x00,0x14,0xce]
+
+ds_direct_load v13 wait_va_vdst:3
+// GFX12: ds_direct_load v13 wait_va_vdst:3 ; encoding: [0x0d,0x00,0x13,0xce]
+
+ds_direct_load v14 wait_va_vdst:2
+// GFX12: ds_direct_load v14 wait_va_vdst:2 ; encoding: [0x0e,0x00,0x12,0xce]
+
+ds_direct_load v15 wait_va_vdst:1
+// GFX12: ds_direct_load v15 wait_va_vdst:1 ; encoding: [0x0f,0x00,0x11,0xce]
+
+ds_direct_load v16 wait_va_vdst:0
+// GFX12: ds_direct_load v16 ; encoding: [0x10,0x00,0x10,0xce]
+
+ds_direct_load v17
+// GFX12: ds_direct_load v17 ; encoding: [0x11,0x00,0x10,0xce]
+
+ds_param_load v1, attr0.x wait_va_vdst:15
+// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 ; encoding: [0x01,0x00,0x0f,0xce]
+
+ds_param_load v2, attr0.y wait_va_vdst:14
+// GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 ; encoding: [0x02,0x01,0x0e,0xce]
+
+ds_param_load v3, attr0.z wait_va_vdst:13
+// GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 ; encoding: [0x03,0x02,0x0d,0xce]
+
+ds_param_load v4, attr0.w wait_va_vdst:12
+// GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 ; encoding: [0x04,0x03,0x0c,0xce]
+
+ds_param_load v5, attr0.x wait_va_vdst:11
+// GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 ; encoding: [0x05,0x00,0x0b,0xce]
+
+ds_param_load v6, attr1.x wait_va_vdst:10
+// GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 ; encoding: [0x06,0x04,0x0a,0xce]
+
+ds_param_load v7, attr2.y wait_va_vdst:9
+// GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 ; encoding: [0x07,0x09,0x09,0xce]
+
+ds_param_load v8, attr3.z wait_va_vdst:8
+// GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 ; encoding: [0x08,0x0e,0x08,0xce]
+
+ds_param_load v9, attr4.w wait_va_vdst:7
+// GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 ; encoding: [0x09,0x13,0x07,0xce]
+
+ds_param_load v10, attr11.x wait_va_vdst:6
+// GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 ; encoding: [0x0a,0x2c,0x06,0xce]
+
+ds_param_load v11, attr22.y wait_va_vdst:5
+// GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 ; encoding: [0x0b,0x59,0x05,0xce]
+
+ds_param_load v13, attr32.x wait_va_vdst:3
+// GFX12: ds_param_load v13, attr32.x wait_va_vdst:3 ; encoding: [0x0d,0x80,0x03,0xce]
+
+ds_param_load v14, attr32.y wait_va_vdst:2
+// GFX12: ds_param_load v14, attr32.y wait_va_vdst:2 ; encoding: [0x0e,0x81,0x02,0xce]
+
+ds_param_load v15, attr32.z wait_va_vdst:1
+// GFX12: ds_param_load v15, attr32.z wait_va_vdst:1 ; encoding: [0x0f,0x82,0x01,0xce]
+
+ds_param_load v16, attr32.w wait_va_vdst:0
+// GFX12: ds_param_load v16, attr32.w ; encoding: [0x10,0x83,0x00,0xce]
+
+ds_param_load v17, attr32.w
+// GFX12: ds_param_load v17, attr32.w ; encoding: [0x11,0x83,0x00,0xce]
+
+ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1
+// GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x9f,0xce]
+
+ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1
+// GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x00,0x9e,0xce]
+
+ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1
+// GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x00,0x9d,0xce]
+
+ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1
+// GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x00,0x9c,0xce]
+
+ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1
+// GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x9b,0xce]
+
+ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1
+// GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x00,0x9a,0xce]
+
+ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1
+// GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x00,0x99,0xce]
+
+ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1
+// GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x00,0x98,0xce]
+
+ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1
+// GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x00,0x97,0xce]
+
+ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1
+// GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x00,0x96,0xce]
+
+ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1
+// GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x00,0x95,0xce]
+
+ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1
+// GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1 ; encoding: [0x0c,0x00,0x94,0xce]
+
+ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1
+// GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0x00,0x93,0xce]
+
+ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1
+// GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0x00,0x92,0xce]
+
+ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1
+// GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0x00,0x91,0xce]
+
+ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:1
+// GFX12: ds_direct_load v16 wait_vm_vsrc:1 ; encoding: [0x10,0x00,0x90,0xce]
+
+ds_direct_load v17 wait_vm_vsrc:1
+// GFX12: ds_direct_load v17 wait_vm_vsrc:1 ; encoding: [0x11,0x00,0x90,0xce]
+
+ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1
+// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x8f,0xce]
+
+ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1
+// GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x01,0x8e,0xce]
+
+ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1
+// GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x02,0x8d,0xce]
+
+ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1
+// GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x03,0x8c,0xce]
+
+ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1
+// GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x8b,0xce]
+
+ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1
+// GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x04,0x8a,0xce]
+
+ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1
+// GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x09,0x89,0xce]
+
+ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1
+// GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x0e,0x88,0xce]
+
+ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1
+// GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x13,0x87,0xce]
+
+ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:1
+// GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x2c,0x86,0xce]
+
+ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:1
+// GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x59,0x85,0xce]
+
+ds_param_load v13, attr32.x wait_va_vdst:3 wait_vm_vsrc:1
+// GFX12: ds_param_load v13, attr32.x wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0x80,0x83,0xce]
+
+ds_param_load v14, attr32.y wait_va_vdst:2 wait_vm_vsrc:1
+// GFX12: ds_param_load v14, attr32.y wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0x81,0x82,0xce]
+
+ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:1
+// GFX12: ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0x82,0x81,0xce]
+
+ds_param_load v16, attr32.w wait_va_vdst:0 wait_vm_vsrc:1
+// GFX12: ds_param_load v16, attr32.w wait_vm_vsrc:1 ; encoding: [0x10,0x83,0x80,0xce]
+
+ds_param_load v17, attr32.w wait_vm_vsrc:1
+// GFX12: ds_param_load v17, attr32.w wait_vm_vsrc:1 ; encoding: [0x11,0x83,0x80,0xce]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt
new file mode 100644
index 00000000000000..47a3ee614dd9fc
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt
@@ -0,0 +1,206 @@
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX12 %s
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX12 %s
+
+# GFX12: ds_direct_load v10 wait_va_vdst:6 ; encoding: [0x0a,0x00,0x16,0xce]
+0x0a,0x00,0x16,0xce
+
+# GFX12: ds_direct_load v11 wait_va_vdst:5 ; encoding: [0x0b,0x00,0x15,0xce]
+0x0b,0x00,0x15,0xce
+
+# GFX12: ds_direct_load v12 wait_va_vdst:4 ; encoding: [0x0c,0x00,0x14,0xce]
+0x0c,0x00,0x14,0xce
+
+# GFX12: ds_direct_load v13 wait_va_vdst:3 ; encoding: [0x0d,0x00,0x13,0xce]
+0x0d,0x00,0x13,0xce
+
+# GFX12: ds_direct_load v14 wait_va_vdst:2 ; encoding: [0x0e,0x00,0x12,0xce]
+0x0e,0x00,0x12,0xce
+
+# GFX12: ds_direct_load v15 wait_va_vdst:1 ; encoding: [0x0f,0x00,0x11,0xce]
+0x0f,0x00,0x11,0xce
+
+# GFX12: ds_direct_load v16 ; encoding: [0x10,0x00,0x10,0xce]
+0x10,0x00,0x10,0xce
+
+# GFX12: ds_direct_load v17 ; encoding: [0x11,0x00,0x10,0xce]
+0x11,0x00,0x10,0xce
+
+# GFX12: ds_direct_load v1 wait_va_vdst:15 ; encoding: [0x01,0x00,0x1f,0xce]
+0x01,0x00,0x1f,0xce
+
+# GFX12: ds_direct_load v2 wait_va_vdst:14 ; encoding: [0x02,0x00,0x1e,0xce]
+0x02,0x00,0x1e,0xce
+
+# GFX12: ds_direct_load v3 wait_va_vdst:13 ; encoding: [0x03,0x00,0x1d,0xce]
+0x03,0x00,0x1d,0xce
+
+# GFX12: ds_direct_load v4 wait_va_vdst:12 ; encoding: [0x04,0x00,0x1c,0xce]
+0x04,0x00,0x1c,0xce
+
+# GFX12: ds_direct_load v5 wait_va_vdst:11 ; encoding: [0x05,0x00,0x1b,0xce]
+0x05,0x00,0x1b,0xce
+
+# GFX12: ds_direct_load v6 wait_va_vdst:10 ; encoding: [0x06,0x00,0x1a,0xce]
+0x06,0x00,0x1a,0xce
+
+# GFX12: ds_direct_load v7 wait_va_vdst:9 ; encoding: [0x07,0x00,0x19,0xce]
+0x07,0x00,0x19,0xce
+
+# GFX12: ds_direct_load v8 wait_va_vdst:8 ; encoding: [0x08,0x00,0x18,0xce]
+0x08,0x00,0x18,0xce
+
+# GFX12: ds_direct_load v9 wait_va_vdst:7 ; encoding: [0x09,0x00,0x17,0xce]
+0x09,0x00,0x17,0xce
+
+# GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 ; encoding: [0x0a,0x2c,0x06,0xce]
+0x0a,0x2c,0x06,0xce
+
+# GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 ; encoding: [0x0b,0x59,0x05,0xce]
+0x0b,0x59,0x05,0xce
+
+# GFX12: ds_param_load v12, attr33.z wait_va_vdst:4 ; encoding: [0x0c,0x86,0x04,0xce]
+0x0c,0x86,0x04,0xce
+
+# GFX12: ds_param_load v13, attr63.x wait_va_vdst:3 ; encoding: [0x0d,0xfc,0x03,0xce]
+0x0d,0xfc,0x03,0xce
+
+# GFX12: ds_param_load v14, attr63.y wait_va_vdst:2 ; encoding: [0x0e,0xfd,0x02,0xce]
+0x0e,0xfd,0x02,0xce
+
+# GFX12: ds_param_load v15, attr63.z wait_va_vdst:1 ; encoding: [0x0f,0xfe,0x01,0xce]
+0x0f,0xfe,0x01,0xce
+
+# GFX12: ds_param_load v16, attr63.w ; encoding: [0x10,0xff,0x00,0xce]
+0x10,0xff,0x00,0xce
+
+# GFX12: ds_param_load v17, attr63.w ; encoding: [0x11,0xff,0x00,0xce]
+0x11,0xff,0x00,0xce
+
+# GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 ; encoding: [0x01,0x00,0x0f,0xce]
+0x01,0x00,0x0f,0xce
+
+# GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 ; encoding: [0x02,0x01,0x0e,0xce]
+0x02,0x01,0x0e,0xce
+
+# GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 ; encoding: [0x03,0x02,0x0d,0xce]
+0x03,0x02,0x0d,0xce
+
+# GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 ; encoding: [0x04,0x03,0x0c,0xce]
+0x04,0x03,0x0c,0xce
+
+# GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 ; encoding: [0x05,0x00,0x0b,0xce]
+0x05,0x00,0x0b,0xce
+
+# GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 ; encoding: [0x06,0x04,0x0a,0xce]
+0x06,0x04,0x0a,0xce
+
+# GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 ; encoding: [0x07,0x09,0x09,0xce]
+0x07,0x09,0x09,0xce
+
+# GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 ; encoding: [0x08,0x0e,0x08,0xce]
+0x08,0x0e,0x08,0xce
+
+# GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 ; encoding: [0x09,0x13,0x07,0xce]
+0x09,0x13,0x07,0xce
+
+# GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x00,0x96,0xce]
+0x0a,0x00,0x96,0xce
+
+# GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x00,0x95,0xce]
+0x0b,0x00,0x95,0xce
+
+# GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1 ; encoding: [0x0c,0x00,0x94,0xce]
+0x0c,0x00,0x94,0xce
+
+# GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0x00,0x93,0xce]
+0x0d,0x00,0x93,0xce
+
+# GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0x00,0x92,0xce]
+0x0e,0x00,0x92,0xce
+
+# GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0x00,0x91,0xce]
+0x0f,0x00,0x91,0xce
+
+# GFX12: ds_direct_load v16 wait_vm_vsrc:1 ; encoding: [0x10,0x00,0x90,0xce]
+0x10,0x00,0x90,0xce
+
+# GFX12: ds_direct_load v17 wait_vm_vsrc:1 ; encoding: [0x11,0x00,0x90,0xce]
+0x11,0x00,0x90,0xce
+
+# GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x9f,0xce]
+0x01,0x00,0x9f,0xce
+
+# GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x00,0x9e,0xce]
+0x02,0x00,0x9e,0xce
+
+# GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x00,0x9d,0xce]
+0x03,0x00,0x9d,0xce
+
+# GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x00,0x9c,0xce]
+0x04,0x00,0x9c,0xce
+
+# GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x9b,0xce]
+0x05,0x00,0x9b,0xce
+
+# GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x00,0x9a,0xce]
+0x06,0x00,0x9a,0xce
+
+# GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x00,0x99,0xce]
+0x07,0x00,0x99,0xce
+
+# GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x00,0x98,0xce]
+0x08,0x00,0x98,0xce
+
+# GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x00,0x97,0xce]
+0x09,0x00,0x97,0xce
+
+# GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x2c,0x86,0xce]
+0x0a,0x2c,0x86,0xce
+
+# GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x59,0x85,0xce]
+0x0b,0x59,0x85,0xce
+
+# GFX12: ds_param_load v12, attr33.z wait_va_vdst:4 wait_vm_vsrc:1 ; encoding: [0x0c,0x86,0x84,0xce]
+0x0c,0x86,0x84,0xce
+
+# GFX12: ds_param_load v13, attr63.x wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0xfc,0x83,0xce]
+0x0d,0xfc,0x83,0xce
+
+# GFX12: ds_param_load v14, attr63.y wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0xfd,0x82,0xce]
+0x0e,0xfd,0x82,0xce
+
+# GFX12: ds_param_load v15, attr63.z wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0xfe,0x81,0xce]
+0x0f,0xfe,0x81,0xce
+
+# GFX12: ds_param_load v16, attr63.w wait_vm_vsrc:1 ; encoding: [0x10,0xff,0x80,0xce]
+0x10,0xff,0x80,0xce
+
+# GFX12: ds_param_load v17, attr63.w wait_vm_vsrc:1 ; encoding: [0x11,0xff,0x80,0xce]
+0x11,0xff,0x80,0xce
+
+# GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x8f,0xce]
+0x01,0x00,0x8f,0xce
+
+# GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x01,0x8e,0xce]
+0x02,0x01,0x8e,0xce
+
+# GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x02,0x8d,0xce]
+0x03,0x02,0x8d,0xce
+
+# GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x03,0x8c,0xce]
+0x04,0x03,0x8c,0xce
+
+# GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x8b,0xce]
+0x05,0x00,0x8b,0xce
+
+# GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x04,0x8a,0xce]
+0x06,0x04,0x8a,0xce
+
+# GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x09,0x89,0xce]
+0x07,0x09,0x89,0xce
+
+# GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x0e,0x88,0xce]
+0x08,0x0e,0x88,0xce
+
+# GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x13,0x87,0xce]
+0x09,0x13,0x87,0xce
>From accc21c0cba0ea91fcee31fa0817c6e61fc65895 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Wed, 13 Dec 2023 10:23:32 +0100
Subject: [PATCH 2/4] always print wait_va_vdst and wait_vm_vsrc
---
.../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 14 +-
llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s | 74 +++++------
.../Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt | 124 +++++++++---------
3 files changed, 103 insertions(+), 109 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index d99d343f81c40b..b6698b0865d191 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -634,21 +634,15 @@ void AMDGPUInstPrinter::printWaitVDST(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printWaitVAVDst(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- uint8_t Imm = MI->getOperand(OpNo).getImm();
- if (Imm != 0) {
- O << " wait_va_vdst:";
- printU4ImmDecOperand(MI, OpNo, O);
- }
+ O << " wait_va_vdst:";
+ printU4ImmDecOperand(MI, OpNo, O);
}
void AMDGPUInstPrinter::printWaitVMVSrc(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- uint8_t Imm = MI->getOperand(OpNo).getImm();
- if (Imm != 0) {
- O << " wait_vm_vsrc:";
- printU4ImmDecOperand(MI, OpNo, O);
- }
+ O << " wait_vm_vsrc:";
+ printU4ImmDecOperand(MI, OpNo, O);
}
void AMDGPUInstPrinter::printWaitEXP(const MCInst *MI, unsigned OpNo,
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
index 4d40eae1c7e7fc..85f6d0d17ca75f 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
@@ -1,103 +1,103 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck -check-prefix=GFX12 %s
ds_direct_load v1 wait_va_vdst:15
-// GFX12: ds_direct_load v1 wait_va_vdst:15 ; encoding: [0x01,0x00,0x1f,0xce]
+// GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x1f,0xce]
ds_direct_load v2 wait_va_vdst:14
-// GFX12: ds_direct_load v2 wait_va_vdst:14 ; encoding: [0x02,0x00,0x1e,0xce]
+// GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:0 ; encoding: [0x02,0x00,0x1e,0xce]
ds_direct_load v3 wait_va_vdst:13
-// GFX12: ds_direct_load v3 wait_va_vdst:13 ; encoding: [0x03,0x00,0x1d,0xce]
+// GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:0 ; encoding: [0x03,0x00,0x1d,0xce]
ds_direct_load v4 wait_va_vdst:12
-// GFX12: ds_direct_load v4 wait_va_vdst:12 ; encoding: [0x04,0x00,0x1c,0xce]
+// GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:0 ; encoding: [0x04,0x00,0x1c,0xce]
ds_direct_load v5 wait_va_vdst:11
-// GFX12: ds_direct_load v5 wait_va_vdst:11 ; encoding: [0x05,0x00,0x1b,0xce]
+// GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:0 ; encoding: [0x05,0x00,0x1b,0xce]
ds_direct_load v6 wait_va_vdst:10
-// GFX12: ds_direct_load v6 wait_va_vdst:10 ; encoding: [0x06,0x00,0x1a,0xce]
+// GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:0 ; encoding: [0x06,0x00,0x1a,0xce]
ds_direct_load v7 wait_va_vdst:9
-// GFX12: ds_direct_load v7 wait_va_vdst:9 ; encoding: [0x07,0x00,0x19,0xce]
+// GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:0 ; encoding: [0x07,0x00,0x19,0xce]
ds_direct_load v8 wait_va_vdst:8
-// GFX12: ds_direct_load v8 wait_va_vdst:8 ; encoding: [0x08,0x00,0x18,0xce]
+// GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:0 ; encoding: [0x08,0x00,0x18,0xce]
ds_direct_load v9 wait_va_vdst:7
-// GFX12: ds_direct_load v9 wait_va_vdst:7 ; encoding: [0x09,0x00,0x17,0xce]
+// GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:0 ; encoding: [0x09,0x00,0x17,0xce]
ds_direct_load v10 wait_va_vdst:6
-// GFX12: ds_direct_load v10 wait_va_vdst:6 ; encoding: [0x0a,0x00,0x16,0xce]
+// GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x00,0x16,0xce]
ds_direct_load v11 wait_va_vdst:5
-// GFX12: ds_direct_load v11 wait_va_vdst:5 ; encoding: [0x0b,0x00,0x15,0xce]
+// GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:0 ; encoding: [0x0b,0x00,0x15,0xce]
ds_direct_load v12 wait_va_vdst:4
-// GFX12: ds_direct_load v12 wait_va_vdst:4 ; encoding: [0x0c,0x00,0x14,0xce]
+// GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:0 ; encoding: [0x0c,0x00,0x14,0xce]
ds_direct_load v13 wait_va_vdst:3
-// GFX12: ds_direct_load v13 wait_va_vdst:3 ; encoding: [0x0d,0x00,0x13,0xce]
+// GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:0 ; encoding: [0x0d,0x00,0x13,0xce]
ds_direct_load v14 wait_va_vdst:2
-// GFX12: ds_direct_load v14 wait_va_vdst:2 ; encoding: [0x0e,0x00,0x12,0xce]
+// GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:0 ; encoding: [0x0e,0x00,0x12,0xce]
ds_direct_load v15 wait_va_vdst:1
-// GFX12: ds_direct_load v15 wait_va_vdst:1 ; encoding: [0x0f,0x00,0x11,0xce]
+// GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:0 ; encoding: [0x0f,0x00,0x11,0xce]
ds_direct_load v16 wait_va_vdst:0
-// GFX12: ds_direct_load v16 ; encoding: [0x10,0x00,0x10,0xce]
+// GFX12: ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x10,0x00,0x10,0xce]
ds_direct_load v17
-// GFX12: ds_direct_load v17 ; encoding: [0x11,0x00,0x10,0xce]
+// GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x11,0x00,0x10,0xce]
ds_param_load v1, attr0.x wait_va_vdst:15
-// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 ; encoding: [0x01,0x00,0x0f,0xce]
+// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x0f,0xce]
ds_param_load v2, attr0.y wait_va_vdst:14
-// GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 ; encoding: [0x02,0x01,0x0e,0xce]
+// GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:0 ; encoding: [0x02,0x01,0x0e,0xce]
ds_param_load v3, attr0.z wait_va_vdst:13
-// GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 ; encoding: [0x03,0x02,0x0d,0xce]
+// GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:0 ; encoding: [0x03,0x02,0x0d,0xce]
ds_param_load v4, attr0.w wait_va_vdst:12
-// GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 ; encoding: [0x04,0x03,0x0c,0xce]
+// GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:0 ; encoding: [0x04,0x03,0x0c,0xce]
ds_param_load v5, attr0.x wait_va_vdst:11
-// GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 ; encoding: [0x05,0x00,0x0b,0xce]
+// GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:0 ; encoding: [0x05,0x00,0x0b,0xce]
ds_param_load v6, attr1.x wait_va_vdst:10
-// GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 ; encoding: [0x06,0x04,0x0a,0xce]
+// GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:0 ; encoding: [0x06,0x04,0x0a,0xce]
ds_param_load v7, attr2.y wait_va_vdst:9
-// GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 ; encoding: [0x07,0x09,0x09,0xce]
+// GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:0 ; encoding: [0x07,0x09,0x09,0xce]
ds_param_load v8, attr3.z wait_va_vdst:8
-// GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 ; encoding: [0x08,0x0e,0x08,0xce]
+// GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:0 ; encoding: [0x08,0x0e,0x08,0xce]
ds_param_load v9, attr4.w wait_va_vdst:7
-// GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 ; encoding: [0x09,0x13,0x07,0xce]
+// GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:0 ; encoding: [0x09,0x13,0x07,0xce]
ds_param_load v10, attr11.x wait_va_vdst:6
-// GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 ; encoding: [0x0a,0x2c,0x06,0xce]
+// GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x2c,0x06,0xce]
ds_param_load v11, attr22.y wait_va_vdst:5
-// GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 ; encoding: [0x0b,0x59,0x05,0xce]
+// GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:0 ; encoding: [0x0b,0x59,0x05,0xce]
ds_param_load v13, attr32.x wait_va_vdst:3
-// GFX12: ds_param_load v13, attr32.x wait_va_vdst:3 ; encoding: [0x0d,0x80,0x03,0xce]
+// GFX12: ds_param_load v13, attr32.x wait_va_vdst:3 wait_vm_vsrc:0 ; encoding: [0x0d,0x80,0x03,0xce]
ds_param_load v14, attr32.y wait_va_vdst:2
-// GFX12: ds_param_load v14, attr32.y wait_va_vdst:2 ; encoding: [0x0e,0x81,0x02,0xce]
+// GFX12: ds_param_load v14, attr32.y wait_va_vdst:2 wait_vm_vsrc:0 ; encoding: [0x0e,0x81,0x02,0xce]
ds_param_load v15, attr32.z wait_va_vdst:1
-// GFX12: ds_param_load v15, attr32.z wait_va_vdst:1 ; encoding: [0x0f,0x82,0x01,0xce]
+// GFX12: ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:0 ; encoding: [0x0f,0x82,0x01,0xce]
ds_param_load v16, attr32.w wait_va_vdst:0
-// GFX12: ds_param_load v16, attr32.w ; encoding: [0x10,0x83,0x00,0xce]
+// GFX12: ds_param_load v16, attr32.w wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x10,0x83,0x00,0xce]
ds_param_load v17, attr32.w
-// GFX12: ds_param_load v17, attr32.w ; encoding: [0x11,0x83,0x00,0xce]
+// GFX12: ds_param_load v17, attr32.w wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x11,0x83,0x00,0xce]
ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1
// GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x9f,0xce]
@@ -145,10 +145,10 @@ ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1
// GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0x00,0x91,0xce]
ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:1
-// GFX12: ds_direct_load v16 wait_vm_vsrc:1 ; encoding: [0x10,0x00,0x90,0xce]
+// GFX12: ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x10,0x00,0x90,0xce]
ds_direct_load v17 wait_vm_vsrc:1
-// GFX12: ds_direct_load v17 wait_vm_vsrc:1 ; encoding: [0x11,0x00,0x90,0xce]
+// GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x11,0x00,0x90,0xce]
ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1
// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x8f,0xce]
@@ -193,7 +193,7 @@ ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:1
// GFX12: ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0x82,0x81,0xce]
ds_param_load v16, attr32.w wait_va_vdst:0 wait_vm_vsrc:1
-// GFX12: ds_param_load v16, attr32.w wait_vm_vsrc:1 ; encoding: [0x10,0x83,0x80,0xce]
+// GFX12: ds_param_load v16, attr32.w wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x10,0x83,0x80,0xce]
ds_param_load v17, attr32.w wait_vm_vsrc:1
-// GFX12: ds_param_load v17, attr32.w wait_vm_vsrc:1 ; encoding: [0x11,0x83,0x80,0xce]
+// GFX12: ds_param_load v17, attr32.w wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x11,0x83,0x80,0xce]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt
index 47a3ee614dd9fc..121afb05ff6cf0 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt
@@ -1,157 +1,157 @@
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX12 %s
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX12 %s
-# GFX12: ds_direct_load v10 wait_va_vdst:6 ; encoding: [0x0a,0x00,0x16,0xce]
+# GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x00,0x16,0xce]
0x0a,0x00,0x16,0xce
-# GFX12: ds_direct_load v11 wait_va_vdst:5 ; encoding: [0x0b,0x00,0x15,0xce]
+# GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:0 ; encoding: [0x0b,0x00,0x15,0xce]
0x0b,0x00,0x15,0xce
-# GFX12: ds_direct_load v12 wait_va_vdst:4 ; encoding: [0x0c,0x00,0x14,0xce]
+# GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:0 ; encoding: [0x0c,0x00,0x14,0xce]
0x0c,0x00,0x14,0xce
-# GFX12: ds_direct_load v13 wait_va_vdst:3 ; encoding: [0x0d,0x00,0x13,0xce]
+# GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:0 ; encoding: [0x0d,0x00,0x13,0xce]
0x0d,0x00,0x13,0xce
-# GFX12: ds_direct_load v14 wait_va_vdst:2 ; encoding: [0x0e,0x00,0x12,0xce]
+# GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:0 ; encoding: [0x0e,0x00,0x12,0xce]
0x0e,0x00,0x12,0xce
-# GFX12: ds_direct_load v15 wait_va_vdst:1 ; encoding: [0x0f,0x00,0x11,0xce]
+# GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:0 ; encoding: [0x0f,0x00,0x11,0xce]
0x0f,0x00,0x11,0xce
-# GFX12: ds_direct_load v16 ; encoding: [0x10,0x00,0x10,0xce]
+# GFX12: ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x10,0x00,0x10,0xce]
0x10,0x00,0x10,0xce
-# GFX12: ds_direct_load v17 ; encoding: [0x11,0x00,0x10,0xce]
+# GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x11,0x00,0x10,0xce]
0x11,0x00,0x10,0xce
-# GFX12: ds_direct_load v1 wait_va_vdst:15 ; encoding: [0x01,0x00,0x1f,0xce]
+# GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x1f,0xce]
0x01,0x00,0x1f,0xce
-# GFX12: ds_direct_load v2 wait_va_vdst:14 ; encoding: [0x02,0x00,0x1e,0xce]
+# GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:0 ; encoding: [0x02,0x00,0x1e,0xce]
0x02,0x00,0x1e,0xce
-# GFX12: ds_direct_load v3 wait_va_vdst:13 ; encoding: [0x03,0x00,0x1d,0xce]
+# GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:0 ; encoding: [0x03,0x00,0x1d,0xce]
0x03,0x00,0x1d,0xce
-# GFX12: ds_direct_load v4 wait_va_vdst:12 ; encoding: [0x04,0x00,0x1c,0xce]
+# GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:0 ; encoding: [0x04,0x00,0x1c,0xce]
0x04,0x00,0x1c,0xce
-# GFX12: ds_direct_load v5 wait_va_vdst:11 ; encoding: [0x05,0x00,0x1b,0xce]
+# GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:0 ; encoding: [0x05,0x00,0x1b,0xce]
0x05,0x00,0x1b,0xce
-# GFX12: ds_direct_load v6 wait_va_vdst:10 ; encoding: [0x06,0x00,0x1a,0xce]
+# GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:0 ; encoding: [0x06,0x00,0x1a,0xce]
0x06,0x00,0x1a,0xce
-# GFX12: ds_direct_load v7 wait_va_vdst:9 ; encoding: [0x07,0x00,0x19,0xce]
+# GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:0 ; encoding: [0x07,0x00,0x19,0xce]
0x07,0x00,0x19,0xce
-# GFX12: ds_direct_load v8 wait_va_vdst:8 ; encoding: [0x08,0x00,0x18,0xce]
+# GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:0 ; encoding: [0x08,0x00,0x18,0xce]
0x08,0x00,0x18,0xce
-# GFX12: ds_direct_load v9 wait_va_vdst:7 ; encoding: [0x09,0x00,0x17,0xce]
+# GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:0 ; encoding: [0x09,0x00,0x17,0xce]
0x09,0x00,0x17,0xce
-# GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 ; encoding: [0x0a,0x2c,0x06,0xce]
+# GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x2c,0x06,0xce]
0x0a,0x2c,0x06,0xce
-# GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 ; encoding: [0x0b,0x59,0x05,0xce]
+# GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:0 ; encoding: [0x0b,0x59,0x05,0xce]
0x0b,0x59,0x05,0xce
-# GFX12: ds_param_load v12, attr33.z wait_va_vdst:4 ; encoding: [0x0c,0x86,0x04,0xce]
+# GFX12: ds_param_load v12, attr33.z wait_va_vdst:4 wait_vm_vsrc:0 ; encoding: [0x0c,0x86,0x04,0xce]
0x0c,0x86,0x04,0xce
-# GFX12: ds_param_load v13, attr63.x wait_va_vdst:3 ; encoding: [0x0d,0xfc,0x03,0xce]
+# GFX12: ds_param_load v13, attr63.x wait_va_vdst:3 wait_vm_vsrc:0 ; encoding: [0x0d,0xfc,0x03,0xce]
0x0d,0xfc,0x03,0xce
-# GFX12: ds_param_load v14, attr63.y wait_va_vdst:2 ; encoding: [0x0e,0xfd,0x02,0xce]
+# GFX12: ds_param_load v14, attr63.y wait_va_vdst:2 wait_vm_vsrc:0 ; encoding: [0x0e,0xfd,0x02,0xce]
0x0e,0xfd,0x02,0xce
-# GFX12: ds_param_load v15, attr63.z wait_va_vdst:1 ; encoding: [0x0f,0xfe,0x01,0xce]
+# GFX12: ds_param_load v15, attr63.z wait_va_vdst:1 wait_vm_vsrc:0 ; encoding: [0x0f,0xfe,0x01,0xce]
0x0f,0xfe,0x01,0xce
-# GFX12: ds_param_load v16, attr63.w ; encoding: [0x10,0xff,0x00,0xce]
+# GFX12: ds_param_load v16, attr63.w wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x10,0xff,0x00,0xce]
0x10,0xff,0x00,0xce
-# GFX12: ds_param_load v17, attr63.w ; encoding: [0x11,0xff,0x00,0xce]
+# GFX12: ds_param_load v17, attr63.w wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x11,0xff,0x00,0xce]
0x11,0xff,0x00,0xce
-# GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 ; encoding: [0x01,0x00,0x0f,0xce]
+# GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x0f,0xce]
0x01,0x00,0x0f,0xce
-# GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 ; encoding: [0x02,0x01,0x0e,0xce]
+# GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:0 ; encoding: [0x02,0x01,0x0e,0xce]
0x02,0x01,0x0e,0xce
-# GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 ; encoding: [0x03,0x02,0x0d,0xce]
+# GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:0 ; encoding: [0x03,0x02,0x0d,0xce]
0x03,0x02,0x0d,0xce
-# GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 ; encoding: [0x04,0x03,0x0c,0xce]
+# GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:0 ; encoding: [0x04,0x03,0x0c,0xce]
0x04,0x03,0x0c,0xce
-# GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 ; encoding: [0x05,0x00,0x0b,0xce]
+# GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:0 ; encoding: [0x05,0x00,0x0b,0xce]
0x05,0x00,0x0b,0xce
-# GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 ; encoding: [0x06,0x04,0x0a,0xce]
+# GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:0 ; encoding: [0x06,0x04,0x0a,0xce]
0x06,0x04,0x0a,0xce
-# GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 ; encoding: [0x07,0x09,0x09,0xce]
+# GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:0 ; encoding: [0x07,0x09,0x09,0xce]
0x07,0x09,0x09,0xce
-# GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 ; encoding: [0x08,0x0e,0x08,0xce]
+# GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:0 ; encoding: [0x08,0x0e,0x08,0xce]
0x08,0x0e,0x08,0xce
-# GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 ; encoding: [0x09,0x13,0x07,0xce]
+# GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:0 ; encoding: [0x09,0x13,0x07,0xce]
0x09,0x13,0x07,0xce
-# GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x00,0x96,0xce]
+# GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x00,0x96,0xce]
0x0a,0x00,0x96,0xce
-# GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x00,0x95,0xce]
+# GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x00,0x95,0xce]
0x0b,0x00,0x95,0xce
-# GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1 ; encoding: [0x0c,0x00,0x94,0xce]
+# GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1 ; encoding: [0x0c,0x00,0x94,0xce]
0x0c,0x00,0x94,0xce
-# GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0x00,0x93,0xce]
+# GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0x00,0x93,0xce]
0x0d,0x00,0x93,0xce
-# GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0x00,0x92,0xce]
+# GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0x00,0x92,0xce]
0x0e,0x00,0x92,0xce
-# GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0x00,0x91,0xce]
+# GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0x00,0x91,0xce]
0x0f,0x00,0x91,0xce
-# GFX12: ds_direct_load v16 wait_vm_vsrc:1 ; encoding: [0x10,0x00,0x90,0xce]
+# GFX12: ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x10,0x00,0x90,0xce]
0x10,0x00,0x90,0xce
-# GFX12: ds_direct_load v17 wait_vm_vsrc:1 ; encoding: [0x11,0x00,0x90,0xce]
+# GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x11,0x00,0x90,0xce]
0x11,0x00,0x90,0xce
-# GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x9f,0xce]
+# GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x9f,0xce]
0x01,0x00,0x9f,0xce
-# GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x00,0x9e,0xce]
+# GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x00,0x9e,0xce]
0x02,0x00,0x9e,0xce
-# GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x00,0x9d,0xce]
+# GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x00,0x9d,0xce]
0x03,0x00,0x9d,0xce
-# GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x00,0x9c,0xce]
+# GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x00,0x9c,0xce]
0x04,0x00,0x9c,0xce
-# GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x9b,0xce]
+# GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x9b,0xce]
0x05,0x00,0x9b,0xce
-# GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x00,0x9a,0xce]
+# GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x00,0x9a,0xce]
0x06,0x00,0x9a,0xce
-# GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x00,0x99,0xce]
+# GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x00,0x99,0xce]
0x07,0x00,0x99,0xce
-# GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x00,0x98,0xce]
+# GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x00,0x98,0xce]
0x08,0x00,0x98,0xce
-# GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x00,0x97,0xce]
+# GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x00,0x97,0xce]
0x09,0x00,0x97,0xce
# GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x2c,0x86,0xce]
@@ -172,35 +172,35 @@
# GFX12: ds_param_load v15, attr63.z wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0xfe,0x81,0xce]
0x0f,0xfe,0x81,0xce
-# GFX12: ds_param_load v16, attr63.w wait_vm_vsrc:1 ; encoding: [0x10,0xff,0x80,0xce]
+# GFX12: ds_param_load v16, attr63.w wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x10,0xff,0x80,0xce]
0x10,0xff,0x80,0xce
-# GFX12: ds_param_load v17, attr63.w wait_vm_vsrc:1 ; encoding: [0x11,0xff,0x80,0xce]
+# GFX12: ds_param_load v17, attr63.w wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x11,0xff,0x80,0xce]
0x11,0xff,0x80,0xce
-# GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x8f,0xce]
+# GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x8f,0xce]
0x01,0x00,0x8f,0xce
-# GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x01,0x8e,0xce]
+# GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x01,0x8e,0xce]
0x02,0x01,0x8e,0xce
-# GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x02,0x8d,0xce]
+# GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x02,0x8d,0xce]
0x03,0x02,0x8d,0xce
-# GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x03,0x8c,0xce]
+# GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x03,0x8c,0xce]
0x04,0x03,0x8c,0xce
-# GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x8b,0xce]
+# GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x8b,0xce]
0x05,0x00,0x8b,0xce
-# GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x04,0x8a,0xce]
+# GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x04,0x8a,0xce]
0x06,0x04,0x8a,0xce
-# GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x09,0x89,0xce]
+# GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x09,0x89,0xce]
0x07,0x09,0x89,0xce
-# GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x0e,0x88,0xce]
+# GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x0e,0x88,0xce]
0x08,0x0e,0x88,0xce
-# GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x13,0x87,0xce]
+# GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x13,0x87,0xce]
0x09,0x13,0x87,0xce
>From 0a060874343055b736e003c15c6a6ffd74e7c625 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Wed, 13 Dec 2023 11:36:56 +0100
Subject: [PATCH 3/4] Adjust spacing in tests; Add --strict-whitespace check
---
llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s | 92 +++++++++----------
.../Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt | 4 +-
2 files changed, 48 insertions(+), 48 deletions(-)
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
index 85f6d0d17ca75f..7acccfe6a30d2e 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck -check-prefix=GFX12 %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck --strict-whitespace -check-prefix=GFX12 %s
ds_direct_load v1 wait_va_vdst:15
// GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x1f,0xce]
@@ -19,13 +19,13 @@ ds_direct_load v6 wait_va_vdst:10
// GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:0 ; encoding: [0x06,0x00,0x1a,0xce]
ds_direct_load v7 wait_va_vdst:9
-// GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:0 ; encoding: [0x07,0x00,0x19,0xce]
+// GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:0 ; encoding: [0x07,0x00,0x19,0xce]
ds_direct_load v8 wait_va_vdst:8
-// GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:0 ; encoding: [0x08,0x00,0x18,0xce]
+// GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:0 ; encoding: [0x08,0x00,0x18,0xce]
ds_direct_load v9 wait_va_vdst:7
-// GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:0 ; encoding: [0x09,0x00,0x17,0xce]
+// GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:0 ; encoding: [0x09,0x00,0x17,0xce]
ds_direct_load v10 wait_va_vdst:6
// GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x00,0x16,0xce]
@@ -52,31 +52,31 @@ ds_direct_load v17
// GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x11,0x00,0x10,0xce]
ds_param_load v1, attr0.x wait_va_vdst:15
-// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x0f,0xce]
+// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x0f,0xce]
ds_param_load v2, attr0.y wait_va_vdst:14
-// GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:0 ; encoding: [0x02,0x01,0x0e,0xce]
+// GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:0 ; encoding: [0x02,0x01,0x0e,0xce]
ds_param_load v3, attr0.z wait_va_vdst:13
-// GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:0 ; encoding: [0x03,0x02,0x0d,0xce]
+// GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:0 ; encoding: [0x03,0x02,0x0d,0xce]
ds_param_load v4, attr0.w wait_va_vdst:12
-// GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:0 ; encoding: [0x04,0x03,0x0c,0xce]
+// GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:0 ; encoding: [0x04,0x03,0x0c,0xce]
ds_param_load v5, attr0.x wait_va_vdst:11
-// GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:0 ; encoding: [0x05,0x00,0x0b,0xce]
+// GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:0 ; encoding: [0x05,0x00,0x0b,0xce]
ds_param_load v6, attr1.x wait_va_vdst:10
-// GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:0 ; encoding: [0x06,0x04,0x0a,0xce]
+// GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:0 ; encoding: [0x06,0x04,0x0a,0xce]
ds_param_load v7, attr2.y wait_va_vdst:9
-// GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:0 ; encoding: [0x07,0x09,0x09,0xce]
+// GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:0 ; encoding: [0x07,0x09,0x09,0xce]
ds_param_load v8, attr3.z wait_va_vdst:8
-// GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:0 ; encoding: [0x08,0x0e,0x08,0xce]
+// GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:0 ; encoding: [0x08,0x0e,0x08,0xce]
ds_param_load v9, attr4.w wait_va_vdst:7
-// GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:0 ; encoding: [0x09,0x13,0x07,0xce]
+// GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:0 ; encoding: [0x09,0x13,0x07,0xce]
ds_param_load v10, attr11.x wait_va_vdst:6
// GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x2c,0x06,0xce]
@@ -100,100 +100,100 @@ ds_param_load v17, attr32.w
// GFX12: ds_param_load v17, attr32.w wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x11,0x83,0x00,0xce]
ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1
-// GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x9f,0xce]
+// GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x9f,0xce]
ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1
-// GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x00,0x9e,0xce]
+// GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x00,0x9e,0xce]
ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1
-// GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x00,0x9d,0xce]
+// GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x00,0x9d,0xce]
ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1
-// GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x00,0x9c,0xce]
+// GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x00,0x9c,0xce]
ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1
-// GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x9b,0xce]
+// GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x9b,0xce]
ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1
-// GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x00,0x9a,0xce]
+// GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x00,0x9a,0xce]
ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1
-// GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x00,0x99,0xce]
+// GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x00,0x99,0xce]
ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1
-// GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x00,0x98,0xce]
+// GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x00,0x98,0xce]
ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1
-// GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x00,0x97,0xce]
+// GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x00,0x97,0xce]
ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1
-// GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x00,0x96,0xce]
+// GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x00,0x96,0xce]
ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1
-// GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x00,0x95,0xce]
+// GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x00,0x95,0xce]
ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1
-// GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1 ; encoding: [0x0c,0x00,0x94,0xce]
+// GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1 ; encoding: [0x0c,0x00,0x94,0xce]
ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1
-// GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0x00,0x93,0xce]
+// GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0x00,0x93,0xce]
ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1
-// GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0x00,0x92,0xce]
+// GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0x00,0x92,0xce]
ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1
-// GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0x00,0x91,0xce]
+// GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0x00,0x91,0xce]
ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:1
-// GFX12: ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x10,0x00,0x90,0xce]
+// GFX12: ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x10,0x00,0x90,0xce]
ds_direct_load v17 wait_vm_vsrc:1
-// GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x11,0x00,0x90,0xce]
+// GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x11,0x00,0x90,0xce]
ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1
-// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x8f,0xce]
+// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x8f,0xce]
ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1
-// GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x01,0x8e,0xce]
+// GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x01,0x8e,0xce]
ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1
-// GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x02,0x8d,0xce]
+// GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x02,0x8d,0xce]
ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1
-// GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x03,0x8c,0xce]
+// GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x03,0x8c,0xce]
ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1
-// GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x8b,0xce]
+// GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x8b,0xce]
ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1
-// GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x04,0x8a,0xce]
+// GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x04,0x8a,0xce]
ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1
-// GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x09,0x89,0xce]
+// GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x09,0x89,0xce]
ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1
-// GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x0e,0x88,0xce]
+// GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x0e,0x88,0xce]
ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1
-// GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x13,0x87,0xce]
+// GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x13,0x87,0xce]
ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:1
-// GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x2c,0x86,0xce]
+// GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x2c,0x86,0xce]
ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:1
-// GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x59,0x85,0xce]
+// GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x59,0x85,0xce]
ds_param_load v13, attr32.x wait_va_vdst:3 wait_vm_vsrc:1
-// GFX12: ds_param_load v13, attr32.x wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0x80,0x83,0xce]
+// GFX12: ds_param_load v13, attr32.x wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0x80,0x83,0xce]
ds_param_load v14, attr32.y wait_va_vdst:2 wait_vm_vsrc:1
-// GFX12: ds_param_load v14, attr32.y wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0x81,0x82,0xce]
+// GFX12: ds_param_load v14, attr32.y wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0x81,0x82,0xce]
ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:1
-// GFX12: ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0x82,0x81,0xce]
+// GFX12: ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0x82,0x81,0xce]
ds_param_load v16, attr32.w wait_va_vdst:0 wait_vm_vsrc:1
-// GFX12: ds_param_load v16, attr32.w wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x10,0x83,0x80,0xce]
+// GFX12: ds_param_load v16, attr32.w wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x10,0x83,0x80,0xce]
ds_param_load v17, attr32.w wait_vm_vsrc:1
-// GFX12: ds_param_load v17, attr32.w wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x11,0x83,0x80,0xce]
+// GFX12: ds_param_load v17, attr32.w wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x11,0x83,0x80,0xce]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt
index 121afb05ff6cf0..b7c0394429dc3b 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt
@@ -1,5 +1,5 @@
-# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX12 %s
-# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX12 %s
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck --strict-whitespace -check-prefix=GFX12 %s
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck --strict-whitespace -check-prefix=GFX12 %s
# GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x00,0x16,0xce]
0x0a,0x00,0x16,0xce
>From b70e60fda8653695d024c44c6b031964e6f8e4a8 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Wed, 13 Dec 2023 11:38:24 +0100
Subject: [PATCH 4/4] trailing space removed
---
llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
index 7acccfe6a30d2e..dbd732f9999227 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck --strict-whitespace -check-prefix=GFX12 %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck --strict-whitespace -check-prefix=GFX12 %s
ds_direct_load v1 wait_va_vdst:15
// GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x1f,0xce]
More information about the llvm-commits
mailing list