[llvm] [AMDGPU] Add VDSDIR instructions for GFX12 (PR #75197)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 12 07:17:21 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-mc
Author: Mirko Brkušanin (mbrkusanin)
<details>
<summary>Changes</summary>
---
Patch is 33.64 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/75197.diff
10 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+16)
- (modified) llvm/lib/Target/AMDGPU/LDSDIRInstructions.td (+94-24)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+20)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h (+4)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+2)
- (modified) llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp (+3-1)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll (+20-16)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll (+23-16)
- (added) llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s (+199)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt (+206)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 92427335c0ad2..cb2ea1f144e58 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -166,6 +166,8 @@ class AMDGPUOperand : public MCParsedAsmOperand {
ImmTyEndpgm,
ImmTyWaitVDST,
ImmTyWaitEXP,
+ ImmTyWaitVAVDst,
+ ImmTyWaitVMVSrc,
};
// Immediate operand kind.
@@ -908,6 +910,8 @@ class AMDGPUOperand : public MCParsedAsmOperand {
bool isEndpgm() const;
bool isWaitVDST() const;
bool isWaitEXP() const;
+ bool isWaitVAVDst() const;
+ bool isWaitVMVSrc() const;
auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
return std::bind(P, *this);
@@ -1028,6 +1032,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
}
static void printImmTy(raw_ostream& OS, ImmTy Type) {
+ // clang-format off
switch (Type) {
case ImmTyNone: OS << "None"; break;
case ImmTyGDS: OS << "GDS"; break;
@@ -1085,7 +1090,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
case ImmTyEndpgm: OS << "Endpgm"; break;
case ImmTyWaitVDST: OS << "WaitVDST"; break;
case ImmTyWaitEXP: OS << "WaitEXP"; break;
+ case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
+ case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
}
+ // clang-format on
}
void print(raw_ostream &OS) const override {
@@ -9130,6 +9138,14 @@ bool AMDGPUOperand::isWaitVDST() const {
return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
}
+bool AMDGPUOperand::isWaitVAVDst() const {
+ return isImmTy(ImmTyWaitVAVDst) && isUInt<4>(getImm());
+}
+
+bool AMDGPUOperand::isWaitVMVSrc() const {
+ return isImmTy(ImmTyWaitVMVSrc) && isUInt<1>(getImm());
+}
+
//===----------------------------------------------------------------------===//
// VINTERP
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td b/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td
index 4956a15867749..572798ae10c2b 100644
--- a/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td
+++ b/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td
@@ -1,4 +1,4 @@
-//===-- LDSDIRInstructions.td - LDS Direct Instruction Definitions --------===//
+//===-- LDSDIRInstructions.td - LDS/DS Direct Instruction Definitions -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// LDSDIR encoding
+// LDSDIR/VDSDIR encoding (LDSDIR is gfx11, VDSDIR is gfx12+)
//===----------------------------------------------------------------------===//
class LDSDIRe<bits<2> op, bit is_direct> : Enc32 {
@@ -27,8 +27,27 @@ class LDSDIRe<bits<2> op, bit is_direct> : Enc32 {
let Inst{7-0} = vdst;
}
+class VDSDIRe<bits<2> op, bit is_direct> : Enc32 {
+ // encoding fields
+ bits<2> attrchan;
+ bits<6> attr;
+ bits<4> waitvdst;
+ bits<8> vdst;
+ bits<1> waitvsrc;
+
+ // encoding
+ let Inst{31-24} = 0xce; // encoding
+ let Inst{23} = waitvsrc;
+ let Inst{22} = 0x0; // reserved
+ let Inst{21-20} = op;
+ let Inst{19-16} = waitvdst;
+ let Inst{15-10} = !if(is_direct, ?, attr);
+ let Inst{9-8} = !if(is_direct, ?, attrchan);
+ let Inst{7-0} = vdst;
+}
+
//===----------------------------------------------------------------------===//
-// LDSDIR Classes
+// LDSDIR/VDSDIR Classes
//===----------------------------------------------------------------------===//
class LDSDIR_getIns<bit direct> {
@@ -38,10 +57,15 @@ class LDSDIR_getIns<bit direct> {
);
}
-class LDSDIR_Common<string opName, string asm = "", bit direct> : InstSI<
- (outs VGPR_32:$vdst),
- LDSDIR_getIns<direct>.ret,
- asm> {
+class VDSDIR_getIns<bit direct> {
+ dag ret = !if(direct,
+ (ins WaitVAVDst:$waitvdst, WaitVMVSrc:$waitvsrc),
+ (ins InterpAttr:$attr, InterpAttrChan:$attrchan, WaitVAVDst:$waitvdst, WaitVMVSrc:$waitvsrc)
+ );
+}
+
+class DSDIR_Common<string opName, string asm = "", dag ins, bit direct> : InstSI<
+ (outs VGPR_32:$vdst), ins, asm> {
let LDSDIR = 1;
let EXP_CNT = 1;
@@ -60,8 +84,8 @@ class LDSDIR_Common<string opName, string asm = "", bit direct> : InstSI<
let is_direct = direct;
}
-class LDSDIR_Pseudo<string opName, bit direct> :
- LDSDIR_Common<opName, "", direct>,
+class DSDIR_Pseudo<string opName, dag ins, bit direct> :
+ DSDIR_Common<opName, "", ins, direct>,
SIMCInstr<opName, SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
@@ -74,22 +98,31 @@ class LDSDIR_getAsm<bit direct> {
);
}
-class LDSDIR_Real<bits<2> op, LDSDIR_Pseudo lds, int subtarget> :
- LDSDIR_Common<lds.Mnemonic,
- lds.Mnemonic # LDSDIR_getAsm<lds.is_direct>.ret,
- lds.is_direct>,
- SIMCInstr <lds.Mnemonic, subtarget>,
- LDSDIRe<op, lds.is_direct> {
+class VDSDIR_getAsm<bit direct> {
+ string ret = !if(direct,
+ " $vdst$waitvdst$waitvsrc",
+ " $vdst, $attr$attrchan$waitvdst$waitvsrc"
+ );
+}
+
+class DSDIR_Real<DSDIR_Pseudo lds, dag ins, string asm, int subtarget> :
+ DSDIR_Common<lds.Mnemonic,
+ lds.Mnemonic # asm,
+ ins,
+ lds.is_direct>,
+ SIMCInstr <lds.Mnemonic, subtarget> {
let isPseudo = 0;
let isCodeGenOnly = 0;
}
//===----------------------------------------------------------------------===//
-// LDS Direct Instructions
+// LDS/DS Direct Instructions
//===----------------------------------------------------------------------===//
-def LDS_DIRECT_LOAD : LDSDIR_Pseudo<"lds_direct_load", 1>;
-def LDS_PARAM_LOAD : LDSDIR_Pseudo<"lds_param_load", 0>;
+let SubtargetPredicate = isGFX11Only in {
+
+def LDS_DIRECT_LOAD : DSDIR_Pseudo<"lds_direct_load", LDSDIR_getIns<1>.ret, 1>;
+def LDS_PARAM_LOAD : DSDIR_Pseudo<"lds_param_load", LDSDIR_getIns<0>.ret, 0>;
def : GCNPat <
(f32 (int_amdgcn_lds_direct_load M0)),
@@ -101,16 +134,53 @@ def : GCNPat <
(LDS_PARAM_LOAD timm:$attr, timm:$attrchan, 0)
>;
+} // End SubtargetPredicate = isGFX11Only
+
+let SubtargetPredicate = isGFX12Plus in {
+
+def DS_DIRECT_LOAD : DSDIR_Pseudo<"ds_direct_load", VDSDIR_getIns<1>.ret, 1>;
+def DS_PARAM_LOAD : DSDIR_Pseudo<"ds_param_load", VDSDIR_getIns<0>.ret, 0>;
+
+def : GCNPat <
+ (f32 (int_amdgcn_lds_direct_load M0)),
+ (DS_DIRECT_LOAD 0, 1)
+>;
+
+def : GCNPat <
+ (f32 (int_amdgcn_lds_param_load timm:$attrchan, timm:$attr, M0)),
+ (DS_PARAM_LOAD timm:$attr, timm:$attrchan, 0, 1)
+>;
+
+} // End SubtargetPredicate = isGFX12Plus
+
//===----------------------------------------------------------------------===//
-// GFX11+
+// GFX11
//===----------------------------------------------------------------------===//
-multiclass LDSDIR_Real_gfx11<bits<2> op, LDSDIR_Pseudo lds = !cast<LDSDIR_Pseudo>(NAME)> {
- def _gfx11 : LDSDIR_Real<op, lds, SIEncodingFamily.GFX11> {
- let AssemblerPredicate = isGFX11Plus;
+multiclass DSDIR_Real_gfx11<bits<2> op, DSDIR_Pseudo lds = !cast<DSDIR_Pseudo>(NAME)> {
+ def _gfx11 : DSDIR_Real<lds, lds.InOperandList,
+ LDSDIR_getAsm<lds.is_direct>.ret, SIEncodingFamily.GFX11>,
+ LDSDIRe<op, lds.is_direct> {
+ let AssemblerPredicate = isGFX11Only;
let DecoderNamespace = "GFX11";
}
}
-defm LDS_PARAM_LOAD : LDSDIR_Real_gfx11<0x0>;
-defm LDS_DIRECT_LOAD : LDSDIR_Real_gfx11<0x1>;
+defm LDS_PARAM_LOAD : DSDIR_Real_gfx11<0x0>;
+defm LDS_DIRECT_LOAD : DSDIR_Real_gfx11<0x1>;
+
+//===----------------------------------------------------------------------===//
+// GFX12+
+//===----------------------------------------------------------------------===//
+
+multiclass DSDIR_Real_gfx12<bits<2> op, DSDIR_Pseudo lds = !cast<DSDIR_Pseudo>(NAME)> {
+ def _gfx12 : DSDIR_Real<lds, lds.InOperandList,
+ VDSDIR_getAsm<lds.is_direct>.ret, SIEncodingFamily.GFX12>,
+ VDSDIRe<op, lds.is_direct> {
+ let AssemblerPredicate = isGFX12Plus;
+ let DecoderNamespace = "GFX12";
+ }
+}
+
+defm DS_PARAM_LOAD : DSDIR_Real_gfx12<0x0>;
+defm DS_DIRECT_LOAD : DSDIR_Real_gfx12<0x1>;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 57f74ae08b35c..d99d343f81c40 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -631,6 +631,26 @@ void AMDGPUInstPrinter::printWaitVDST(const MCInst *MI, unsigned OpNo,
printU4ImmDecOperand(MI, OpNo, O);
}
+void AMDGPUInstPrinter::printWaitVAVDst(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ uint8_t Imm = MI->getOperand(OpNo).getImm();
+ if (Imm != 0) {
+ O << " wait_va_vdst:";
+ printU4ImmDecOperand(MI, OpNo, O);
+ }
+}
+
+void AMDGPUInstPrinter::printWaitVMVSrc(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ uint8_t Imm = MI->getOperand(OpNo).getImm();
+ if (Imm != 0) {
+ O << " wait_vm_vsrc:";
+ printU4ImmDecOperand(MI, OpNo, O);
+ }
+}
+
void AMDGPUInstPrinter::printWaitEXP(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index 95c26de6299ef..f2f985fa5b1a8 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -161,6 +161,10 @@ class AMDGPUInstPrinter : public MCInstPrinter {
raw_ostream &O);
void printWaitEXP(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printWaitVAVDst(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printWaitVMVSrc(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printExpSrcN(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O, unsigned N);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 9e60bdda5ef3a..e9a5beb99498b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1129,6 +1129,8 @@ def exp_tgt : CustomOperand<i32, 0, "ExpTgt">;
def wait_vdst : NamedIntOperand<i8, "wait_vdst", "WaitVDST">;
def wait_exp : NamedIntOperand<i8, "wait_exp", "WaitEXP">;
+def WaitVAVDst : NamedIntOperand<i8, "wait_va_vdst">;
+def WaitVMVSrc : NamedIntOperand<i8, "wait_vm_vsrc">;
class KImmFPOperand<ValueType vt> : ImmOperand<vt> {
let OperandNamespace = "AMDGPU";
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 59d6ccf513bb9..5e6c34992930b 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -553,7 +553,9 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
}
continue;
} else if (Opcode == AMDGPU::LDS_PARAM_LOAD ||
- Opcode == AMDGPU::LDS_DIRECT_LOAD) {
+ Opcode == AMDGPU::DS_PARAM_LOAD ||
+ Opcode == AMDGPU::LDS_DIRECT_LOAD ||
+ Opcode == AMDGPU::DS_DIRECT_LOAD) {
// Mark these STRICTWQM, but only for the instruction, not its operands.
// This avoid unnecessarily marking M0 as requiring WQM.
InstrInfo &II = Instructions[&MI];
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
index 313bd8525c6fd..195c5dabb4d46 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
@@ -1,23 +1,27 @@
-; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
-; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
-; GFX11-LABEL: {{^}}lds_direct_load:
-; GFX11: s_mov_b32 m0
+; GCN-LABEL: {{^}}lds_direct_load:
+; GCN: s_mov_b32 m0
; GFX11: lds_direct_load v{{[0-9]+}}
-; GFX11: s_mov_b32 m0
+; GFX12: ds_direct_load v{{[0-9]+}}
+; GCN: s_mov_b32 m0
; GFX11: lds_direct_load v{{[0-9]+}}
-; GFX11: s_mov_b32 m0
+; GFX12: ds_direct_load v{{[0-9]+}}
+; GCN: s_mov_b32 m0
; GFX11: lds_direct_load v{{[0-9]+}}
-; GFX11: s_waitcnt expcnt(2)
-; GFX11: v_add_f32
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(1)
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(0)
-; GFX11: buffer_store_b32
-; GFX11: buffer_store_b32
-; GFX11: buffer_store_b32
-; GFX11: buffer_store_b32
+; GCN: s_waitcnt expcnt(2)
+; GCN: v_add_f32
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(1)
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(0)
+; GCN: buffer_store_b32
+; GCN: buffer_store_b32
+; GCN: buffer_store_b32
+; GCN: buffer_store_b32
define amdgpu_ps void @lds_direct_load(ptr addrspace(8) inreg %buf, i32 inreg %arg0,
i32 inreg %arg1, i32 inreg %arg2) #0 {
main_body:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
index 5a8b03e50e2ee..1ab753d75fe03 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
@@ -1,25 +1,32 @@
-; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
-; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
-; GFX11-LABEL: {{^}}lds_param_load:
-; GFX11: s_mov_b32 m0
+; GCN-LABEL: {{^}}lds_param_load:
+; GCN: s_mov_b32 m0
; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.x
; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.y
; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.z
; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.w
; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr1.x
-; GFX11: s_waitcnt expcnt(4)
-; GFX11: v_add_f32
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(3)
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(2)
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(1)
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(0)
-; GFX11: buffer_store_b32
-; GFX11: buffer_store_b32
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.x
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.y
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.z
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.w
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr1.x
+; GCN: s_waitcnt expcnt(4)
+; GCN: v_add_f32
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(3)
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(2)
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(1)
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(0)
+; GCN: buffer_store_b32
+; GCN: buffer_store_b32
define amdgpu_ps void @lds_param_load(ptr addrspace(8) inreg %buf, i32 inreg %arg) #0 {
main_body:
%p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %arg)
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
new file mode 100644
index 0000000000000..4d40eae1c7e7f
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
@@ -0,0 +1,199 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck -check-prefix=GFX12 %s
+
+ds_direct_load v1 wait_va_vdst:15
+// GFX12: ds_direct_load v1 wait_va_vdst:15 ; encoding: [0x01,0x00,0x1f,0xce]
+
+ds_direct_load v2 wait_va_vdst:14
+// GFX12: ds_direct_load v2 wait_va_vdst:14 ; encoding: [0x02,0x00,0x1e,0xce]
+
+ds_direct_load v3 wait_va_vdst:13
+// GFX12: ds_direct_load v3 wait_va_vdst:13 ; encoding: [0x03,0x00,0x1d,0xce]
+
+ds_direct_load v4 wait_va_vdst:12
+// GFX12: ds_direct_load v4 wait_va_vdst:12 ; encoding: [0x04,0x00,0x1c,0xce]
+
+ds_direct_load v5 wait_va_vdst:11
+// GFX12: ds_direct_load v5 wait_va_vdst:11 ; encoding: [0x05,0x00,0x1b,0xce]
+
+ds_direct_load v6 wait_va_vdst:10
+// GFX12: ds_direct_load v6 wait_va_vdst:10 ; encoding: [0x06,0x00,0x1a,0xce]
+
+ds_direct_load v7 wait_va_vdst:9
+// GFX12: ds_direct_load v7 wait_va_vdst:9 ; encoding: [0x07,0x00,0x19,0xce]
+
+ds_direct_load v8 wait_va_vdst:8
+// GFX12: ds_direct_load v8 wait_va_vdst:8 ; encoding: [0x08,0x00,0x18,0xce]
+
+ds_direct_load v9 wait_va_vdst:7
+// GFX12: ds_direct_load v9 wait_va_vdst:7 ; encoding: [0x09,0x00,0x17,0xce]
+
+ds_direct_load v10 wait_va_vdst:6
+// GFX12: ds_direct_load v10 wait_va_vdst:6 ; encoding: [0x0a,0x00,0x16,0xce]
+
+ds_direct_load v11 wait_va_vdst:5
+// GFX12: ds_direct_load v11 wait_va_vdst:5 ; encoding: [0x0b,0x00,0x15,0xce]
+
+ds_direct_load v12 wait_va_vdst:4
+// GFX12: ds_direct_load v12 wait_va_vdst:4 ; encoding: [0x0c,0x00,0x14,0xce]
+
+ds_direct_load v13 wait_va_vdst:3
+// GFX12: ds_direct_load v13 wait_va_vdst:3 ; encoding: [0x0d,0x00,0x13,0xce]
+
+ds_direct_load v14 wait_va_vdst:2
+// GFX12: ds_direct_load v14 wait_va_vdst:2 ; encoding: [0x0e,0x00,0x12,0xce]
+
+ds_direct_load v15 wait_va_vdst:1
+// GFX12: ds_direct_load v15 wait_va_vdst:1 ; encoding: [0x0f,0x00,0x11,0xce]
+
+ds_direct_load v16 wait_va_vdst:0
+// GFX12: ds_direct_load v16 ; encoding: [0x10,0x00,0x10,0xce]
+
+ds_direct_load v17
+// GFX12: ds_direct_load v17 ; encoding: [0x11,0x00,0x10,0xce]
+
+ds_param_load v1, attr0.x wait_va_vdst:15
+// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 ; encoding: [0x01,0x00,0x0f,0xce]
+
+ds_param_load v2, attr0.y wait_va_vdst:14
+// GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 ; encoding: [0x02,0x01,0x0e,0xce]
+
+ds_param_load v3, attr0.z wait_va_vdst:13
+// GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 ; encoding: [0x03,0x02,0x0d,0xce]
+
+ds_param_load v4, attr0.w wait_va_vdst:12
+// GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 ; encoding: [0x04,0x03,0x0c,0xce]
+
+ds_param_load v5, attr0.x wait_va_vdst:11
+// GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 ; encoding: [0x05,0x00,0x0b,0xce]
+
+ds_param_load v6, attr1.x wait_va_vdst:10
+// GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 ; encoding: [0x06,0x04,0x0a,0xce]
+
+ds_param_load v7, attr2.y wait_va_vdst:9
+// GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 ; encoding: [0x07,0x09,0x09,0xce]
+
+ds_param_load v8, attr3.z wait_va_vdst:8
+// GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 ; encoding: [0x08,0x0e,0x08,0xce]
+
+ds_param_load v9, attr4.w wait_va_vdst:7
+// GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 ; encoding: [0x09,0x13,0x07,0xce]
+
+ds_param_load v10, attr11.x wait_va_vdst:6
+// GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 ; encoding: [0x0a,0x2c,0x06,0xce]
+
+ds_param_load v11, attr22.y wait_va_vdst:5
+// GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 ; encoding: [0x0b,0x59,0x05,0xce]
+
+ds_param_load v13, attr32.x wait_va_vdst:3
+// GFX12: ds_param_load v13, attr32.x wait_va_vdst:3 ; encoding: [0x0d,0x80,0x03,0xce]
+
+ds_param_load v14, attr32.y wait_va_vdst:2
+// GFX12: ds_param_load v14, attr32.y wa...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/75197
More information about the llvm-commits
mailing list