[llvm] [AMDGPU][MC] Allow null where 128b or larger dst reg is expected (PR #115200)
Jun Wang via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 6 11:38:07 PST 2024
https://github.com/jwanggit86 created https://github.com/llvm/llvm-project/pull/115200
On GFX10+, `null` currently cannot be used as the destination register in instructions that expect a destination of 128 bits or larger (e.g., s_load_dwordx4). This patch makes the assembler accept `null` for such destinations.
From a3953fcb4384db76f7968d30b68a6356df9d70d2 Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86 at yahoo.com>
Date: Wed, 6 Nov 2024 11:28:47 -0800
Subject: [PATCH] [AMDGPU][MC] Allow null where 128b or larger dst reg is
expected
For GFX10+, currently null cannot be used as dst reg in instructions
that expect the dst reg to be 128b or larger (e.g., s_load_dwordx4).
This patch fixes this problem.
---
.../Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 6 ++++++
llvm/test/MC/AMDGPU/gfx10_asm_smem.s | 16 ++++++++++++++++
llvm/test/MC/AMDGPU/gfx11_asm_smem.s | 16 ++++++++++++++++
llvm/test/MC/AMDGPU/gfx12_asm_smem.s | 16 ++++++++++++++++
4 files changed, 54 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 758f864fd20e6a..4403c033196686 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -9720,6 +9720,12 @@ unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
// The following code enables it for SReg_64 operands
// used as source and destination. Remaining source
// operands are handled in isInlinableImm.
+ //
+ // Additionally, allow null where destination of 128-bit or larger is
+ // expected.
+ case MCK_SReg_128:
+ case MCK_SReg_256:
+ case MCK_SReg_512:
return Operand.isNull() ? Match_Success : Match_InvalidOperand;
default:
return Match_InvalidOperand;
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_smem.s b/llvm/test/MC/AMDGPU/gfx10_asm_smem.s
index b582de83a2f291..683a0195037cf5 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_smem.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_smem.s
@@ -281,6 +281,22 @@ s_load_dwordx16 s[20:35], s[2:3], 0x1234 glc dlc
s_load_dwordx16 s[20:35], s[2:3], s0 offset:0x12345 glc dlc
// GFX10: encoding: [0x01,0x45,0x11,0xf4,0x45,0x23,0x01,0x00]
+// null as dst
+s_load_dword null, s[2:3], s0
+// GFX10: encoding: [0x41,0x1f,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_dwordx2 null, s[2:3], s0
+// GFX10: encoding: [0x41,0x1f,0x04,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_dwordx4 null, s[2:3], s0
+// GFX10: encoding: [0x41,0x1f,0x08,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_dwordx8 null, s[2:3], s0
+// GFX10: encoding: [0x41,0x1f,0x0c,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_dwordx16 null, s[2:3], s0
+// GFX10: encoding: [0x41,0x1f,0x10,0xf4,0x00,0x00,0x00,0x00]
+
s_buffer_load_dword s5, s[4:7], s0
// GFX10: encoding: [0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_smem.s b/llvm/test/MC/AMDGPU/gfx11_asm_smem.s
index 1d6b9476090758..e071c67f85891b 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_smem.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_smem.s
@@ -239,6 +239,22 @@ s_load_b512 s[20:35], s[2:3], s0 glc dlc
s_load_b512 s[20:35], s[2:3], 0x1234 glc dlc
// GFX11: encoding: [0x01,0x65,0x10,0xf4,0x34,0x12,0x00,0xf8]
+// null as dst
+s_load_b32 null, s[2:3], s0
+// GFX11: encoding: [0x01,0x1f,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b64 null, s[2:3], s0
+// GFX11: encoding: [0x01,0x1f,0x04,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b128 null, s[2:3], s0
+// GFX11: encoding: [0x01,0x1f,0x08,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b256 null, s[2:3], s0
+// GFX11: encoding: [0x01,0x1f,0x0c,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b512 null, s[2:3], s0
+// GFX11: encoding: [0x01,0x1f,0x10,0xf4,0x00,0x00,0x00,0x00]
+
s_buffer_load_b32 s5, s[4:7], s0
// GFX11: encoding: [0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_smem.s b/llvm/test/MC/AMDGPU/gfx12_asm_smem.s
index 668f767661f682..80082894b39fe1 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_smem.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_smem.s
@@ -541,6 +541,22 @@ s_load_b512 s[20:35], s[2:3], m0
s_load_b512 s[20:35], s[2:3], 0x0
// GFX12: s_load_b512 s[20:35], s[2:3], 0x0 ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xf8]
+// null as dst
+s_load_b32 null, s[2:3], s0 offset:0x0
+// GFX12: encoding: [0x01,0x1f,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b64 null, s[2:3], s0 offset:0x0
+// GFX12: encoding: [0x01,0x3f,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b128 null, s[2:3], s0 offset:0x0
+// GFX12: encoding: [0x01,0x5f,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b256 null, s[2:3], s0 offset:0x0
+// GFX12: encoding: [0x01,0x7f,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b512 null, s[2:3], s0 offset:0x0
+// GFX12: encoding: [0x01,0x9f,0x00,0xf4,0x00,0x00,0x00,0x00]
+
s_buffer_load_b32 s5, s[4:7], s0
// GFX12: s_buffer_load_b32 s5, s[4:7], s0 offset:0x0 ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0x00]
More information about the llvm-commits mailing list