[llvm] [AMDGPU][MC] Allow null where 128b or larger dst reg is expected (PR #115200)

Wed Nov 6 11:38:28 PST 2024

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Jun Wang (jwanggit86)

<details>
<summary>Changes</summary>

For GFX10+, currently null cannot be used as dst reg in instructions that expect the dst reg to be 128b or larger (e.g., s_load_dwordx4). This patch fixes this problem.

---
Full diff: https://github.com/llvm/llvm-project/pull/115200.diff


4 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+6) 
- (modified) llvm/test/MC/AMDGPU/gfx10_asm_smem.s (+16) 
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_smem.s (+16) 
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_smem.s (+16) 


``````````diff

diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 758f864fd20e6a..4403c033196686 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -9720,6 +9720,12 @@ unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
     // The following code enables it for SReg_64 operands
     // used as source and destination. Remaining source
     // operands are handled in isInlinableImm.
+    //
+    // Additionally, allow null where destination of 128-bit or larger is
+    // expected.
+  case MCK_SReg_128:
+  case MCK_SReg_256:
+  case MCK_SReg_512:
     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
   default:
     return Match_InvalidOperand;
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_smem.s b/llvm/test/MC/AMDGPU/gfx10_asm_smem.s
index b582de83a2f291..683a0195037cf5 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_smem.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_smem.s
@@ -281,6 +281,22 @@ s_load_dwordx16 s[20:35], s[2:3], 0x1234 glc dlc
 s_load_dwordx16 s[20:35], s[2:3], s0 offset:0x12345 glc dlc
 // GFX10: encoding: [0x01,0x45,0x11,0xf4,0x45,0x23,0x01,0x00]
 
+// null as dst
+s_load_dword null, s[2:3], s0
+// GFX10: encoding: [0x41,0x1f,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_dwordx2 null, s[2:3], s0
+// GFX10: encoding: [0x41,0x1f,0x04,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_dwordx4 null, s[2:3], s0
+// GFX10: encoding: [0x41,0x1f,0x08,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_dwordx8 null, s[2:3], s0
+// GFX10: encoding: [0x41,0x1f,0x0c,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_dwordx16 null, s[2:3], s0
+// GFX10: encoding: [0x41,0x1f,0x10,0xf4,0x00,0x00,0x00,0x00]
+
 s_buffer_load_dword s5, s[4:7], s0
 // GFX10: encoding: [0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0x00]
 
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_smem.s b/llvm/test/MC/AMDGPU/gfx11_asm_smem.s
index 1d6b9476090758..e071c67f85891b 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_smem.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_smem.s
@@ -239,6 +239,22 @@ s_load_b512 s[20:35], s[2:3], s0 glc dlc
 s_load_b512 s[20:35], s[2:3], 0x1234 glc dlc
 // GFX11: encoding: [0x01,0x65,0x10,0xf4,0x34,0x12,0x00,0xf8]
 
+// null as dst
+s_load_b32 null, s[2:3], s0
+// GFX11: encoding: [0x01,0x1f,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b64 null, s[2:3], s0
+// GFX11: encoding: [0x01,0x1f,0x04,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b128 null, s[2:3], s0
+// GFX11: encoding: [0x01,0x1f,0x08,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b256 null, s[2:3], s0
+// GFX11: encoding: [0x01,0x1f,0x0c,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b512 null, s[2:3], s0
+// GFX11: encoding: [0x01,0x1f,0x10,0xf4,0x00,0x00,0x00,0x00]
+
 s_buffer_load_b32 s5, s[4:7], s0
 // GFX11: encoding: [0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0x00]
 
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_smem.s b/llvm/test/MC/AMDGPU/gfx12_asm_smem.s
index 668f767661f682..80082894b39fe1 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_smem.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_smem.s
@@ -541,6 +541,22 @@ s_load_b512 s[20:35], s[2:3], m0
 s_load_b512 s[20:35], s[2:3], 0x0
 // GFX12: s_load_b512 s[20:35], s[2:3], 0x0       ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xf8]
 
+// null as dst
+s_load_b32 null, s[2:3], s0 offset:0x0
+// GFX12: encoding: [0x01,0x1f,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b64 null, s[2:3], s0 offset:0x0
+// GFX12: encoding: [0x01,0x3f,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b128 null, s[2:3], s0 offset:0x0
+// GFX12: encoding: [0x01,0x5f,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b256 null, s[2:3], s0 offset:0x0
+// GFX12: encoding: [0x01,0x7f,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b512 null, s[2:3], s0 offset:0x0
+// GFX12: encoding: [0x01,0x9f,0x00,0xf4,0x00,0x00,0x00,0x00]
+
 s_buffer_load_b32 s5, s[4:7], s0
 // GFX12: s_buffer_load_b32 s5, s[4:7], s0 offset:0x0 ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0x00]
 

``````````

</details>


https://github.com/llvm/llvm-project/pull/115200