[llvm] 6a21e00 - [AMDGPU][AsmParser] Allow `v_writelane_b32` to use SGPR and M0 as source operands at the same time (#78827)

Tue Jan 30 12:39:35 PST 2024

Author: Shilei Tian
Date: 2024-01-30T15:39:31-05:00
New Revision: 6a21e00e397648141ed36aae4bd958efa09908f3

URL: https://github.com/llvm/llvm-project/commit/6a21e00e397648141ed36aae4bd958efa09908f3
DIFF: https://github.com/llvm/llvm-project/commit/6a21e00e397648141ed36aae4bd958efa09908f3.diff

LOG: [AMDGPU][AsmParser] Allow `v_writelane_b32` to use SGPR and M0 as source operands at the same time (#78827)

Currently the asm parser takes `v_writelane_b32 v1, s13, m0` as illegal
instruction for pre-gfx11 because it uses two constant buses while the
hardware
can only allow one. However, based on the comment of
`AMDGPUInstructionSelector::selectWritelane`,
it is allowed to have M0 as lane selector and a SGPR used as SRC0
because the
lane selector doesn't count as a use of constant bus. In fact, codegen
can already
generate this form, but this inconsistency is not exposed because the
validation
of constant bus limitation only happens when paring an assembly but we
don't have
a test case when both SGPR and M0 used as source operands for the
instruction.

Added: 
    llvm/test/MC/AMDGPU/writelane_m0.s
    llvm/test/MachineVerifier/writelane_m0.mir

Modified: 
    llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index cf8c6a1fc753d..33377a01ceb52 100644

--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -3514,6 +3514,24 @@ bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
   }
 }
 
+// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
+// Writelane is special in that it can use SGPR and M0 (which would normally
+// count as using the constant bus twice - but in this case it is allowed since
+// the lane selector doesn't count as a use of the constant bus). However, it is
+// still required to abide by the 1 SGPR rule.
+static bool checkWriteLane(const MCInst &Inst) {
+  const unsigned Opcode = Inst.getOpcode();
+  if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
+    return false;
+  const MCOperand &LaneSelOp = Inst.getOperand(2);
+  if (!LaneSelOp.isReg())
+    return false;
+  auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
+  if (LaneSelReg == M0 || LaneSelReg == M0_gfxpre11)
+    return true;
+  return false;
+}
+
 bool AMDGPUAsmParser::validateConstantBusLimitations(
     const MCInst &Inst, const OperandVector &Operands) {
   const unsigned Opcode = Inst.getOpcode();
@@ -3529,6 +3547,9 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(
       !isVOPD(Opcode))
     return true;
 
+  if (checkWriteLane(Inst))
+    return true;
+
   // Check special imm operands (used by madmk, etc)
   if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
     ++NumLiterals;

diff  --git a/llvm/test/MC/AMDGPU/writelane_m0.s b/llvm/test/MC/AMDGPU/writelane_m0.s
new file mode 100644
index 0000000000000..d12ae7eec67c9
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/writelane_m0.s
@@ -0,0 +1,18 @@
+// RUN: llvm-mc --triple=amdgcn --mcpu=gfx600 -show-encoding %s | FileCheck %s -check-prefix=GFX6
+// RUN: llvm-mc --triple=amdgcn --mcpu=gfx700 -show-encoding %s | FileCheck %s -check-prefix=GFX7
+// RUN: llvm-mc --triple=amdgcn --mcpu=gfx904 -show-encoding %s | FileCheck %s -check-prefix=GFX9
+// RUN: llvm-mc --triple=amdgcn --mcpu=gfx940 -show-encoding %s | FileCheck %s -check-prefix=GFX9
+// RUN: llvm-mc --triple=amdgcn --mcpu=gfx1010 -show-encoding %s | FileCheck %s -check-prefix=GFX10
+// RUN: llvm-mc --triple=amdgcn --mcpu=gfx1030 -show-encoding %s | FileCheck %s -check-prefix=GFX10
+// RUN: llvm-mc --triple=amdgcn --mcpu=gfx1100 -show-encoding %s | FileCheck %s -check-prefix=GFX11
+// RUN: llvm-mc --triple=amdgcn --mcpu=gfx1200 -show-encoding %s | FileCheck %s -check-prefix=GFX12
+
+.text
+  v_writelane_b32 v1, s13, m0
+
+// GFX6: v_writelane_b32 v1, s13, m0 ; encoding: [0x0d,0xf8,0x02,0x04]
+// GFX7: v_writelane_b32 v1, s13, m0 ; encoding: [0x0d,0xf8,0x02,0x04]
+// GFX9: v_writelane_b32 v1, s13, m0 ; encoding: [0x01,0x00,0x8a,0xd2,0x0d,0xf8,0x00,0x00]
+// GFX10: v_writelane_b32 v1, s13, m0 ; encoding: [0x01,0x00,0x61,0xd7,0x0d,0xf8,0x00,0x00]
+// GFX11: v_writelane_b32 v1, s13, m0 ; encoding: [0x01,0x00,0x61,0xd7,0x0d,0xfa,0x00,0x00]
+// GFX12: v_writelane_b32 v1, s13, m0 ; encoding: [0x01,0x00,0x61,0xd7,0x0d,0xfa,0x00,0x00]

diff  --git a/llvm/test/MachineVerifier/writelane_m0.mir b/llvm/test/MachineVerifier/writelane_m0.mir
new file mode 100644
index 0000000000000..2db6f47a42234
--- /dev/null
+++ b/llvm/test/MachineVerifier/writelane_m0.mir
@@ -0,0 +1,16 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s
+
+---
+
+name: writelane_m0
+body: |
+  bb.0:
+    ; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr0, $m0, $vgpr0
+    $vgpr0 = V_WRITELANE_B32 $sgpr0, $m0, $vgpr0