[llvm] 13c6568 - [AMDGPU][MC][GFX90A] Corrected DS_GWS opcodes

Dmitry Preobrazhensky via llvm-commits llvm-commits at lists.llvm.org
Wed May 26 11:32:15 PDT 2021


Author: Dmitry Preobrazhensky
Date: 2021-05-26T21:31:50+03:00
New Revision: 13c6568c6e20ee70aaa8157431e8a3d01be07e81

URL: https://github.com/llvm/llvm-project/commit/13c6568c6e20ee70aaa8157431e8a3d01be07e81
DIFF: https://github.com/llvm/llvm-project/commit/13c6568c6e20ee70aaa8157431e8a3d01be07e81.diff

LOG: [AMDGPU][MC][GFX90A] Corrected DS_GWS opcodes

Corrected DS_GWS opcodes to require even-aligned registers: on gfx90a the assembler now rejects an odd-numbered data0 register.

Differential Revision: https://reviews.llvm.org/D103185

Added: 
    llvm/test/MC/AMDGPU/gfx90a_err_pos.s

Modified: 
    llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/test/MC/AMDGPU/gfx90a_err.s
    llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s
    llvm/test/MC/Disassembler/AMDGPU/gfx90a_ldst_acc.txt

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 250876bd0c697..bb20f07ef35ac 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1546,6 +1546,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
   bool validateAGPRLdSt(const MCInst &Inst) const;
   bool validateVGPRAlign(const MCInst &Inst) const;
+  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
   bool validateDivScale(const MCInst &Inst);
   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                              const SMLoc &IDLoc);
@@ -4108,6 +4109,34 @@ bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
   return true;
 }
 
+// gfx90a has an undocumented limitation: the data0 register of DS_GWS_INIT,
+// DS_GWS_BARRIER and DS_GWS_SEMA_BR must have an even register index.
+bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
+                                  const OperandVector &Operands) {
+  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
+    return true; // Only targets with FeatureGFX90AInsts are checked.
+
+  int Opc = Inst.getOpcode();
+  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
+      Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
+    return true;
+  // Index data0 from VGPR0 if it is in VGPR_32, otherwise from AGPR0.
+  const MCRegisterInfo *MRI = getMRI();
+  const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
+  int Data0Pos =
+      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
+  assert(Data0Pos != -1);
+  auto Reg = Inst.getOperand(Data0Pos).getReg();
+  auto RegIdx = Reg - (VGRP32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
+  if (RegIdx & 1) {
+    SMLoc RegLoc = getRegLoc(Reg, Operands);
+    Error(RegLoc, "vgpr must be even aligned"); // also reported for AGPRs
+    return false;
+  }
+
+  return true;
+}
+
 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                             const OperandVector &Operands,
                                             const SMLoc &IDLoc) {
@@ -4251,6 +4280,9 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
       "invalid register class: vgpr tuples must be 64 bit aligned");
     return false;
   }
+  if (!validateGWS(Inst, Operands)) {
+    return false;
+  }
 
   if (!validateDivScale(Inst)) {
     Error(IDLoc, "ABS not allowed in VOP3B instructions");

diff  --git a/llvm/test/MC/AMDGPU/gfx90a_err.s b/llvm/test/MC/AMDGPU/gfx90a_err.s
index 2cc88bb05ecd3..3388db0c94a81 100644
--- a/llvm/test/MC/AMDGPU/gfx90a_err.s
+++ b/llvm/test/MC/AMDGPU/gfx90a_err.s
@@ -260,3 +260,21 @@ v_ashrrev_i16 v0, lds_direct, v0
 
 v_add_f32 v5, v1, lds_direct
 // GFX90A: error: lds_direct is not supported on this GPU
+
+ds_gws_init a1 offset:65535 gds
+// GFX90A: error: vgpr must be even aligned
+
+ds_gws_init a255 offset:65535 gds
+// GFX90A: error: vgpr must be even aligned
+
+ds_gws_sema_br v1 offset:65535 gds
+// GFX90A: error: vgpr must be even aligned
+
+ds_gws_sema_br v255 offset:65535 gds
+// GFX90A: error: vgpr must be even aligned
+
+ds_gws_barrier a3 offset:4 gds
+// GFX90A: error: vgpr must be even aligned
+
+ds_gws_barrier a255 offset:4 gds
+// GFX90A: error: vgpr must be even aligned

diff  --git a/llvm/test/MC/AMDGPU/gfx90a_err_pos.s b/llvm/test/MC/AMDGPU/gfx90a_err_pos.s
new file mode 100644
index 0000000000000..e7377c51df13e
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx90a_err_pos.s
@@ -0,0 +1,9 @@
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck %s --implicit-check-not=error: --strict-whitespace
+
+//==============================================================================
+// vgpr must be even aligned
+
+ds_gws_init a1 offset:65535 gds
+// CHECK: error: vgpr must be even aligned
+// CHECK-NEXT:{{^}}ds_gws_init a1 offset:65535 gds
+// CHECK-NEXT:{{^}}            ^

diff  --git a/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s b/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s
index dbc632b968a9b..5a92327d32cfd 100644
--- a/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s
+++ b/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s
@@ -10225,65 +10225,65 @@ ds_condxchg32_rtn_b64 a[6:7], v1, a[2:3] offset:4
 // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
 ds_condxchg32_rtn_b64 a[6:7], v1, a[2:3] offset:65535 gds
 
-// GFX90A: ds_gws_init a1 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0x01,0x00,0x00,0x00]
+// GFX90A: ds_gws_init a0 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0x00,0x00,0x00,0x00]
 // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
-ds_gws_init a1 offset:65535 gds
+ds_gws_init a0 offset:65535 gds
 
-// GFX90A: ds_gws_init a255 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0xff,0x00,0x00,0x00]
+// GFX90A: ds_gws_init a254 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0xfe,0x00,0x00,0x00]
 // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
-ds_gws_init a255 offset:65535 gds
+ds_gws_init a254 offset:65535 gds
 
-// GFX90A: ds_gws_init a1 gds              ; encoding: [0x00,0x00,0x33,0xdb,0x01,0x00,0x00,0x00]
+// GFX90A: ds_gws_init a2 gds ; encoding: [0x00,0x00,0x33,0xdb,0x02,0x00,0x00,0x00]
 // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
-ds_gws_init a1 gds
+ds_gws_init a2 gds
 
-// GFX90A: ds_gws_init a1 gds              ; encoding: [0x00,0x00,0x33,0xdb,0x01,0x00,0x00,0x00]
+// GFX90A: ds_gws_init a0 gds ; encoding: [0x00,0x00,0x33,0xdb,0x00,0x00,0x00,0x00]
 // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
-ds_gws_init a1 gds
+ds_gws_init a0 gds
 
-// GFX90A: ds_gws_init a1 offset:4 gds     ; encoding: [0x04,0x00,0x33,0xdb,0x01,0x00,0x00,0x00]
+// GFX90A: ds_gws_init a0 offset:4 gds ; encoding: [0x04,0x00,0x33,0xdb,0x00,0x00,0x00,0x00]
 // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
-ds_gws_init a1 offset:4 gds
+ds_gws_init a0 offset:4 gds
 
-// GFX90A: ds_gws_sema_br a1 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0x01,0x00,0x00,0x00]
+// GFX90A: ds_gws_sema_br a2 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0x02,0x00,0x00,0x00]
 // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
-ds_gws_sema_br a1 offset:65535 gds
+ds_gws_sema_br a2 offset:65535 gds
 
-// GFX90A: ds_gws_sema_br a255 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0xff,0x00,0x00,0x00]
+// GFX90A: ds_gws_sema_br a254 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0xfe,0x00,0x00,0x00]
 // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
-ds_gws_sema_br a255 offset:65535 gds
+ds_gws_sema_br a254 offset:65535 gds
 
-// GFX90A: ds_gws_sema_br a1 gds           ; encoding: [0x00,0x00,0x37,0xdb,0x01,0x00,0x00,0x00]
+// GFX90A: ds_gws_sema_br a0 gds ; encoding: [0x00,0x00,0x37,0xdb,0x00,0x00,0x00,0x00]
 // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
-ds_gws_sema_br a1 gds
+ds_gws_sema_br a0 gds
 
-// GFX90A: ds_gws_sema_br a1 gds           ; encoding: [0x00,0x00,0x37,0xdb,0x01,0x00,0x00,0x00]
+// GFX90A: ds_gws_sema_br a2 gds ; encoding: [0x00,0x00,0x37,0xdb,0x02,0x00,0x00,0x00]
 // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
-ds_gws_sema_br a1 gds
+ds_gws_sema_br a2 gds
 
-// GFX90A: ds_gws_sema_br a1 offset:4 gds  ; encoding: [0x04,0x00,0x37,0xdb,0x01,0x00,0x00,0x00]
+// GFX90A: ds_gws_sema_br a0 offset:4 gds ; encoding: [0x04,0x00,0x37,0xdb,0x00,0x00,0x00,0x00]
 // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
-ds_gws_sema_br a1 offset:4 gds
+ds_gws_sema_br a0 offset:4 gds
 
-// GFX90A: ds_gws_barrier a1 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0x01,0x00,0x00,0x00]
+// GFX90A: ds_gws_barrier a2 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0x02,0x00,0x00,0x00]
 // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
-ds_gws_barrier a1 offset:65535 gds
+ds_gws_barrier a2 offset:65535 gds
 
-// GFX90A: ds_gws_barrier a255 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0xff,0x00,0x00,0x00]
+// GFX90A: ds_gws_barrier a254 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0xfe,0x00,0x00,0x00]
 // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
-ds_gws_barrier a255 offset:65535 gds
+ds_gws_barrier a254 offset:65535 gds
 
-// GFX90A: ds_gws_barrier a1 gds           ; encoding: [0x00,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00]
+// GFX90A: ds_gws_barrier a0 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x00,0x00,0x00,0x00]
 // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
-ds_gws_barrier a1 gds
+ds_gws_barrier a0 gds
 
-// GFX90A: ds_gws_barrier a1 gds           ; encoding: [0x00,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00]
+// GFX90A: ds_gws_barrier a2 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x02,0x00,0x00,0x00]
 // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
-ds_gws_barrier a1 gds
+ds_gws_barrier a2 gds
 
-// GFX90A: ds_gws_barrier a1 offset:4 gds  ; encoding: [0x04,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00]
+// GFX90A: ds_gws_barrier a0 offset:4 gds ; encoding: [0x04,0x00,0x3b,0xdb,0x00,0x00,0x00,0x00]
 // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
-ds_gws_barrier a1 offset:4 gds
+ds_gws_barrier a0 offset:4 gds
 
 // GFX90A: ds_consume a5 offset:65535      ; encoding: [0xff,0xff,0x7a,0xdb,0x00,0x00,0x00,0x05]
 // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/gfx90a_ldst_acc.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx90a_ldst_acc.txt
index 69f0b3930cd0e..0b65c52d66d8d 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx90a_ldst_acc.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx90a_ldst_acc.txt
@@ -7668,50 +7668,50 @@
 # GFX90A: ds_condxchg32_rtn_b64 a[6:7], v1, a[2:3] offset:65535 gds ; encoding: [0xff,0xff,0xfd,0xda,0x01,0x02,0x00,0x06]
 0xff,0xff,0xfd,0xda,0x01,0x02,0x00,0x06
 
-# GFX90A: ds_gws_init a1 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0x01,0x00,0x00,0x00]
-0xff,0xff,0x33,0xdb,0x01,0x00,0x00,0x00
+# GFX90A: ds_gws_init a2 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0x02,0x00,0x00,0x00]
+0xff,0xff,0x33,0xdb,0x02,0x00,0x00,0x00
 
-# GFX90A: ds_gws_init a255 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0xff,0x00,0x00,0x00]
-0xff,0xff,0x33,0xdb,0xff,0x00,0x00,0x00
+# GFX90A: ds_gws_init a254 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0xfe,0x00,0x00,0x00]
+0xff,0xff,0x33,0xdb,0xfe,0x00,0x00,0x00
 
-# GFX90A: ds_gws_init a1 gds              ; encoding: [0x00,0x00,0x33,0xdb,0x01,0x00,0x00,0x00]
-0x00,0x00,0x33,0xdb,0x01,0x00,0x00,0x00
+# GFX90A: ds_gws_init a0 gds ; encoding: [0x00,0x00,0x33,0xdb,0x00,0x00,0x00,0x00]
+0x00,0x00,0x33,0xdb,0x00,0x00,0x00,0x00
 
-# GFX90A: ds_gws_init a1 gds              ; encoding: [0x00,0x00,0x33,0xdb,0x01,0x00,0x00,0x00]
-0x00,0x00,0x33,0xdb,0x01,0x00,0x00,0x00
+# GFX90A: ds_gws_init a0 gds ; encoding: [0x00,0x00,0x33,0xdb,0x00,0x00,0x00,0x00]
+0x00,0x00,0x33,0xdb,0x00,0x00,0x00,0x00
 
-# GFX90A: ds_gws_init a1 offset:4 gds     ; encoding: [0x04,0x00,0x33,0xdb,0x01,0x00,0x00,0x00]
-0x04,0x00,0x33,0xdb,0x01,0x00,0x00,0x00
+# GFX90A: ds_gws_init a2 offset:4 gds ; encoding: [0x04,0x00,0x33,0xdb,0x02,0x00,0x00,0x00]
+0x04,0x00,0x33,0xdb,0x02,0x00,0x00,0x00
 
-# GFX90A: ds_gws_sema_br a1 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0x01,0x00,0x00,0x00]
-0xff,0xff,0x37,0xdb,0x01,0x00,0x00,0x00
+# GFX90A: ds_gws_sema_br a2 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0x02,0x00,0x00,0x00]
+0xff,0xff,0x37,0xdb,0x02,0x00,0x00,0x00
 
-# GFX90A: ds_gws_sema_br a255 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0xff,0x00,0x00,0x00]
-0xff,0xff,0x37,0xdb,0xff,0x00,0x00,0x00
+# GFX90A: ds_gws_sema_br a254 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0xfe,0x00,0x00,0x00]
+0xff,0xff,0x37,0xdb,0xfe,0x00,0x00,0x00
 
-# GFX90A: ds_gws_sema_br a1 gds           ; encoding: [0x00,0x00,0x37,0xdb,0x01,0x00,0x00,0x00]
-0x00,0x00,0x37,0xdb,0x01,0x00,0x00,0x00
+# GFX90A: ds_gws_sema_br a0 gds ; encoding: [0x00,0x00,0x37,0xdb,0x00,0x00,0x00,0x00]
+0x00,0x00,0x37,0xdb,0x00,0x00,0x00,0x00
 
-# GFX90A: ds_gws_sema_br a1 gds           ; encoding: [0x00,0x00,0x37,0xdb,0x01,0x00,0x00,0x00]
-0x00,0x00,0x37,0xdb,0x01,0x00,0x00,0x00
+# GFX90A: ds_gws_sema_br a2 gds ; encoding: [0x00,0x00,0x37,0xdb,0x02,0x00,0x00,0x00]
+0x00,0x00,0x37,0xdb,0x02,0x00,0x00,0x00
 
-# GFX90A: ds_gws_sema_br a1 offset:4 gds  ; encoding: [0x04,0x00,0x37,0xdb,0x01,0x00,0x00,0x00]
-0x04,0x00,0x37,0xdb,0x01,0x00,0x00,0x00
+# GFX90A: ds_gws_sema_br a0 offset:4 gds ; encoding: [0x04,0x00,0x37,0xdb,0x00,0x00,0x00,0x00]
+0x04,0x00,0x37,0xdb,0x00,0x00,0x00,0x00
 
-# GFX90A: ds_gws_barrier a1 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0x01,0x00,0x00,0x00]
-0xff,0xff,0x3b,0xdb,0x01,0x00,0x00,0x00
+# GFX90A: ds_gws_barrier a2 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0x02,0x00,0x00,0x00]
+0xff,0xff,0x3b,0xdb,0x02,0x00,0x00,0x00
 
-# GFX90A: ds_gws_barrier a255 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0xff,0x00,0x00,0x00]
-0xff,0xff,0x3b,0xdb,0xff,0x00,0x00,0x00
+# GFX90A: ds_gws_barrier a254 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0xfe,0x00,0x00,0x00]
+0xff,0xff,0x3b,0xdb,0xfe,0x00,0x00,0x00
 
-# GFX90A: ds_gws_barrier a1 gds           ; encoding: [0x00,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00]
-0x00,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00
+# GFX90A: ds_gws_barrier a0 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x00,0x00,0x00,0x00]
+0x00,0x00,0x3b,0xdb,0x00,0x00,0x00,0x00
 
-# GFX90A: ds_gws_barrier a1 gds           ; encoding: [0x00,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00]
-0x00,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00
+# GFX90A: ds_gws_barrier a2 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x02,0x00,0x00,0x00]
+0x00,0x00,0x3b,0xdb,0x02,0x00,0x00,0x00
 
-# GFX90A: ds_gws_barrier a1 offset:4 gds  ; encoding: [0x04,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00]
-0x04,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00
+# GFX90A: ds_gws_barrier a2 offset:4 gds ; encoding: [0x04,0x00,0x3b,0xdb,0x02,0x00,0x00,0x00]
+0x04,0x00,0x3b,0xdb,0x02,0x00,0x00,0x00
 
 # GFX90A: ds_consume a5 offset:65535      ; encoding: [0xff,0xff,0x7a,0xdb,0x00,0x00,0x00,0x05]
 0xff,0xff,0x7a,0xdb,0x00,0x00,0x00,0x05


        


More information about the llvm-commits mailing list