[llvm] 0a79e1f - [AMDGPU] reuse blgp as neg in 2 mfma operations on gfx940

Fri Mar 18 12:57:00 PDT 2022

Author: Stanislav Mekhanoshin
Date: 2022-03-18T12:56:51-07:00
New Revision: 0a79e1f30a5f21e23ccaa76198c4796a407bdb24

URL: https://github.com/llvm/llvm-project/commit/0a79e1f30a5f21e23ccaa76198c4796a407bdb24
DIFF: https://github.com/llvm/llvm-project/commit/0a79e1f30a5f21e23ccaa76198c4796a407bdb24.diff

LOG: [AMDGPU] reuse blgp as neg in 2 mfma operations on gfx940

GFX940 repurposes BLGP as NEG only in DGEMM MFMA.

Differential Revision: https://reviews.llvm.org/D121745

Added: 
    llvm/test/MC/AMDGPU/mai-err-gfx940.s

Modified: 
    llvm/include/llvm/IR/IntrinsicsAMDGPU.td
    llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
    llvm/test/MC/AMDGPU/mai-gfx940.s

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 0de25e20e7d0a..9ff3d96383086 100644

--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1980,6 +1980,9 @@ def int_amdgcn_mfma_f32_4x4x4bf16_1k    : AMDGPUMfmaIntrinsic<llvm_v4f32_ty,  ll
 def int_amdgcn_mfma_f32_32x32x8bf16_1k  : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4i16_ty>;
 def int_amdgcn_mfma_f32_16x16x16bf16_1k : AMDGPUMfmaIntrinsic<llvm_v4f32_ty,  llvm_v4i16_ty>;
 
+// Note: in gfx940 BLGP argument is replaced by NEG bitfield in the DGEMM MFMA.
+//       Three bits corresponding to the neg modifier applied to the respective
+//       source operand.
 def int_amdgcn_mfma_f64_16x16x4f64      : AMDGPUMfmaIntrinsic<llvm_v4f64_ty,  llvm_double_ty>;
 def int_amdgcn_mfma_f64_4x4x4f64        : AMDGPUMfmaIntrinsic<llvm_double_ty, llvm_double_ty>;
 

diff  --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index b25646814ea8f..348d2ff93dff5 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1569,6 +1569,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
 
   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
+  SMLoc getBLGPLoc(const OperandVector &Operands) const;
 
   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                       const OperandVector &Operands) const;
@@ -1600,6 +1601,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
   bool validateAGPRLdSt(const MCInst &Inst) const;
   bool validateVGPRAlign(const MCInst &Inst) const;
+  bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
   bool validateDivScale(const MCInst &Inst);
   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
@@ -4272,6 +4274,47 @@ bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
   return true;
 }
 
+SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
+  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+    if (Op.isBLGP())
+      return Op.getStartLoc();
+  }
+  return SMLoc();
+}
+
+bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
+                                   const OperandVector &Operands) {
+  unsigned Opc = Inst.getOpcode();
+  int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
+  if (BlgpIdx == -1)
+    return true;
+  SMLoc BLGPLoc = getBLGPLoc(Operands);
+  if (!BLGPLoc.isValid())
+    return true;
+  bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
+  auto FB = getFeatureBits();
+  bool UsesNeg = false;
+  if (FB[AMDGPU::FeatureGFX940Insts]) {
+    switch (Opc) {
+    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
+    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
+    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
+    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
+      UsesNeg = true;
+    }
+  }
+
+  if (IsNeg == UsesNeg)
+    return true;
+
+  Error(BLGPLoc,
+        UsesNeg ? "invalid modifier: blgp is not supported"
+                : "invalid modifier: neg is not supported");
+
+  return false;
+}
+
 // gfx90a has an undocumented limitation:
 // DS_GWS opcodes must use even aligned registers.
 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
@@ -4452,6 +4495,10 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
     return false;
   }
 
+  if (!validateBLGP(Inst, Operands)) {
+    return false;
+  }
+
   if (!validateDivScale(Inst)) {
     Error(IDLoc, "ABS not allowed in VOP3B instructions");
     return false;
@@ -7626,6 +7673,11 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands)
       res = parseCPol(Operands);
     } else {
       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
+      if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
+        res = parseOperandArrayWithPrefix("neg", Operands,
+                                          AMDGPUOperand::ImmTyBLGP,
+                                          nullptr);
+      }
     }
     if (res != MatchOperand_NoMatch) {
       return res;

diff  --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index af488a498a3db..c8306bc992625 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -550,6 +550,18 @@ void AMDGPUInstPrinter::printBLGP(const MCInst *MI, unsigned OpNo,
   if (!Imm)
     return;
 
+  if (AMDGPU::isGFX940(STI)) {
+    switch (MI->getOpcode()) {
+    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
+    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
+    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
+    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
+      O << " neg:[" << (Imm & 1) << ',' << ((Imm >> 1) & 1) << ','
+        << ((Imm >> 2) & 1) << ']';
+      return;
+    }
+  }
+
   O << " blgp:" << Imm;
 }
 

diff  --git a/llvm/test/MC/AMDGPU/mai-err-gfx940.s b/llvm/test/MC/AMDGPU/mai-err-gfx940.s
new file mode 100644
index 0000000000000..a2832eec76956
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/mai-err-gfx940.s
@@ -0,0 +1,22 @@
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck -check-prefix=GFX940 %s
+
+v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31] neg:[1,0,0]
+// GFX940: error: invalid modifier: neg is not supported
+
+v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31] neg:[1,0,0]
+// GFX940: error: invalid modifier: neg is not supported
+
+v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] blgp:7
+// GFX940: error: invalid modifier: blgp is not supported
+
+v_mfma_f64_16x16x4f64 v[0:7], v[0:1], v[2:3], v[0:7] blgp:7
+// GFX940: error: invalid modifier: blgp is not supported
+
+v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] blgp:7
+// GFX940: error: invalid modifier: blgp is not supported
+
+v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], a[2:3], v[2:3] blgp:7
+// GFX940: error: invalid modifier: blgp is not supported
+
+v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] blgp:7
+// GFX940: error: invalid modifier: blgp is not supported

diff  --git a/llvm/test/MC/AMDGPU/mai-gfx940.s b/llvm/test/MC/AMDGPU/mai-gfx940.s
index 1c58a7a304db2..b76de2dad6a27 100644
--- a/llvm/test/MC/AMDGPU/mai-gfx940.s
+++ b/llvm/test/MC/AMDGPU/mai-gfx940.s
@@ -19,6 +19,30 @@ v_mfma_f64_4x4x4f64 a[0:1], v[0:1], a[2:3], a[2:3]
 v_mfma_f64_4x4x4f64 v[0:1], v[0:1], a[2:3], v[2:3]
 // GFX940: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], a[2:3], v[2:3] ; encoding: [0x00,0x00,0xef,0xd3,0x00,0x05,0x0a,0x14]
 
+v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[1,0,0]
+// GFX940: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[1,0,0] ; encoding: [0x00,0x80,0xef,0xd3,0x00,0x05,0x0a,0x34]
+// GFX90A: error: instruction not supported on this GPU
+
+v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[0,1,0]
+// GFX940: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[0,1,0] ; encoding: [0x00,0x80,0xef,0xd3,0x00,0x05,0x0a,0x54]
+// GFX90A: error: instruction not supported on this GPU
+
+v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[0,0,1]
+// GFX940: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[0,0,1] ; encoding: [0x00,0x80,0xef,0xd3,0x00,0x05,0x0a,0x94]
+// GFX90A: error: instruction not supported on this GPU
+
+v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], a[2:3], v[2:3] neg:[1,1,1]
+// GFX940: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], a[2:3], v[2:3] neg:[1,1,1] ; encoding: [0x00,0x00,0xef,0xd3,0x00,0x05,0x0a,0xf4]
+// GFX90A: error: instruction not supported on this GPU
+
+v_mfma_f64_4x4x4f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[1,0,0]
+// GFX940: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[1,0,0] ; encoding: [0x00,0x80,0xef,0xd3,0x00,0x05,0x0a,0x34]
+// GFX90A: error: invalid modifier: neg is not supported
+
+v_mfma_f64_4x4x4f64 v[0:1], v[0:1], a[2:3], v[2:3] neg:[1,0,0]
+// GFX940: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], a[2:3], v[2:3] neg:[1,0,0] ; encoding: [0x00,0x00,0xef,0xd3,0x00,0x05,0x0a,0x34]
+// GFX90A: error: invalid modifier: neg is not supported
+
 v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7]
 // GFX940: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] ; encoding: [0x00,0x80,0xee,0xd3,0x00,0x05,0x02,0x04]
 // GFX90A: error: instruction not supported on this GPU
@@ -33,6 +57,22 @@ v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7]
 v_mfma_f64_16x16x4f64 v[0:7], v[0:1], v[2:3], v[0:7]
 // GFX940: v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] ; encoding: [0x00,0x00,0xee,0xd3,0x00,0x05,0x02,0x04]
 
+v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] neg:[1,1,1]
+// GFX940: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] neg:[1,1,1] ; encoding: [0x00,0x80,0xee,0xd3,0x00,0x05,0x02,0xe4]
+// GFX90A: error: instruction not supported on this GPU
+
+v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] neg:[1,1,1]
+// GFX940: v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] neg:[1,1,1] ; encoding: [0x00,0x00,0xee,0xd3,0x00,0x05,0x02,0xe4]
+// GFX90A: error: instruction not supported on this GPU
+
+v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7] neg:[1,0,0]
+// GFX940: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] neg:[1,0,0] ; encoding: [0x00,0x80,0xee,0xd3,0x00,0x05,0x02,0x24]
+// GFX90A: error: invalid modifier: neg is not supported
+
+v_mfma_f64_16x16x4f64 v[0:7], v[0:1], v[2:3], v[0:7] neg:[1,0,0]
+// GFX940: v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] neg:[1,0,0] ; encoding: [0x00,0x00,0xee,0xd3,0x00,0x05,0x02,0x24]
+// GFX90A: error: invalid modifier: neg is not supported
+
 v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33]
 // GFX940: v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33] ; encoding: [0x00,0x80,0xc1,0xd3,0x00,0x03,0x4a,0x04]
 // GFX90A: error: instruction not supported on this GPU