[llvm] 0a79e1f - [AMDGPU] reuse blgp as neg in 2 mfma operations on gfx940
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 18 12:57:00 PDT 2022
Author: Stanislav Mekhanoshin
Date: 2022-03-18T12:56:51-07:00
New Revision: 0a79e1f30a5f21e23ccaa76198c4796a407bdb24
URL: https://github.com/llvm/llvm-project/commit/0a79e1f30a5f21e23ccaa76198c4796a407bdb24
DIFF: https://github.com/llvm/llvm-project/commit/0a79e1f30a5f21e23ccaa76198c4796a407bdb24.diff
LOG: [AMDGPU] reuse blgp as neg in 2 mfma operations on gfx940
GFX940 repurposes BLGP as NEG only in DGEMM MFMA.
Differential Revision: https://reviews.llvm.org/D121745
Added:
llvm/test/MC/AMDGPU/mai-err-gfx940.s
Modified:
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
llvm/test/MC/AMDGPU/mai-gfx940.s
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 0de25e20e7d0a..9ff3d96383086 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1980,6 +1980,9 @@ def int_amdgcn_mfma_f32_4x4x4bf16_1k : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, ll
def int_amdgcn_mfma_f32_32x32x8bf16_1k : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4i16_ty>;
def int_amdgcn_mfma_f32_16x16x16bf16_1k : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_v4i16_ty>;
+// Note: on gfx940 the BLGP argument is replaced by a NEG bitfield in the DGEMM
+// MFMA intrinsics. It holds three bits, each one applying the neg modifier to
+// the respective source operand.
def int_amdgcn_mfma_f64_16x16x4f64 : AMDGPUMfmaIntrinsic<llvm_v4f64_ty, llvm_double_ty>;
def int_amdgcn_mfma_f64_4x4x4f64 : AMDGPUMfmaIntrinsic<llvm_double_ty, llvm_double_ty>;
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index b25646814ea8f..348d2ff93dff5 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1569,6 +1569,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
+ SMLoc getBLGPLoc(const OperandVector &Operands) const;
SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
const OperandVector &Operands) const;
@@ -1600,6 +1601,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
bool validateAGPRLdSt(const MCInst &Inst) const;
bool validateVGPRAlign(const MCInst &Inst) const;
+ bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
bool validateDivScale(const MCInst &Inst);
bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
@@ -4272,6 +4274,47 @@ bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
return true;
}
+// Returns the start location of the first parsed operand for which isBLGP()
+// is true. If no such operand exists, returns a default-constructed SMLoc,
+// which the caller detects through SMLoc::isValid().
+SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
+ // The scan starts at index 1.
+ // NOTE(review): presumably Operands[0] is the mnemonic token - confirm.
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+ if (Op.isBLGP())
+ return Op.getStartLoc();
+ }
+ return SMLoc();
+}
+
+// Checks that the spelling used for the blgp operand ("blgp:" or "neg:")
+// matches what the opcode accepts on the current subtarget.
+//
+// With FeatureGFX940Insts, the four V_MFMA_F64_* gfx940 opcodes listed below
+// accept only the "neg:" spelling. Every other instruction that has a blgp
+// operand rejects "neg:".
+//
+// Returns true when the instruction has no blgp operand, when no BLGP operand
+// was found among the parsed operands, or when the spelling matches.
+// Otherwise reports an error at the operand's location and returns false.
+bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
+ const OperandVector &Operands) {
+ unsigned Opc = Inst.getOpcode();
+ int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
+ // Opcodes without a blgp operand have nothing to validate.
+ if (BlgpIdx == -1)
+ return true;
+ SMLoc BLGPLoc = getBLGPLoc(Operands);
+ // An invalid location means no BLGP operand was found in Operands.
+ if (!BLGPLoc.isValid())
+ return true;
+ // Look at the source text at the operand's start location to see which
+ // spelling was written.
+ bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
+ auto FB = getFeatureBits();
+ // UsesNeg is set only for the gfx940 f64 MFMA opcodes listed in the switch.
+ bool UsesNeg = false;
+ if (FB[AMDGPU::FeatureGFX940Insts]) {
+ switch (Opc) {
+ case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
+ case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
+ case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
+ case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
+ UsesNeg = true;
+ }
+ }
+
+ // The spelling written must agree with the one the opcode expects.
+ if (IsNeg == UsesNeg)
+ return true;
+
+ // On a mismatch, the message names the spelling that was wrongly used.
+ Error(BLGPLoc,
+ UsesNeg ? "invalid modifier: blgp is not supported"
+ : "invalid modifier: neg is not supported");
+
+ return false;
+}
+
// gfx90a has an undocumented limitation:
// DS_GWS opcodes must use even aligned registers.
bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
@@ -4452,6 +4495,10 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
return false;
}
+ if (!validateBLGP(Inst, Operands)) {
+ return false;
+ }
+
if (!validateDivScale(Inst)) {
Error(IDLoc, "ABS not allowed in VOP3B instructions");
return false;
@@ -7626,6 +7673,11 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands)
res = parseCPol(Operands);
} else {
res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
+ if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
+ res = parseOperandArrayWithPrefix("neg", Operands,
+ AMDGPUOperand::ImmTyBLGP,
+ nullptr);
+ }
}
if (res != MatchOperand_NoMatch) {
return res;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index af488a498a3db..c8306bc992625 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -550,6 +550,18 @@ void AMDGPUInstPrinter::printBLGP(const MCInst *MI, unsigned OpNo,
if (!Imm)
return;
+ if (AMDGPU::isGFX940(STI)) {
+ switch (MI->getOpcode()) {
+ case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
+ case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
+ case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
+ case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
+ O << " neg:[" << (Imm & 1) << ',' << ((Imm >> 1) & 1) << ','
+ << ((Imm >> 2) & 1) << ']';
+ return;
+ }
+ }
+
O << " blgp:" << Imm;
}
diff --git a/llvm/test/MC/AMDGPU/mai-err-gfx940.s b/llvm/test/MC/AMDGPU/mai-err-gfx940.s
new file mode 100644
index 0000000000000..a2832eec76956
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/mai-err-gfx940.s
@@ -0,0 +1,22 @@
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck -check-prefix=GFX940 %s
+
+v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31] neg:[1,0,0]
+// GFX940: error: invalid modifier: neg is not supported
+
+v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31] neg:[1,0,0]
+// GFX940: error: invalid modifier: neg is not supported
+
+v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] blgp:7
+// GFX940: error: invalid modifier: blgp is not supported
+
+v_mfma_f64_16x16x4f64 v[0:7], v[0:1], v[2:3], v[0:7] blgp:7
+// GFX940: error: invalid modifier: blgp is not supported
+
+v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] blgp:7
+// GFX940: error: invalid modifier: blgp is not supported
+
+v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], a[2:3], v[2:3] blgp:7
+// GFX940: error: invalid modifier: blgp is not supported
+
+v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] blgp:7
+// GFX940: error: invalid modifier: blgp is not supported
diff --git a/llvm/test/MC/AMDGPU/mai-gfx940.s b/llvm/test/MC/AMDGPU/mai-gfx940.s
index 1c58a7a304db2..b76de2dad6a27 100644
--- a/llvm/test/MC/AMDGPU/mai-gfx940.s
+++ b/llvm/test/MC/AMDGPU/mai-gfx940.s
@@ -19,6 +19,30 @@ v_mfma_f64_4x4x4f64 a[0:1], v[0:1], a[2:3], a[2:3]
v_mfma_f64_4x4x4f64 v[0:1], v[0:1], a[2:3], v[2:3]
// GFX940: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], a[2:3], v[2:3] ; encoding: [0x00,0x00,0xef,0xd3,0x00,0x05,0x0a,0x14]
+v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[1,0,0]
+// GFX940: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[1,0,0] ; encoding: [0x00,0x80,0xef,0xd3,0x00,0x05,0x0a,0x34]
+// GFX90A: error: instruction not supported on this GPU
+
+v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[0,1,0]
+// GFX940: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[0,1,0] ; encoding: [0x00,0x80,0xef,0xd3,0x00,0x05,0x0a,0x54]
+// GFX90A: error: instruction not supported on this GPU
+
+v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[0,0,1]
+// GFX940: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[0,0,1] ; encoding: [0x00,0x80,0xef,0xd3,0x00,0x05,0x0a,0x94]
+// GFX90A: error: instruction not supported on this GPU
+
+v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], a[2:3], v[2:3] neg:[1,1,1]
+// GFX940: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], a[2:3], v[2:3] neg:[1,1,1] ; encoding: [0x00,0x00,0xef,0xd3,0x00,0x05,0x0a,0xf4]
+// GFX90A: error: instruction not supported on this GPU
+
+v_mfma_f64_4x4x4f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[1,0,0]
+// GFX940: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[1,0,0] ; encoding: [0x00,0x80,0xef,0xd3,0x00,0x05,0x0a,0x34]
+// GFX90A: error: invalid modifier: neg is not supported
+
+v_mfma_f64_4x4x4f64 v[0:1], v[0:1], a[2:3], v[2:3] neg:[1,0,0]
+// GFX940: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], a[2:3], v[2:3] neg:[1,0,0] ; encoding: [0x00,0x00,0xef,0xd3,0x00,0x05,0x0a,0x34]
+// GFX90A: error: invalid modifier: neg is not supported
+
v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7]
// GFX940: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] ; encoding: [0x00,0x80,0xee,0xd3,0x00,0x05,0x02,0x04]
// GFX90A: error: instruction not supported on this GPU
@@ -33,6 +57,22 @@ v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7]
v_mfma_f64_16x16x4f64 v[0:7], v[0:1], v[2:3], v[0:7]
// GFX940: v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] ; encoding: [0x00,0x00,0xee,0xd3,0x00,0x05,0x02,0x04]
+v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] neg:[1,1,1]
+// GFX940: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] neg:[1,1,1] ; encoding: [0x00,0x80,0xee,0xd3,0x00,0x05,0x02,0xe4]
+// GFX90A: error: instruction not supported on this GPU
+
+v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] neg:[1,1,1]
+// GFX940: v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] neg:[1,1,1] ; encoding: [0x00,0x00,0xee,0xd3,0x00,0x05,0x02,0xe4]
+// GFX90A: error: instruction not supported on this GPU
+
+v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7] neg:[1,0,0]
+// GFX940: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] neg:[1,0,0] ; encoding: [0x00,0x80,0xee,0xd3,0x00,0x05,0x02,0x24]
+// GFX90A: error: invalid modifier: neg is not supported
+
+v_mfma_f64_16x16x4f64 v[0:7], v[0:1], v[2:3], v[0:7] neg:[1,0,0]
+// GFX940: v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] neg:[1,0,0] ; encoding: [0x00,0x00,0xee,0xd3,0x00,0x05,0x02,0x24]
+// GFX90A: error: invalid modifier: neg is not supported
+
v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33]
// GFX940: v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33] ; encoding: [0x00,0x80,0xc1,0xd3,0x00,0x03,0x4a,0x04]
// GFX90A: error: instruction not supported on this GPU
More information about the llvm-commits
mailing list