[llvm] 0e868af - [AMDGPU][MC][GFX11] Add validation of constant bus limitations for VOPD
Dmitry Preobrazhensky via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 15 06:38:34 PDT 2022
Author: Dmitry Preobrazhensky
Date: 2022-09-15T16:36:19+03:00
New Revision: 0e868aff4362bddaf303c4df7b70c8370287fa21
URL: https://github.com/llvm/llvm-project/commit/0e868aff4362bddaf303c4df7b70c8370287fa21
DIFF: https://github.com/llvm/llvm-project/commit/0e868aff4362bddaf303c4df7b70c8370287fa21.diff
LOG: [AMDGPU][MC][GFX11] Add validation of constant bus limitations for VOPD
Differential Revision: https://reviews.llvm.org/D133881
Added:
Modified:
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s
llvm/test/MC/AMDGPU/gfx11_asm_vopd_features.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 1e929cf4a2bfb..cfdbaab52b8d2 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -3468,9 +3468,8 @@ bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
}
}
-bool
-AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
- const OperandVector &Operands) {
+bool AMDGPUAsmParser::validateConstantBusLimitations(
+ const MCInst &Inst, const OperandVector &Operands) {
const unsigned Opcode = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opcode);
unsigned LastSGPR = AMDGPU::NoRegister;
@@ -3478,69 +3477,67 @@ AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
unsigned NumLiterals = 0;
unsigned LiteralSize;
- if (Desc.TSFlags &
- (SIInstrFlags::VOPC |
- SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
- SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
- SIInstrFlags::SDWA)) {
- // Check special imm operands (used by madmk, etc)
- if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
- ++NumLiterals;
- LiteralSize = 4;
- }
-
- SmallDenseSet<unsigned> SGPRsUsed;
- unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
- if (SGPRUsed != AMDGPU::NoRegister) {
- SGPRsUsed.insert(SGPRUsed);
- ++ConstantBusUseCount;
- }
-
- const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
- const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
- const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
-
- const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
-
- for (int OpIdx : OpIndices) {
- if (OpIdx == -1) break;
-
- const MCOperand &MO = Inst.getOperand(OpIdx);
- if (usesConstantBus(Inst, OpIdx)) {
- if (MO.isReg()) {
- LastSGPR = mc2PseudoReg(MO.getReg());
- // Pairs of registers with a partial intersections like these
- // s0, s[0:1]
- // flat_scratch_lo, flat_scratch
- // flat_scratch_lo, flat_scratch_hi
- // are theoretically valid but they are disabled anyway.
- // Note that this code mimics SIInstrInfo::verifyInstruction
- if (SGPRsUsed.insert(LastSGPR).second) {
- ++ConstantBusUseCount;
- }
- } else { // Expression or a literal
-
- if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
- continue; // special operand like VINTERP attr_chan
-
- // An instruction may use only one literal.
- // This has been validated on the previous step.
- // See validateVOPLiteral.
- // This literal may be used as more than one operand.
- // If all these operands are of the same size,
- // this literal counts as one scalar value.
- // Otherwise it counts as 2 scalar values.
- // See "GFX10 Shader Programming", section 3.6.2.3.
-
- unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
- if (Size < 4) Size = 4;
-
- if (NumLiterals == 0) {
- NumLiterals = 1;
- LiteralSize = Size;
- } else if (LiteralSize != Size) {
- NumLiterals = 2;
- }
+ if (!(Desc.TSFlags &
+ (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
+ SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
+ !isVOPD(Opcode))
+ return true;
+
+ // Check special imm operands (used by madmk, etc)
+ if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
+ ++NumLiterals;
+ LiteralSize = 4;
+ }
+
+ SmallDenseSet<unsigned> SGPRsUsed;
+ unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
+ if (SGPRUsed != AMDGPU::NoRegister) {
+ SGPRsUsed.insert(SGPRUsed);
+ ++ConstantBusUseCount;
+ }
+
+ OperandIndices OpIndices = getSrcOperandIndices(Opcode);
+
+ for (int OpIdx : OpIndices) {
+ if (OpIdx == -1)
+ continue;
+
+ const MCOperand &MO = Inst.getOperand(OpIdx);
+ if (usesConstantBus(Inst, OpIdx)) {
+ if (MO.isReg()) {
+ LastSGPR = mc2PseudoReg(MO.getReg());
+ // Pairs of registers with a partial intersections like these
+ // s0, s[0:1]
+ // flat_scratch_lo, flat_scratch
+ // flat_scratch_lo, flat_scratch_hi
+ // are theoretically valid but they are disabled anyway.
+ // Note that this code mimics SIInstrInfo::verifyInstruction
+ if (SGPRsUsed.insert(LastSGPR).second) {
+ ++ConstantBusUseCount;
+ }
+ } else { // Expression or a literal
+
+ if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
+ continue; // special operand like VINTERP attr_chan
+
+ // An instruction may use only one literal.
+ // This has been validated on the previous step.
+ // See validateVOPLiteral.
+ // This literal may be used as more than one operand.
+ // If all these operands are of the same size,
+ // this literal counts as one scalar value.
+ // Otherwise it counts as 2 scalar values.
+ // See "GFX10 Shader Programming", section 3.6.2.3.
+
+ unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
+ if (Size < 4)
+ Size = 4;
+
+ if (NumLiterals == 0) {
+ NumLiterals = 1;
+ LiteralSize = Size;
+ } else if (LiteralSize != Size) {
+ NumLiterals = 2;
}
}
}
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s
index 7e671bc4b883a..6a3d3e06978f6 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s
@@ -72,3 +72,72 @@ v_dual_fmamk_f32 v122, s0, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, s0,
// GFX11: error: only one literal operand is allowed
// GFX11-NEXT:{{^}}v_dual_fmamk_f32 v122, s0, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, s0, 0x1234, v162
// GFX11-NEXT:{{^}} ^
+
+//===----------------------------------------------------------------------===//
+// A VOPD instruction cannot use more than 2 scalar operands
+//===----------------------------------------------------------------------===//
+
+// 2 different SGPRs + LITERAL
+
+v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_and_b32 v247, s75, v98
+// GFX11: error: invalid operand (violates constant bus restrictions)
+// GFX11-NEXT:{{^}}v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_and_b32 v247, s75, v98
+// GFX11-NEXT:{{^}} ^
+
+v_dual_mov_b32 v247, s73 :: v_dual_fmaak_f32 v122, s74, v161, 2.741
+// GFX11: error: invalid operand (violates constant bus restrictions)
+// GFX11-NEXT:{{^}}v_dual_mov_b32 v247, s73 :: v_dual_fmaak_f32 v122, s74, v161, 2.741
+// GFX11-NEXT:{{^}} ^
+
+v_dual_fmamk_f32 v122, s0, 0xbabe, v161 :: v_dual_fmamk_f32 v123, s1, 0xbabe, v162
+// GFX11: error: invalid operand (violates constant bus restrictions)
+// GFX11-NEXT:{{^}}v_dual_fmamk_f32 v122, s0, 0xbabe, v161 :: v_dual_fmamk_f32 v123, s1, 0xbabe, v162
+// GFX11-NEXT:{{^}} ^
+
+// 2 different SGPRs + VCC
+
+v_dual_add_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s2, v3
+// GFX11: error: invalid operand (violates constant bus restrictions)
+// GFX11-NEXT:{{^}}v_dual_add_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s2, v3
+// GFX11-NEXT:{{^}} ^
+
+v_dual_cndmask_b32 v6, s1, v3 :: v_dual_add_f32 v255, s2, v2
+// GFX11: error: invalid operand (violates constant bus restrictions)
+// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v6, s1, v3 :: v_dual_add_f32 v255, s2, v2
+// GFX11-NEXT:{{^}} ^
+
+v_dual_cndmask_b32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s2, v3
+// GFX11: error: invalid operand (violates constant bus restrictions)
+// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s2, v3
+// GFX11-NEXT:{{^}} ^
+
+// SGPR + LITERAL + VCC
+
+v_dual_cndmask_b32 v255, s1, v2 :: v_dual_mov_b32 v254, 0xbabe
+// GFX11: error: invalid operand (violates constant bus restrictions)
+// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v255, s1, v2 :: v_dual_mov_b32 v254, 0xbabe
+// GFX11-NEXT:{{^}} ^
+
+v_dual_cndmask_b32 v255, 0xbabe, v2 :: v_dual_mov_b32 v254, s1
+// GFX11: error: invalid operand (violates constant bus restrictions)
+// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v255, 0xbabe, v2 :: v_dual_mov_b32 v254, s1
+// GFX11-NEXT:{{^}} ^
+
+v_dual_cndmask_b32 v255, s3, v2 :: v_dual_fmamk_f32 v254, v1, 0xbabe, v162
+// GFX11: error: invalid operand (violates constant bus restrictions)
+// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v255, s3, v2 :: v_dual_fmamk_f32 v254, v1, 0xbabe, v162
+// GFX11-NEXT:{{^}} ^
+
+v_dual_cndmask_b32 v255, v1, v2 :: v_dual_fmamk_f32 v254, s3, 0xbabe, v162
+// GFX11: error: invalid operand (violates constant bus restrictions)
+// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v255, v1, v2 :: v_dual_fmamk_f32 v254, s3, 0xbabe, v162
+// GFX11-NEXT:{{^}} ^
+
+// SGPR + VCC + VCC_LO
+// This is a special case because implicit VCC operand has 64 bit size.
+// SP3 does not accept this instruction as well.
+
+v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, s1, v3
+// GFX11: error: invalid operand (violates constant bus restrictions)
+// GFX11-NEXT:{{^}}v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, s1, v3
+// GFX11-NEXT:{{^}} ^
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopd_features.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopd_features.s
index 5e3fe36101da3..8d01ca7a5968e 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopd_features.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopd_features.s
@@ -39,3 +39,66 @@ v_dual_fmamk_f32 v122, v74, 0xfe0b, v162 :: v_dual_dot2acc_f32_f16 v24
v_dual_fmamk_f32 v122, 0xdeadbeef, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, 0xdeadbeef, 0xdeadbeef, v162
// GFX11: encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xef,0xbe,0xad,0xde]
+
+//===----------------------------------------------------------------------===//
+// A VOPD instruction can use 2 scalar operands,
+// but implicit VCC must be counted in.
+//===----------------------------------------------------------------------===//
+
+// 2 different SGPRs
+
+v_dual_mul_f32 v0, s1, v2 :: v_dual_mul_f32 v3, s4, v5
+// GFX11: encoding: [0x01,0x04,0xc6,0xc8,0x04,0x0a,0x02,0x00]
+
+// SGPR + LITERAL
+
+v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_and_b32 v247, v160, v98
+// GFX11: encoding: [0x4a,0x42,0x65,0xc8,0xa0,0xc5,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
+
+v_dual_mov_b32 v247, v160 :: v_dual_fmaak_f32 v122, s74, v161, 2.741
+// GFX11: encoding: [0xa0,0x01,0x02,0xca,0x4a,0x42,0x7b,0xf7,0x8b,0x6c,0x2f,0x40]
+
+// SGPR*2 + LITERAL
+
+v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_and_b32 v247, s74, v98
+// GFX11: encoding: [0x4a,0x42,0x65,0xc8,0x4a,0xc4,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
+
+// SGPR + LITERAL*2
+
+v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_fmamk_f32 v3, v6, 2.741, v1
+// GFX11: encoding: [0x4a,0x42,0x45,0xc8,0x06,0x03,0x02,0x7a,0x8b,0x6c,0x2f,0x40]
+
+// SGPR*2 + LITERAL*2
+
+v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_fmamk_f32 v3, s74, 2.741, v1
+// GFX11: encoding: [0x4a,0x42,0x45,0xc8,0x4a,0x02,0x02,0x7a,0x8b,0x6c,0x2f,0x40]
+
+// LITERAL + VCC
+
+v_dual_fmaak_f32 v122, v0, v161, 2.741 :: v_dual_cndmask_b32 v1, v2, v3
+// GFX11: encoding: [0x00,0x43,0x53,0xc8,0x02,0x07,0x00,0x7a,0x8b,0x6c,0x2f,0x40]
+
+// LITERAL*2 + VCC
+
+v_dual_fmaak_f32 v122, v0, v161, 2.741 :: v_dual_cndmask_b32 v1, 2.741, v3
+// GFX11: encoding: [0x00,0x43,0x53,0xc8,0xff,0x06,0x00,0x7a,0x8b,0x6c,0x2f,0x40]
+
+// LITERAL*2 + VCC*2
+
+v_dual_cndmask_b32 v255, 0xbabe, v2 :: v_dual_cndmask_b32 v6, 0xbabe, v3
+// GFX11: encoding: [0xff,0x04,0x52,0xca,0xff,0x06,0x06,0xff,0xbe,0xba,0x00,0x00]
+
+// SGPR*2 + VCC
+
+v_dual_add_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3
+// GFX11: encoding: [0x69,0x04,0x12,0xc9,0x69,0x06,0x06,0xff]
+
+// SGPR*2 + VCC*2
+
+v_dual_cndmask_b32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3
+// GFX11: encoding: [0x01,0x04,0x52,0xca,0x01,0x06,0x06,0xff]
+
+// VCC*2
+
+v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, v1, v3
+// GFX11: encoding: [0x6a,0x04,0x12,0xc9,0x01,0x07,0x06,0xff]
More information about the llvm-commits
mailing list