[llvm] [AMDGPU][True16][MC] 16bit operand and vdst support in MC (PR #104510)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 21 15:54:44 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Brox Chen (broxigarchen)
<details>
<summary>Changes</summary>
---
Patch is 224.37 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/104510.diff
35 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+27-9)
- (modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (+14-8)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+7-3)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+156-53)
- (modified) llvm/lib/Target/AMDGPU/SIInstructions.td (+4-4)
- (modified) llvm/lib/Target/AMDGPU/SIModeRegister.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.td (+20-25)
- (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+15-7)
- (modified) llvm/lib/Target/AMDGPU/VOP2Instructions.td (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir (+90-72)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir (+80-68)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir (+18-14)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir (+20-12)
- (added) llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir (+19)
- (added) llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir (+19)
- (modified) llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir (-17)
- (modified) llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/schedule-regpressure-ilp-metric-spills.mir (+128-128)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop1.s (+25-19)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s (+32-32)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s (+16-10)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s (+90-24)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s (+20-20)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s (+28-28)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s (+8-8)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s (+18-18)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop1.s (+46-35)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt (+45-20)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt (+62-34)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt (+26-6)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt (+56-28)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt (+16-8)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt (+34-17)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 1a10206eea2374..6181a36b016adf 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -280,8 +280,9 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
}
- bool isRegOrImmWithIntT16InputMods() const {
- return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
+ template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
+ return isRegOrImmWithInputMods(
+ IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
}
bool isRegOrImmWithInt32InputMods() const {
@@ -292,6 +293,11 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
}
+ template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
+ return isRegOrInline(
+ IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
+ }
+
bool isRegOrInlineImmWithInt32InputMods() const {
return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
}
@@ -304,8 +310,9 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
}
- bool isRegOrImmWithFPT16InputMods() const {
- return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
+ template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
+ return isRegOrImmWithInputMods(
+ IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
}
bool isRegOrImmWithFP32InputMods() const {
@@ -354,6 +361,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
}
bool isVRegWithInputMods() const;
+ template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
template <bool IsFake16> bool isT16VRegWithInputMods() const;
bool isSDWAOperand(MVT type) const;
@@ -515,7 +523,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
}
- bool isVCSrcTB16() const {
+ bool isVCSrcT_b16() const {
return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
}
@@ -545,7 +553,11 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
}
- bool isVCSrcTF16() const {
+ bool isVCSrcT_f16() const {
+ return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
+ }
+
+ bool isVCSrcT_bf16() const {
return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
}
@@ -583,7 +595,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
- bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
+ bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
bool isVSrcT_b16_Lo128() const {
return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
@@ -617,7 +629,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
- bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
+ bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
bool isVSrcT_bf16_Lo128() const {
return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
@@ -2162,11 +2174,17 @@ bool AMDGPUOperand::isVRegWithInputMods() const {
AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
}
-template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
+template <bool IsFake16>
+bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
: AMDGPU::VGPR_16_Lo128RegClassID);
}
+template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
+ return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
+ : AMDGPU::VGPR_16RegClassID);
+}
+
bool AMDGPUOperand::isSDWAOperand(MVT type) const {
if (AsmParser->isVI())
return isVReg32();
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 1a0dc7098347ac..c8b8a7d120792e 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -328,36 +328,40 @@ DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}
+template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
+ unsigned OperandSemantics>
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
uint64_t /*Addr*/,
const MCDisassembler *Decoder) {
assert(isUInt<9>(Imm) && "9-bit encoding expected");
const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
- bool IsVGPR = Imm & (1 << 8);
- if (IsVGPR) {
+ if (Imm & AMDGPU::EncValues::IS_VGPR) {
bool IsHi = Imm & (1 << 7);
unsigned RegIdx = Imm & 0x7f;
return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}
- return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
- Imm & 0xFF, false, 16));
+ return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
+ OpWidth, Imm & 0xFF, false, ImmWidth,
+ (AMDGPU::OperandSemantics)OperandSemantics));
}
+template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
+ unsigned OperandSemantics>
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
uint64_t /*Addr*/,
const MCDisassembler *Decoder) {
assert(isUInt<10>(Imm) && "10-bit encoding expected");
const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
- bool IsVGPR = Imm & (1 << 8);
- if (IsVGPR) {
+ if (Imm & AMDGPU::EncValues::IS_VGPR) {
bool IsHi = Imm & (1 << 9);
unsigned RegIdx = Imm & 0xff;
return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}
- return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
- Imm & 0xFF, false, 16));
+ return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
+ OpWidth, Imm & 0xFF, false, ImmWidth,
+ (AMDGPU::OperandSemantics)OperandSemantics));
}
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
@@ -628,6 +632,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
convertVOP3DPPInst(MI); // Regular VOP3 case
}
+ convertTrue16OpSel(MI);
+
if (AMDGPU::isMAC(MI.getOpcode())) {
// Insert dummy unused src2_modifiers.
insertNamedMCOperand(MI, MCOperand::createImm(0),
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 6dce41d1605fa4..f9fce1ea899d33 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5424,9 +5424,13 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::S_CVT_F32_U32: return AMDGPU::V_CVT_F32_U32_e64;
case AMDGPU::S_CVT_I32_F32: return AMDGPU::V_CVT_I32_F32_e64;
case AMDGPU::S_CVT_U32_F32: return AMDGPU::V_CVT_U32_F32_e64;
- case AMDGPU::S_CVT_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64;
- case AMDGPU::S_CVT_HI_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64;
- case AMDGPU::S_CVT_F16_F32: return AMDGPU::V_CVT_F16_F32_t16_e64;
+ case AMDGPU::S_CVT_F32_F16:
+ case AMDGPU::S_CVT_HI_F32_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
+ : AMDGPU::V_CVT_F32_F16_fake16_e64;
+ case AMDGPU::S_CVT_F16_F32:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
+ : AMDGPU::V_CVT_F16_F32_fake16_e64;
case AMDGPU::S_CEIL_F32: return AMDGPU::V_CEIL_F32_e64;
case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64;
case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 85281713e22b1f..f0fc427531dd92 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1205,9 +1205,11 @@ class FPVCSrcInputModsMatchClass <int opSize> : FPInputModsMatchClass <opSize> {
}
def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
-def FPT16InputModsMatchClass : FPInputModsMatchClass<16> {
- let Name = "RegOrImmWithFPT16InputMods";
- let PredicateMethod = "isRegOrImmWithFPT16InputMods";
+class FPT16InputModsMatchClass<bit IsFake16> : FPInputModsMatchClass<16> {
+ let Name = !if(IsFake16, "RegOrImmWithFPFake16InputMods",
+ "RegOrImmWithFPT16InputMods");
+ let PredicateMethod = "isRegOrImmWithFPT16InputMods<" #
+ !if(IsFake16, "true", "false") # ">";
}
def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
@@ -1232,12 +1234,19 @@ class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> {
}
def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
-def FPT16InputMods : FPInputMods<FPT16InputModsMatchClass>;
+class FPT16InputMods<bit IsFake16> : FPInputMods<FPT16InputModsMatchClass<IsFake16>> {
+ let EncoderMethod = "getMachineOpValueT16";
+}
def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
+def FP32T16DstInputMods : FPInputMods<FP32InputModsMatchClass> {
+ let EncoderMethod = "getMachineOpValueT16";
+}
def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;
-class FP16VCSrcInputMods<bit IsFake16>
- : FPInputMods<FP16VCSrcInputModsMatchClass<IsFake16>>;
+class FPT16VCSrcInputMods<bit IsFake16 = 1>
+ : FPInputMods<FP16VCSrcInputModsMatchClass<IsFake16>> {
+let EncoderMethod = "getMachineOpValueT16";
+}
def FP32VCSrcInputMods : FPInputMods<FP32VCSrcInputModsMatchClass>;
class IntInputModsMatchClass <int opSize> : AsmOperandClass {
@@ -1249,21 +1258,38 @@ class IntVCSrcInputModsMatchClass <int opSize> : IntInputModsMatchClass <opSize>
let Name = "RegOrInlineImmWithInt"#opSize#"InputMods";
let PredicateMethod = "isRegOrInlineImmWithInt"#opSize#"InputMods";
}
-def IntT16InputModsMatchClass : IntInputModsMatchClass<16> {
- let Name = "RegOrImmWithIntT16InputMods";
- let PredicateMethod = "isRegOrImmWithIntT16InputMods";
+class IntT16InputModsMatchClass<bit IsFake16> : IntInputModsMatchClass<16> {
+ let Name = !if(IsFake16, "RegOrImmWithIntFake16InputMods",
+ "RegOrImmWithIntT16InputMods");
+ let PredicateMethod = "isRegOrImmWithIntT16InputMods<" #
+ !if(IsFake16, "true", "false") # ">";
}
def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
def Int32VCSrcInputModsMatchClass : IntVCSrcInputModsMatchClass<32>;
+class IntT16VCSrcInputModsMatchClass<bit IsFake16> : IntInputModsMatchClass<16> {
+ let Name = !if(IsFake16, "RegOrInlineImmWithIntFake16InputMods",
+ "RegOrInlineImmWithIntT16InputMods");
+ let PredicateMethod = "isRegOrInlineImmWithIntT16InputMods<" #
+ !if(IsFake16, "true", "false") # ">";
+}
class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
let PrintMethod = "printOperandAndIntInputMods";
}
-def IntT16InputMods : IntInputMods<IntT16InputModsMatchClass>;
+class IntT16InputMods<bit IsFake16> : IntInputMods<IntT16InputModsMatchClass<IsFake16>> {
+ let EncoderMethod = "getMachineOpValueT16";
+}
def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
+def Int32T16DstInputMods : IntInputMods<Int32InputModsMatchClass> {
+ let EncoderMethod = "getMachineOpValueT16";
+}
def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
def Int32VCSrcInputMods : IntInputMods<Int32VCSrcInputModsMatchClass>;
+class IntT16VCSrcInputMods<bit IsFake16 = 1>
+ : IntInputMods<IntT16VCSrcInputModsMatchClass<IsFake16>> {
+ let EncoderMethod = "getMachineOpValueT16";
+}
class OpSelModsMatchClass : AsmOperandClass {
let Name = "OpSelMods";
@@ -1297,6 +1323,23 @@ def FPVRegInputModsMatchClass : AsmOperandClass {
let PredicateMethod = "isVRegWithInputMods";
}
+def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
+ let PrintMethod = "printOperandAndFPInputMods";
+}
+
+def FPVRegT16DstInputMods : InputMods <FPVRegInputModsMatchClass> {
+ let PrintMethod = "printOperandAndFPInputMods";
+ let EncoderMethod = "getMachineOpValueT16";
+}
+
+class FPT16_Lo128VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
+ let Name = !if(IsFake16, "Fake16_Lo128VRegWithFPInputMods",
+ "T16_Lo128VRegWithFPInputMods");
+ let ParserMethod = "parseRegWithFPInputMods";
+ let PredicateMethod = "isT16_Lo128VRegWithInputMods<" #
+ !if(IsFake16, "true", "false") # ">";
+}
+
class FPT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
let Name = !if(IsFake16, "Fake16VRegWithFPInputMods",
"T16VRegWithFPInputMods");
@@ -1305,13 +1348,16 @@ class FPT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
!if(IsFake16, "true", "false") # ">";
}
-def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
+class FPT16_Lo128VRegInputMods<bit IsFake16 = 1>
+ : InputMods <FPT16_Lo128VRegInputModsMatchClass<IsFake16>> {
let PrintMethod = "printOperandAndFPInputMods";
+ let EncoderMethod = "getMachineOpValueT16Lo128";
}
-class FPT16VRegInputMods<bit IsFake16>
+class FPT16VRegInputMods<bit IsFake16 = 1>
: InputMods <FPT16VRegInputModsMatchClass<IsFake16>> {
let PrintMethod = "printOperandAndFPInputMods";
+ let EncoderMethod = "getMachineOpValueT16";
}
class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
@@ -1342,7 +1388,15 @@ def IntVRegInputModsMatchClass : AsmOperandClass {
let PredicateMethod = "isVRegWithInputMods";
}
-class IntT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
+class IntT16_Lo128VRegInputModsMatchClass<bit IsFake16 = 1> : AsmOperandClass {
+ let Name = !if(IsFake16, "Fake16_Lo128VRegWithIntInputMods",
+ "T16_Lo128VRegWithIntInputMods");
+ let ParserMethod = "parseRegWithIntInputMods";
+ let PredicateMethod = "isT16_Lo128VRegWithInputMods<" #
+ !if(IsFake16, "true", "false") # ">";
+}
+
+class IntT16VRegInputModsMatchClass<bit IsFake16 = 1> : AsmOperandClass {
let Name = !if(IsFake16, "Fake16VRegWithIntInputMods",
"T16VRegWithIntInputMods");
let ParserMethod = "parseRegWithIntInputMods";
@@ -1350,15 +1404,27 @@ class IntT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
!if(IsFake16, "true", "false") # ">";
}
-class IntT16VRegInputMods<bit IsFake16>
+class IntT16_Lo128VRegInputMods<bit IsFake16 = 1>
+ : InputMods <IntT16_Lo128VRegInputModsMatchClass<IsFake16>> {
+ let PrintMethod = "printOperandAndIntInputMods";
+ let EncoderMethod = "getMachineOpValueT16Lo128";
+}
+
+class IntT16VRegInputMods<bit IsFake16 = 1>
: InputMods <IntT16VRegInputModsMatchClass<IsFake16>> {
let PrintMethod = "printOperandAndIntInputMods";
+ let EncoderMethod = "getMachineOpValueT16";
}
def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
let PrintMethod = "printOperandAndIntInputMods";
}
+def IntVRegT16DstInputMods : InputMods <IntVRegInputModsMatchClass> {
+ let PrintMethod = "printOperandAndIntInputMods";
+ let EncoderMethod = "getMachineOpValueT16";
+}
+
class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
let Name = "PackedFP"#opSize#"InputMods";
let ParserMethod = "parseRegOrImmWithFPInputMods";
@@ -1585,7 +1651,7 @@ class getSOPSrcForVT<ValueType VT> {
}
// Returns the vreg register class to use for source operand given VT
-class getVregSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 0> {
+class getVregSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 1> {
RegisterOperand ret =
!cond(!eq(VT.Size, 128) : RegisterOperand<VReg_128>,
!eq(VT.Size, 96) : RegisterOperand<VReg_96>,
@@ -1627,12 +1693,12 @@ class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> {
}
// Src2 of VOP3 DPP instructions cannot be a literal
-class getVOP3DPPSrcForVT<ValueType VT> {
+class getVOP3DPPSrcForVT<ValueType VT, bit IsFake16 = 1> {
RegisterOperand ret =
!cond(!eq(VT, i1) : SSrc_i1,
- !eq(VT, i16) : VCSrc_b16,
- !eq(VT, f16) : VCSrc_f16,
- !eq(VT, bf16) : VCSrc_bf16,
+ !eq(VT, i16) : !if (IsFake16, VCSrc_b16, VCSrcT_b16),
+ !eq(VT, f16) : !if (IsFake16, VCSrc_f16, VCSrcT_f16),
+ !eq(VT, bf16) : !if (IsFake16, VCSrc_bf16, VCSrcT_bf16),
!eq(VT, v2i16) : VCSrc_v2b16,
!eq(VT, v2f16) : VCSrc_v2f16,
!eq(VT, v2bf16) : VCSrc_v2bf16,
@@ -1666,23 +1732,27 @@ class isModifierType<ValueType SrcVT> {
!eq(SrcVT.Value, v16bf16.Value));
}
-// Return type of input modifiers operand for specified input operand
-class getSrcMod <ValueType VT, bit IsTrue16 = 0> {
- Operand ret = !if(!eq(VT.Size, 64),
+// Return type of input modifiers operand for specified input operand.
+// True16: If the destination is a 16-bit value, the src0 modifier must hold
+// dst's opsel bit. Use a dummy value for DstVT if getting the mod for a src operand besides 0.
+// 64-bit src types are not implemented for True16 dst.
+class getSrc0Mod <ValueType VT, ValueType DstVT, bit IsTrue16 = 0, bit IsFake16 = 1> {
+ defvar T16Dst = !if(!eq(VT.Size, 64),
+ !if(VT.isFP, FP64InputMods, Int64InputMods),
+ !if(!eq(VT.Size, 16),
+ !if(VT.isFP, !if(IsTrue16, FPT16InputMods<IsFake16>, FP16InputMods),
+ !if(IsTrue16, IntT16InputMods<IsFake16>, IntOpSelMods)),
+ !if(VT.isFP, FP32T16DstInputMods, Int32T16DstInputMods)));
+ defvar Normal = !if(!eq(VT.Size, 64),
!if(VT.isFP, FP64InputMods, Int64InputMods),
!if(!eq(VT.Size, 16),
- !if(VT.isFP, !if(IsTrue16, FPT16InputMods, FP16InputMods),
- !if(IsTrue16, IntT16InputMods, IntOpSelMods)),
+ !if(VT.isFP, !if(IsTrue16, FPT16InputMods<IsFake16>, FP16InputMods),
+ !if(IsTrue16, IntT16InputMods<IsFake16>, IntOpSelMods)),
!if(VT.isFP, FP32InputMods, Int32InputMods)));
+ Operand ret = !if(!and(IsTrue16, !eq(DstVT.Size, 16)), T16Dst, Normal);
}
-class getOpSelMod <ValueType VT> {
- Operand ret = !cond(!eq(VT, f16) : FP16InputMods,
- !eq(VT, bf16) : FP16InputMods,
- !eq(VT, v2f16) : PackedF16InputMods,
- !eq(VT, v2bf16) : PackedF16InputMods,
- 1 : IntOpSelMods);
-}
+class getSrcMod<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 1> : getSrc0Mod<VT, f128/*Dummy Arg*/, IsTrue16, IsFake16>;
// Return type of input modifiers operand specified input operand for DPP
class getSrcModDPP <ValueType VT> {
@@ -1693,18 +1763,42 @@ class getSrcModDPP_t16 <ValueType VT, bit IsFake16 = 1> {
Operand ret =
!if (VT.isFP,
!if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
- FPT16VRegInputMods<IsFake16>, FPVRegInputMods),
+ FPT16_Lo128VRegInputMods<IsFake16>, FPVRegInputMods),
!if (!eq(VT.Value, i16.Value),
- IntT16VRegInputMods<IsFake16>, IntVRegInputMods));
+ IntT16_Lo128VRegInputMods<IsFake16>, IntVRegInputMods));
}
// Return type of input modifiers operand for specified input operand for DPP
-class getSrcModVOP3DPP <ValueType VT, bit IsFake16 = 1> {
+// True16: If the destination is a 16-bit value,...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/104510
More information about the llvm-commits mailing list