[llvm] [AMDGPU] Fix v_dot2_f16_f16/v_dot2_bf16_bf16 operands (PR #82423)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 20 13:34:38 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mc
Author: Stanislav Mekhanoshin (rampitec)
<details>
<summary>Changes</summary>
src0 and src1 are packed f16/bf16 operands. We print literals such as 0x40002000 for them, but we cannot parse those literals back.
---
Full diff: https://github.com/llvm/llvm-project/pull/82423.diff
5 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+3)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+8-6)
- (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+1-12)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3.s (+12)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt (+12)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 85bd33e4efbd0f..5b32b34079f44e 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -323,6 +323,9 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
}
+ bool isPackedFP16InputMods() const {
+ return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
+ }
bool isVReg() const {
return isRegClass(AMDGPU::VGPR_32RegClassID) ||
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index cd14c12a8a80c6..97c723752b70b9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1289,9 +1289,8 @@ def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
let Name = "PackedFP"#opSize#"InputMods";
- let ParserMethod = "parseRegOrImm";
- let PredicateMethod = "isRegOrImm";
-// let PredicateMethod = "isPackedFP"#opSize#"InputMods";
+ let ParserMethod = "parseRegOrImmWithFPInputMods";
+ let PredicateMethod = "isPackedFP"#opSize#"InputMods";
}
class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
@@ -1305,7 +1304,7 @@ def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;
class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> {
-// let PrintMethod = "printPackedFPInputMods";
+ let PrintMethod = "printOperandAndFPInputMods";
}
class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <matchClass> {
@@ -1606,8 +1605,11 @@ class getSrcMod <ValueType VT, bit IsTrue16 = 0> {
}
class getOpSelMod <ValueType VT> {
- Operand ret = !if(!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
- FP16InputMods, IntOpSelMods);
+ Operand ret = !cond(!eq(VT, f16) : FP16InputMods,
+ !eq(VT, bf16) : FP16InputMods,
+ !eq(VT, v2f16) : PackedF16InputMods,
+ !eq(VT, v2bf16) : PackedF16InputMods,
+ 1 : IntOpSelMods);
}
// Return type of input modifiers operand specified input operand for DPP
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 35cffa22f45929..910d7456fc4c5f 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -866,20 +866,9 @@ def : DivFmasPat<f32, V_DIV_FMAS_F32_e64, VCC_LO>;
def : DivFmasPat<f64, V_DIV_FMAS_F64_e64, VCC_LO>;
}
-class VOP3_DOT_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile<P, Features> {
+class VOP3_DOT_Profile<VOPProfile P> : VOP3_Profile<P, VOP3_OPSEL> {
let HasClamp = 0;
let HasOMod = 0;
- // Override modifiers for bf16(i16) (same as float modifiers).
- let HasSrc0Mods = 1;
- let HasSrc1Mods = 1;
- let HasSrc2Mods = 1;
- let Src0ModVOP3DPP = FPVRegInputMods;
- let Src1ModVOP3DPP = FPVRegInputMods;
- let Src2ModVOP3DPP = FP16InputMods;
- let InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
- HasClamp, HasOMod, FP16InputMods,
- FP16InputMods, FP16InputMods>.ret;
- let AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs, HasClamp, HasOMod, 1, 1, 1>.ret;
}
let SubtargetPredicate = isGFX11Plus in {
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
index 9a94162005e1f7..d288c02a22c921 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
@@ -2116,6 +2116,12 @@ v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1]
// GFX11: encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+v_dot2_bf16_bf16 v2, v0, 0x20004000, v2
+// GFX11: v_dot2_bf16_bf16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
+
+v_dot2_bf16_bf16 v2, 0x20004000, v0, v2
+// GFX11: v_dot2_bf16_bf16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
+
v_dot2_f16_f16 v5, v1, v2, s3
// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00]
@@ -2161,6 +2167,12 @@ v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1]
// GFX11: encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+v_dot2_f16_f16 v2, v0, 0x20004000, v2
+// GFX11: v_dot2_f16_f16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
+
+v_dot2_f16_f16 v2, 0x20004000, v0, v2
+// GFX11: v_dot2_f16_f16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
+
v_fma_dx9_zero_f32 v5, v1, v2, s3
// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
index 7674c02185b5f2..fc35a2e6b4f8f4 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
@@ -1788,6 +1788,12 @@
# GFX11: v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00
+# GFX11: v_dot2_bf16_bf16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
+0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20
+
+# GFX11: v_dot2_bf16_bf16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
+0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20
+
# GFX11: v_dot2_f16_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00]
0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00
@@ -1833,6 +1839,12 @@
# GFX11: v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00
+# GFX11: v_dot2_f16_f16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
+0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20
+
+# GFX11: v_dot2_f16_f16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
+0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20
+
# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00]
0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00
``````````
</details>
https://github.com/llvm/llvm-project/pull/82423
More information about the llvm-commits mailing list