[llvm] [AMDGPU][MC][True16] VOP3dot instruction update for true16/fake16 (PR #113474)
Brox Chen via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 5 08:38:51 PST 2024
================
@@ -2119,104 +2119,116 @@ v_div_scale_f64 v[254:255], vcc, 0xaf123456, -vcc, -1 clamp div:2
// W64: encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-v_dot2_bf16_bf16 v5, v1, v2, s3
+v_dot2_bf16_bf16 v5.l, v1, v2, s3
// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00]
-v_dot2_bf16_bf16 v5, v255, v255, s105
+v_dot2_bf16_bf16 v5.l, v255, v255, s105
// GFX11: encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01]
-v_dot2_bf16_bf16 v5, s1, s2, v3
+v_dot2_bf16_bf16 v5.l, s1, s2, v3.l
// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04]
-v_dot2_bf16_bf16 v5, s105, s105, m0
+v_dot2_bf16_bf16 v5.l, s105, s105, m0
// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01]
-v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255
-// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07]
+v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.h
+// GFX11: encoding: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07]
-v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi
+v_dot2_bf16_bf16 v5.l, vcc_hi, 0xfe0b, vcc_hi
// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00]
-v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15
+v_dot2_bf16_bf16 v5.l, ttmp15, src_scc, ttmp15
// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01]
-v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo
+v_dot2_bf16_bf16 v5.l, |m0|, -1, -vcc_lo
// GFX11: encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81]
-v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b|
+v_dot2_bf16_bf16 v5.l, -|exec_lo|, null, -|0xfe0b|
// GFX11: encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00]
-v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo|
+v_dot2_bf16_bf16 v5.l, -|exec_hi|, -|exec_lo|, -|exec_lo|
// GFX11: encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1]
-v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc|
+v_dot2_bf16_bf16 v5.l, null, -exec_hi, |src_scc|
// GFX11: encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43]
-v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| op_sel:[0,0,0,0]
+v_dot2_bf16_bf16 v5.l, -1, -|m0|, -|exec_hi|
// GFX11: encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1]
-v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
-// GFX11: encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23]
+v_dot2_bf16_bf16 v5.l, -src_scc, |vcc_lo|, -1
+// GFX11: encoding: [0x05,0x02,0x67,0xd6,0xfd,0xd4,0x04,0x23]
-v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1]
+v_dot2_bf16_bf16 v255.h, -|0xfe0b|, -|vcc_hi|, null
// GFX11: encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
-v_dot2_bf16_bf16 v2, v0, 0x20004000, v2
-// GFX11: v_dot2_bf16_bf16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
+v_dot2_bf16_bf16 v2.l, v0, 0x20004000, v2.l
+// GFX11: encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
-v_dot2_bf16_bf16 v2, 0x20004000, v0, v2
-// GFX11: v_dot2_bf16_bf16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
+v_dot2_bf16_bf16 v2.l, 0x20004000, v0, v2.l
+// GFX11: encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
-v_dot2_f16_f16 v5, v1, v2, s3
+v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.h
+// GFX11: encoding: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07]
+
+v_dot2_bf16_bf16 v255.h, -|0xfe0b|, -|vcc_hi|, null
+// GFX11: encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_dot2_f16_f16 v5.l, v1, v2, s3
// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00]
-v_dot2_f16_f16 v5, v255, s2, s105
+v_dot2_f16_f16 v5.l, v255, s2, s105
// GFX11: encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01]
-v_dot2_f16_f16 v5, s1, v255, exec_hi
+v_dot2_f16_f16 v5.l, s1, v255, exec_hi
// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01]
-v_dot2_f16_f16 v5, s105, s105, exec_lo
+v_dot2_f16_f16 v5.l, s105, s105, exec_lo
// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01]
-v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3
+v_dot2_f16_f16 v5.l, vcc_lo, ttmp15, v3.l
// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04]
-v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255
-// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.h
+// GFX11: encoding: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
-v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+v_dot2_f16_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15|
// GFX11: encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1]
-v_dot2_f16_f16 v5, m0, 0.5, m0
+v_dot2_f16_f16 v5.l, m0, 0.5, m0
// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01]
-v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi
+v_dot2_f16_f16 v5.l, |exec_lo|, -1, vcc_hi
// GFX11: encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01]
-v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo|
+v_dot2_f16_f16 v5.l, -|exec_hi|, null, -|vcc_lo|
// GFX11: encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1]
-v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b|
+v_dot2_f16_f16 v5.l, null, exec_lo, -|0xfe0b|
// GFX11: encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
-v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc|
+v_dot2_f16_f16 v5.l, -1, -|exec_hi|, -|src_scc|
// GFX11: encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3]
-v_dot2_f16_f16 v5, 0.5, -m0, 0.5 op_sel:[0,0,0,0]
+v_dot2_f16_f16 v5.l, 0.5, -m0, 0.5
// GFX11: encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43]
-v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
-// GFX11: encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23]
+v_dot2_f16_f16 v5.l, -src_scc, |vcc_lo|, -1
+// GFX11: encoding: [0x05,0x02,0x66,0xd6,0xfd,0xd4,0x04,0x23]
----------------
broxigarchen wrote:
I believe the old op_sel test line is selecting .h on the -1 operand, and the assembly does match it.
https://github.com/llvm/llvm-project/pull/113474
More information about the llvm-commits
mailing list