[llvm] [AMDGPU][MC][True16] VOP3dot instruction update for true16/fake16 (PR #113474)
Joe Nash via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 8 12:27:58 PST 2024
================
@@ -3395,32 +3395,92 @@
# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
-# GFX12: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W32-REAL16: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W32-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W64-REAL16: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W64-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92
# op_sel[1:0] are ignored
-# GFX12: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W32-REAL16: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W32-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W64-REAL16: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W64-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
0x00,0x78,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92
-# GFX12: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W32-REAL16: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W32-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W64-REAL16: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W64-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92
-# GFX12: v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92]
+# W32-REAL16: v_dot2_f16_f16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92]
+# W32-FAKE16: v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92]
+# W64-REAL16: v_dot2_f16_f16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92]
+# W64-FAKE16: v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92]
0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92
-# GFX12: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00
+
+# GFX11: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05
+
+# GFX11: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00
+
+# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92
# op_sel[1:0] are ignored
-# GFX12: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
0x00,0x78,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92
-# GFX12: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92
-# GFX12: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92]
+# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92]
+# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92]
+# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92]
+# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92]
0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92
+# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00
+
+# GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05
+
+# GFX11: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
----------------
Sisyph wrote:
GFX11 in a gfx12 file, something is wrong.
https://github.com/llvm/llvm-project/pull/113474
More information about the llvm-commits
mailing list