[llvm] [AMDGPU][MC][True16] VOP3dot instruction update for true16/fake16 (PR #113474)

Joe Nash via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 8 12:27:58 PST 2024


================
@@ -3395,32 +3395,92 @@
 # W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
 0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
 
-# GFX12: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W32-REAL16: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W32-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W64-REAL16: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W64-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
 0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92
 
 # op_sel[1:0] are ignored
-# GFX12: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W32-REAL16: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W32-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W64-REAL16: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W64-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
 0x00,0x78,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92
 
-# GFX12: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W32-REAL16: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W32-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W64-REAL16: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W64-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
 0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92
 
-# GFX12: v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92]
+# W32-REAL16: v_dot2_f16_f16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92]
+# W32-FAKE16: v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92]
+# W64-REAL16: v_dot2_f16_f16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92]
+# W64-FAKE16: v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92]
 0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92
 
-# GFX12: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00
+
+# GFX11: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05
+
+# GFX11: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00
+
+# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
 0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92
 
 # op_sel[1:0] are ignored
-# GFX12: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
 0x00,0x78,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92
 
-# GFX12: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
 0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92
 
-# GFX12: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92]
+# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92]
+# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92]
+# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92]
+# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92]
 0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92
 
+# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00
+
+# GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05
+
+# GFX11: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
----------------
Sisyph wrote:

GFX11 in a gfx12 file, something is wrong.

https://github.com/llvm/llvm-project/pull/113474


More information about the llvm-commits mailing list