[llvm] [AMDGPU][MC][True16] Support VOP2 instructions with true16 format (PR #115233)
Joe Nash via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 15 11:03:04 PST 2024
================
@@ -446,49 +446,78 @@
# GFX12: v_cvt_pk_rtz_f16_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x5f,0x56,0x34,0x12,0xaf]
0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, v1, v2, 0xfe0b ; encoding: [0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, v1.l, v2.l, 0xfe0b ; encoding: [0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, v1, v2, 0xfe0b ; encoding: [0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, v127, v2, 0xfe0b ; encoding: [0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, v127.l, v2.l, 0xfe0b ; encoding: [0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, v127, v2, 0xfe0b ; encoding: [0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, s1, v2, 0xfe0b ; encoding: [0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, s1, v2.l, 0xfe0b ; encoding: [0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, s1, v2, 0xfe0b ; encoding: [0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, s105, v2, 0xfe0b ; encoding: [0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, s105, v2.l, 0xfe0b ; encoding: [0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, s105, v2, 0xfe0b ; encoding: [0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, vcc_lo, v2, 0xfe0b ; encoding: [0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, vcc_lo, v2.l, 0xfe0b ; encoding: [0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, vcc_lo, v2, 0xfe0b ; encoding: [0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, vcc_hi, v2, 0xfe0b ; encoding: [0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, vcc_hi, v2.l, 0xfe0b ; encoding: [0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, vcc_hi, v2, 0xfe0b ; encoding: [0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, ttmp15, v2, 0xfe0b ; encoding: [0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, ttmp15, v2.l, 0xfe0b ; encoding: [0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, ttmp15, v2, 0xfe0b ; encoding: [0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, m0, v2, 0xfe0b ; encoding: [0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, m0, v2.l, 0xfe0b ; encoding: [0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, m0, v2, 0xfe0b ; encoding: [0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, exec_lo, v2, 0xfe0b ; encoding: [0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, exec_lo, v2.l, 0xfe0b ; encoding: [0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, exec_lo, v2, 0xfe0b ; encoding: [0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, exec_hi, v2, 0xfe0b ; encoding: [0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, exec_hi, v2.l, 0xfe0b ; encoding: [0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, exec_hi, v2, 0xfe0b ; encoding: [0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, null, v2, 0xfe0b ; encoding: [0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, null, v2.l, 0xfe0b ; encoding: [0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, null, v2, 0xfe0b ; encoding: [0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, -1, v2, 0xfe0b ; encoding: [0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, -1, v2.l, 0xfe0b ; encoding: [0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, -1, v2, 0xfe0b ; encoding: [0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, 0.5, v2, 0xfe0b ; encoding: [0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, 0.5, v2.l, 0xfe0b ; encoding: [0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, 0.5, v2, 0xfe0b ; encoding: [0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, src_scc, v2, 0xfe0b ; encoding: [0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, src_scc, v2.l, 0xfe0b ; encoding: [0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, src_scc, v2, 0xfe0b ; encoding: [0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v127, 0xfe0b, v127, 0xfe0b ; encoding: [0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v127.l, 0xfe0b, v127.l, 0xfe0b ; encoding: [0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v127, 0xfe0b, v127, 0xfe0b ; encoding: [0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00
+# GFX12-REAL16: v_fmaak_f16 v5.l, v1.h, v2.l, 0xfe0b ; encoding: [0x81,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2, 0xfe0b ; encoding: [0x81,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
----------------
Sisyph wrote:
Do you think it's a likely scenario that someone makes a change that affects a meaningless fake16 test line and doesn't know how to remediate that? Perhaps they will just run the update script and be happy.
Alternatively, can the update script be enhanced to remove these cases automatically? Some logic like this: if the REAL16 and FAKE16 lines differ after removing all instances of '.l' and '.h' from the REAL16 line, delete the FAKE16 check line (because the test is for REAL16 bytes).
https://github.com/llvm/llvm-project/pull/115233
More information about the llvm-commits
mailing list