[llvm] [AMDGPU][MC][True16] Support VOP2 instructions with true16 format (PR #115233)

Tue Nov 12 09:52:21 PST 2024

================
@@ -446,49 +446,78 @@
 # GFX12: v_cvt_pk_rtz_f16_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x5f,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, v1, v2, 0xfe0b          ; encoding: [0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, v1.l, v2.l, 0xfe0b    ; encoding: [0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, v1, v2, 0xfe0b          ; encoding: [0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, v127, v2, 0xfe0b        ; encoding: [0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, v127.l, v2.l, 0xfe0b  ; encoding: [0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, v127, v2, 0xfe0b        ; encoding: [0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, s1, v2, 0xfe0b          ; encoding: [0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, s1, v2.l, 0xfe0b      ; encoding: [0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, s1, v2, 0xfe0b          ; encoding: [0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, s105, v2, 0xfe0b        ; encoding: [0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, s105, v2.l, 0xfe0b    ; encoding: [0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, s105, v2, 0xfe0b        ; encoding: [0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, vcc_lo, v2, 0xfe0b      ; encoding: [0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, vcc_lo, v2.l, 0xfe0b  ; encoding: [0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, vcc_lo, v2, 0xfe0b      ; encoding: [0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, vcc_hi, v2, 0xfe0b      ; encoding: [0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, vcc_hi, v2.l, 0xfe0b  ; encoding: [0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, vcc_hi, v2, 0xfe0b      ; encoding: [0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, ttmp15, v2, 0xfe0b      ; encoding: [0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, ttmp15, v2.l, 0xfe0b  ; encoding: [0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, ttmp15, v2, 0xfe0b      ; encoding: [0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, m0, v2, 0xfe0b          ; encoding: [0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, m0, v2.l, 0xfe0b      ; encoding: [0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, m0, v2, 0xfe0b          ; encoding: [0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, exec_lo, v2, 0xfe0b     ; encoding: [0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, exec_lo, v2.l, 0xfe0b ; encoding: [0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, exec_lo, v2, 0xfe0b     ; encoding: [0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, exec_hi, v2, 0xfe0b     ; encoding: [0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, exec_hi, v2.l, 0xfe0b ; encoding: [0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, exec_hi, v2, 0xfe0b     ; encoding: [0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, null, v2, 0xfe0b        ; encoding: [0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, null, v2.l, 0xfe0b    ; encoding: [0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, null, v2, 0xfe0b        ; encoding: [0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, -1, v2, 0xfe0b          ; encoding: [0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, -1, v2.l, 0xfe0b      ; encoding: [0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, -1, v2, 0xfe0b          ; encoding: [0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, 0.5, v2, 0xfe0b         ; encoding: [0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, 0.5, v2.l, 0xfe0b     ; encoding: [0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, 0.5, v2, 0xfe0b         ; encoding: [0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, src_scc, v2, 0xfe0b     ; encoding: [0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, src_scc, v2.l, 0xfe0b ; encoding: [0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, src_scc, v2, 0xfe0b     ; encoding: [0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v127, 0xfe0b, v127, 0xfe0b  ; encoding: [0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v127.l, 0xfe0b, v127.l, 0xfe0b ; encoding: [0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v127, 0xfe0b, v127, 0xfe0b  ; encoding: [0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00
+# GFX12-REAL16: v_fmaak_f16 v5.l, v1.h, v2.l, 0xfe0b    ; encoding: [0x81,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2, 0xfe0b ; encoding: [0x81,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
----------------
broxigarchen wrote:

I am currently updating the test with the update_mc script, and the result depends on what llvm-mc prints to stdout. I think there are three possible outputs now:
1. dasm gives nothing in fake16 when .h is used
2. dasm gives an error message in fake16 when .h is in use, which is the case here
3. dasm ignores the .h and generates all .l instructions

I haven't checked in detail to see why, but my impression is that it is related to the number of bytes in these instructions.

Do you think it's OK to leave it as is? Or should we further remove the fake16 check lines for .h registers?

https://github.com/llvm/llvm-project/pull/115233