[llvm] [AMDGPU][MC][True16] Support VOP2 instructions with true16 format (PR #115233)

Joe Nash via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 12 10:12:46 PST 2024


================
@@ -446,49 +446,78 @@
 # GFX12: v_cvt_pk_rtz_f16_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x5f,0x56,0x34,0x12,0xaf]
 
 0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, v1, v2, 0xfe0b          ; encoding: [0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, v1.l, v2.l, 0xfe0b    ; encoding: [0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, v1, v2, 0xfe0b          ; encoding: [0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, v127, v2, 0xfe0b        ; encoding: [0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, v127.l, v2.l, 0xfe0b  ; encoding: [0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, v127, v2, 0xfe0b        ; encoding: [0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, s1, v2, 0xfe0b          ; encoding: [0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, s1, v2.l, 0xfe0b      ; encoding: [0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, s1, v2, 0xfe0b          ; encoding: [0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, s105, v2, 0xfe0b        ; encoding: [0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, s105, v2.l, 0xfe0b    ; encoding: [0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, s105, v2, 0xfe0b        ; encoding: [0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, vcc_lo, v2, 0xfe0b      ; encoding: [0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, vcc_lo, v2.l, 0xfe0b  ; encoding: [0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, vcc_lo, v2, 0xfe0b      ; encoding: [0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, vcc_hi, v2, 0xfe0b      ; encoding: [0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, vcc_hi, v2.l, 0xfe0b  ; encoding: [0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, vcc_hi, v2, 0xfe0b      ; encoding: [0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, ttmp15, v2, 0xfe0b      ; encoding: [0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, ttmp15, v2.l, 0xfe0b  ; encoding: [0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, ttmp15, v2, 0xfe0b      ; encoding: [0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, m0, v2, 0xfe0b          ; encoding: [0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, m0, v2.l, 0xfe0b      ; encoding: [0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, m0, v2, 0xfe0b          ; encoding: [0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, exec_lo, v2, 0xfe0b     ; encoding: [0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, exec_lo, v2.l, 0xfe0b ; encoding: [0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, exec_lo, v2, 0xfe0b     ; encoding: [0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, exec_hi, v2, 0xfe0b     ; encoding: [0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, exec_hi, v2.l, 0xfe0b ; encoding: [0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, exec_hi, v2, 0xfe0b     ; encoding: [0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, null, v2, 0xfe0b        ; encoding: [0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, null, v2.l, 0xfe0b    ; encoding: [0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, null, v2, 0xfe0b        ; encoding: [0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, -1, v2, 0xfe0b          ; encoding: [0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, -1, v2.l, 0xfe0b      ; encoding: [0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, -1, v2, 0xfe0b          ; encoding: [0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, 0.5, v2, 0xfe0b         ; encoding: [0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, 0.5, v2.l, 0xfe0b     ; encoding: [0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, 0.5, v2, 0xfe0b         ; encoding: [0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v5, src_scc, v2, 0xfe0b     ; encoding: [0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v5.l, src_scc, v2.l, 0xfe0b ; encoding: [0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, src_scc, v2, 0xfe0b     ; encoding: [0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
 
 0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00
-# GFX12: v_fmaak_f16 v127, 0xfe0b, v127, 0xfe0b  ; encoding: [0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-REAL16: v_fmaak_f16 v127.l, 0xfe0b, v127.l, 0xfe0b ; encoding: [0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v127, 0xfe0b, v127, 0xfe0b  ; encoding: [0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00]
+
+0x81,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00
+# GFX12-REAL16: v_fmaak_f16 v5.l, v1.h, v2.l, 0xfe0b    ; encoding: [0x81,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+# GFX12-FAKE16: v_fmaak_f16 v5, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2, 0xfe0b ; encoding: [0x81,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
----------------
Sisyph wrote:

This sounds to me like the behavior of putting bytes that encode .h registers into the fake16 disassembler is undefined. So we should not have tests of undefined behavior. So remove the Fake16 check lines for those tests. @kosarev what do you think?

https://github.com/llvm/llvm-project/pull/115233


More information about the llvm-commits mailing list