[llvm] [AMDGPU][True16][MC] 16bit vsrc and vdst support in MC (PR #104510)
Brox Chen via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 30 12:56:39 PDT 2024
================
@@ -448,51 +448,70 @@
# GFX12: v_cvt_pk_f32_fp8_e64 v[2:3], v3 ; encoding: [0x02,0x00,0xee,0xd5,0x03,0x01,0x00,0x00]
0x02,0x00,0xee,0xd5,0x03,0x01,0x00,0x00
-# GFX12: v_cvt_f16_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00]
+# GFX12-REAL16: v_cvt_f16_f32_e64 v5.l, v1 ; encoding: [0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00]
+# GFX12-FAKE16: v_cvt_f16_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00]
0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00
-# GFX12: v_cvt_f16_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x8a,0xd5,0xff,0x01,0x00,0x00]
+# GFX12-REAL16: v_cvt_f16_f32_e64 v5.l, v255 ; encoding: [0x05,0x00,0x8a,0xd5,0xff,0x01,0x00,0x00]
+# GFX12-FAKE16: v_cvt_f16_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x8a,0xd5,0xff,0x01,0x00,0x00]
0x05,0x00,0x8a,0xd5,0xff,0x01,0x00,0x00
-# GFX12: v_cvt_f16_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x8a,0xd5,0x01,0x00,0x00,0x00]
+# GFX12-REAL16: v_cvt_f16_f32_e64 v5.l, s1 ; encoding: [0x05,0x00,0x8a,0xd5,0x01,0x00,0x00,0x00]
+# GFX12-FAKE16: v_cvt_f16_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x8a,0xd5,0x01,0x00,0x00,0x00]
0x05,0x00,0x8a,0xd5,0x01,0x00,0x00,0x00
-# GFX12: v_cvt_f16_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x8a,0xd5,0x69,0x00,0x00,0x00]
+# GFX12-REAL16: v_cvt_f16_f32_e64 v5.l, s105 ; encoding: [0x05,0x00,0x8a,0xd5,0x69,0x00,0x00,0x00]
+# GFX12-FAKE16: v_cvt_f16_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x8a,0xd5,0x69,0x00,0x00,0x00]
0x05,0x00,0x8a,0xd5,0x69,0x00,0x00,0x00
-# GFX12: v_cvt_f16_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x8a,0xd5,0x6a,0x00,0x00,0x00]
+# GFX12-REAL16: v_cvt_f16_f32_e64 v5.l, vcc_lo ; encoding: [0x05,0x00,0x8a,0xd5,0x6a,0x00,0x00,0x00]
+# GFX12-FAKE16: v_cvt_f16_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x8a,0xd5,0x6a,0x00,0x00,0x00]
0x05,0x00,0x8a,0xd5,0x6a,0x00,0x00,0x00
-# GFX12: v_cvt_f16_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x8a,0xd5,0x6b,0x00,0x00,0x00]
+# GFX12-REAL16: v_cvt_f16_f32_e64 v5.l, vcc_hi ; encoding: [0x05,0x00,0x8a,0xd5,0x6b,0x00,0x00,0x00]
+# GFX12-FAKE16: v_cvt_f16_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x8a,0xd5,0x6b,0x00,0x00,0x00]
0x05,0x00,0x8a,0xd5,0x6b,0x00,0x00,0x00
-# GFX12: v_cvt_f16_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x8a,0xd5,0x7b,0x00,0x00,0x00]
+# GFX12-REAL16: v_cvt_f16_f32_e64 v5.l, ttmp15 ; encoding: [0x05,0x00,0x8a,0xd5,0x7b,0x00,0x00,0x00]
+# GFX12-FAKE16: v_cvt_f16_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x8a,0xd5,0x7b,0x00,0x00,0x00]
0x05,0x00,0x8a,0xd5,0x7b,0x00,0x00,0x00
-# GFX12: v_cvt_f16_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x8a,0xd5,0x7d,0x00,0x00,0x00]
+# GFX12-REAL16: v_cvt_f16_f32_e64 v5.l, m0 ; encoding: [0x05,0x00,0x8a,0xd5,0x7d,0x00,0x00,0x00]
+# GFX12-FAKE16: v_cvt_f16_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x8a,0xd5,0x7d,0x00,0x00,0x00]
0x05,0x00,0x8a,0xd5,0x7d,0x00,0x00,0x00
-# GFX12: v_cvt_f16_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x8a,0xd5,0x7e,0x00,0x00,0x00]
+# GFX12-REAL16: v_cvt_f16_f32_e64 v5.l, exec_lo ; encoding: [0x05,0x00,0x8a,0xd5,0x7e,0x00,0x00,0x00]
+# GFX12-FAKE16: v_cvt_f16_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x8a,0xd5,0x7e,0x00,0x00,0x00]
0x05,0x00,0x8a,0xd5,0x7e,0x00,0x00,0x00
-# GFX12: v_cvt_f16_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x8a,0xd5,0x7f,0x00,0x00,0x00]
+# GFX12-REAL16: v_cvt_f16_f32_e64 v5.l, exec_hi ; encoding: [0x05,0x00,0x8a,0xd5,0x7f,0x00,0x00,0x00]
+# GFX12-FAKE16: v_cvt_f16_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x8a,0xd5,0x7f,0x00,0x00,0x00]
0x05,0x00,0x8a,0xd5,0x7f,0x00,0x00,0x00
-# GFX12: v_cvt_f16_f32_e64 v5, null ; encoding: [0x05,0x00,0x8a,0xd5,0x7c,0x00,0x00,0x00]
+# GFX12-REAL16: v_cvt_f16_f32_e64 v5.l, null ; encoding: [0x05,0x00,0x8a,0xd5,0x7c,0x00,0x00,0x00]
+# GFX12-FAKE16: v_cvt_f16_f32_e64 v5, null ; encoding: [0x05,0x00,0x8a,0xd5,0x7c,0x00,0x00,0x00]
0x05,0x00,0x8a,0xd5,0x7c,0x00,0x00,0x00
-# GFX12: v_cvt_f16_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00]
+# GFX12-REAL16: v_cvt_f16_f32_e64 v5.l, -1 ; encoding: [0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00]
+# GFX12-FAKE16: v_cvt_f16_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00]
0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00
-# GFX12: v_cvt_f16_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08]
+# GFX12-REAL16: v_cvt_f16_f32_e64 v5.l, 0.5 mul:2 ; encoding: [0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08]
+# GFX12-FAKE16: v_cvt_f16_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08]
0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08
-# GFX12: v_cvt_f16_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x8a,0xd5,0xfd,0x00,0x00,0x10]
+# GFX12-REAL16: v_cvt_f16_f32_e64 v5.l, src_scc mul:4 ; encoding: [0x05,0x00,0x8a,0xd5,0xfd,0x00,0x00,0x10]
+# GFX12-FAKE16: v_cvt_f16_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x8a,0xd5,0xfd,0x00,0x00,0x10]
0x05,0x00,0x8a,0xd5,0xfd,0x00,0x00,0x10
-# GFX12: v_cvt_f16_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
+# GFX12-REAL16: v_cvt_f16_f32_e64 v255.l, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
+# GFX12-FAKE16: v_cvt_f16_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf
+# GFX12-REAL16: v_cvt_f16_f32_e64 v255.h, -|0xaf123456| op_sel:[0,1] clamp div:2 ; encoding: [0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
+# GFX12-FAKE16: v_cvt_f16_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
----------------
broxigarchen wrote:
one thing I noticed is that in the dasm test, the disassemble of "vop3_from_vop1" instructions do not error out when hi16 bit is set in fake16 flow. For now the disassembler just ignore the hi16 bit and use lo16.
In gfx11, we have all hi16 in a seperated test file vop3_from_vop1_hi.txt while do not test the fake16 on it. So I did not notice this before.
@Sisyph @kosarev Do we need to error out in disassebler in this case?
https://github.com/llvm/llvm-project/pull/104510
More information about the llvm-commits
mailing list