[llvm] [AMDGPU][True16][MC] VINTERP instructions supporting true16/fake16 (PR #113634)
Joe Nash via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 5 13:34:45 PST 2024
================
@@ -0,0 +1,723 @@
+; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble %s | FileCheck -strict-whitespace -check-prefixes=CHECK,GFX11-TRUE16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -disassemble %s | FileCheck -strict-whitespace -check-prefixes=CHECK,GFX12-TRUE16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble %s | FileCheck -strict-whitespace -check-prefixes=CHECK,GFX11-FAKE16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble %s | FileCheck -strict-whitespace -check-prefixes=CHECK,GFX12-FAKE16 %s
+
+0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x04
+# CHECK: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:0
+
+# Check that unused bits in the encoding are ignored.
+0x00,0x00,0x80,0xcd,0x01,0x05,0x0e,0x1c
+# CHECK: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:0
+
+0x01,0x00,0x00,0xcd,0x0a,0x29,0x7a,0x04
+# CHECK: v_interp_p10_f32 v1, v10, v20, v30 wait_exp:0
+
+0x02,0x00,0x00,0xcd,0x0b,0x2b,0x7e,0x04
+# CHECK: v_interp_p10_f32 v2, v11, v21, v31 wait_exp:0
+
+0x03,0x00,0x00,0xcd,0x0c,0x2d,0x82,0x04
+# CHECK: v_interp_p10_f32 v3, v12, v22, v32 wait_exp:0
+
+0x00,0x80,0x00,0xcd,0x01,0x05,0x0e,0x04
+# CHECK: v_interp_p10_f32 v0, v1, v2, v3 clamp wait_exp:0
+
+0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x24
+# CHECK: v_interp_p10_f32 v0, -v1, v2, v3 wait_exp:0
+
+0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x44
+# CHECK: v_interp_p10_f32 v0, v1, -v2, v3 wait_exp:0
+
+0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x84
+# CHECK: v_interp_p10_f32 v0, v1, v2, -v3 wait_exp:0
+
+0x00,0x01,0x00,0xcd,0x01,0x05,0x0e,0x04
+# CHECK: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:1
+
+0x00,0x07,0x00,0xcd,0x01,0x05,0x0e,0x04
+# CHECK: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:7
+
+0x00,0x87,0x00,0xcd,0x01,0x05,0x0e,0x04
+# CHECK: v_interp_p10_f32 v0, v1, v2, v3 clamp wait_exp:7
+
+0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x04
+# CHECK: v_interp_p2_f32 v0, v1, v2, v3 wait_exp:0
+
+0x01,0x00,0x01,0xcd,0x0a,0x29,0x7a,0x04
+# CHECK: v_interp_p2_f32 v1, v10, v20, v30 wait_exp:0
+
+0x02,0x00,0x01,0xcd,0x0b,0x2b,0x7e,0x04
+# CHECK: v_interp_p2_f32 v2, v11, v21, v31 wait_exp:0
+
+0x03,0x00,0x01,0xcd,0x0c,0x2d,0x82,0x04
+# CHECK: v_interp_p2_f32 v3, v12, v22, v32 wait_exp:0
+
+0x00,0x80,0x01,0xcd,0x01,0x05,0x0e,0x04
+# CHECK: v_interp_p2_f32 v0, v1, v2, v3 clamp wait_exp:0
+
+0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x24
+# CHECK: v_interp_p2_f32 v0, -v1, v2, v3 wait_exp:0
+
+0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x44
+# CHECK: v_interp_p2_f32 v0, v1, -v2, v3 wait_exp:0
+
+0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x84
+# CHECK: v_interp_p2_f32 v0, v1, v2, -v3 wait_exp:0
+
+0x00,0x01,0x01,0xcd,0x01,0x05,0x0e,0x04
+# CHECK: v_interp_p2_f32 v0, v1, v2, v3 wait_exp:1
+
+0x00,0x07,0x01,0xcd,0x01,0x05,0x0e,0x04
+# CHECK: v_interp_p2_f32 v0, v1, v2, v3 wait_exp:7
+
+0x00,0x87,0x01,0xcd,0x01,0x05,0x0e,0x04
+# CHECK: v_interp_p2_f32 v0, v1, v2, v3 clamp wait_exp:7
+
+0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x04
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.l wait_exp:0
+# GFX12-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.l wait_exp:0
+# GFX11-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:0
+# GFX12-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:0
+
+0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x04
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.l wait_exp:0
+# GFX12-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.l wait_exp:0
+# GFX11-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:0
+# GFX12-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:0
+
+0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x24
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, -v1.l, v2, v3.l wait_exp:0
+# GFX12-TRUE16: v_interp_p10_f16_f32 v0, -v1.l, v2, v3.l wait_exp:0
+# GFX11-FAKE16: v_interp_p10_f16_f32 v0, -v1, v2, v3 wait_exp:0
+# GFX12-FAKE16: v_interp_p10_f16_f32 v0, -v1, v2, v3 wait_exp:0
+
+0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x44
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, v1.l, -v2, v3.l wait_exp:0
+# GFX12-TRUE16: v_interp_p10_f16_f32 v0, v1.l, -v2, v3.l wait_exp:0
+# GFX11-FAKE16: v_interp_p10_f16_f32 v0, v1, -v2, v3 wait_exp:0
+# GFX12-FAKE16: v_interp_p10_f16_f32 v0, v1, -v2, v3 wait_exp:0
+
+0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x84
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, -v3.l wait_exp:0
+# GFX12-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, -v3.l wait_exp:0
+# GFX11-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, -v3 wait_exp:0
+# GFX12-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, -v3 wait_exp:0
+
+0x00,0x80,0x02,0xcd,0x01,0x05,0x0e,0x04
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.l clamp wait_exp:0
+# GFX12-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.l clamp wait_exp:0
+# GFX11-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 clamp wait_exp:0
+# GFX12-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 clamp wait_exp:0
+
+0x00,0x01,0x02,0xcd,0x01,0x05,0x0e,0x04
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.l wait_exp:1
+# GFX12-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.l wait_exp:1
+# GFX11-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:1
+# GFX12-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:1
+
+0x00,0x07,0x02,0xcd,0x01,0x05,0x0e,0x04
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.l wait_exp:7
+# GFX12-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.l wait_exp:7
+# GFX11-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:7
+# GFX12-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:7
+
+0x00,0x08,0x02,0xcd,0x01,0x05,0x0e,0x04
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, v1.h, v2, v3.l op_sel:[1,0,0,0] wait_exp:0
+# GFX12-TRUE16: v_interp_p10_f16_f32 v0, v1.h, v2, v3.l op_sel:[1,0,0,0] wait_exp:0
+# GFX11-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] wait_exp:0
+# GFX12-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] wait_exp:0
+
+0x00,0x20,0x02,0xcd,0x01,0x05,0x0e,0x04
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.h op_sel:[0,0,1,0] wait_exp:0
+# GFX12-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.h op_sel:[0,0,1,0] wait_exp:0
+# GFX11-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] wait_exp:0
+# GFX12-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] wait_exp:0
+
+0x00,0x28,0x02,0xcd,0x01,0x05,0x0e,0x04
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, v1.h, v2, v3.h op_sel:[1,0,1,0] wait_exp:0
+# GFX12-TRUE16: v_interp_p10_f16_f32 v0, v1.h, v2, v3.h op_sel:[1,0,1,0] wait_exp:0
+# GFX11-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,0,1,0] wait_exp:0
+# GFX12-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,0,1,0] wait_exp:0
+
+0x00,0x0d,0x02,0xcd,0x01,0x05,0x0e,0x04
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, v1.h, v2, v3.l op_sel:[1,0,0,0] wait_exp:5
+# GFX12-TRUE16: v_interp_p10_f16_f32 v0, v1.h, v2, v3.l op_sel:[1,0,0,0] wait_exp:5
+# GFX11-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] wait_exp:5
+# GFX12-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] wait_exp:5
+
+0x00,0xad,0x02,0xcd,0x01,0x05,0x0e,0x04
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, v1.h, v2, v3.h clamp op_sel:[1,0,1,0] wait_exp:5
+# GFX12-TRUE16: v_interp_p10_f16_f32 v0, v1.h, v2, v3.h clamp op_sel:[1,0,1,0] wait_exp:5
+# GFX11-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,1,0] wait_exp:5
+# GFX12-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,1,0] wait_exp:5
+
+0x00,0xad,0x02,0xcd,0x01,0x05,0x0e,0xe4
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, -v1.h, -v2, -v3.h clamp op_sel:[1,0,1,0] wait_exp:5
+# GFX12-TRUE16: v_interp_p10_f16_f32 v0, -v1.h, -v2, -v3.h clamp op_sel:[1,0,1,0] wait_exp:5
+# GFX11-FAKE16: v_interp_p10_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,1,0] wait_exp:5
+# GFX12-FAKE16: v_interp_p10_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,1,0] wait_exp:5
+
+0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x24
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, -v1.l, v2, v3.l wait_exp:0
+# GFX12-TRUE16: v_interp_p10_f16_f32 v0, -v1.l, v2, v3.l wait_exp:0
+# GFX11-FAKE16: v_interp_p10_f16_f32 v0, -v1, v2, v3 wait_exp:0
+# GFX12-FAKE16: v_interp_p10_f16_f32 v0, -v1, v2, v3 wait_exp:0
+
+0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x44
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, v1.l, -v2, v3.l wait_exp:0
+# GFX12-TRUE16: v_interp_p10_f16_f32 v0, v1.l, -v2, v3.l wait_exp:0
+# GFX11-FAKE16: v_interp_p10_f16_f32 v0, v1, -v2, v3 wait_exp:0
+# GFX12-FAKE16: v_interp_p10_f16_f32 v0, v1, -v2, v3 wait_exp:0
+
+0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x84
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, -v3.l wait_exp:0
+# GFX12-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, -v3.l wait_exp:0
+# GFX11-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, -v3 wait_exp:0
+# GFX12-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, -v3 wait_exp:0
+
+0x00,0x80,0x02,0xcd,0x01,0x05,0x0e,0x04
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.l clamp wait_exp:0
+# GFX12-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.l clamp wait_exp:0
+# GFX11-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 clamp wait_exp:0
+# GFX12-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 clamp wait_exp:0
+
+0x00,0x01,0x02,0xcd,0x01,0x05,0x0e,0x04
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.l wait_exp:1
+# GFX12-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.l wait_exp:1
+# GFX11-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:1
+# GFX12-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:1
+
+0x00,0x07,0x02,0xcd,0x01,0x05,0x0e,0x04
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.l wait_exp:7
+# GFX12-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.l wait_exp:7
+# GFX11-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:7
+# GFX12-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:7
+
+0x00,0x08,0x02,0xcd,0x01,0x05,0x0e,0x04
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, v1.h, v2, v3.l op_sel:[1,0,0,0] wait_exp:0
+# GFX12-TRUE16: v_interp_p10_f16_f32 v0, v1.h, v2, v3.l op_sel:[1,0,0,0] wait_exp:0
+# GFX11-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] wait_exp:0
+# GFX12-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] wait_exp:0
+
+0x00,0x10,0x02,0xcd,0x01,0x05,0x0e,0x04
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.l op_sel:[0,1,0,0] wait_exp:0
+# GFX12-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.l op_sel:[0,1,0,0] wait_exp:0
+# GFX11-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] wait_exp:0
+# GFX12-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] wait_exp:0
+
+0x00,0x20,0x02,0xcd,0x01,0x05,0x0e,0x04
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.h op_sel:[0,0,1,0] wait_exp:0
+# GFX12-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.h op_sel:[0,0,1,0] wait_exp:0
+# GFX11-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] wait_exp:0
+# GFX12-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] wait_exp:0
+
+0x00,0x40,0x02,0xcd,0x01,0x05,0x0e,0x04
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.l op_sel:[0,0,0,1] wait_exp:0
+# GFX12-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.l op_sel:[0,0,0,1] wait_exp:0
+# GFX11-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] wait_exp:0
+# GFX12-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] wait_exp:0
+
+0x00,0x78,0x02,0xcd,0x01,0x05,0x0e,0x04
+# GFX11-TRUE16: v_interp_p10_f16_f32 v0, v1.h, v2, v3.h op_sel:[1,1,1,1] wait_exp:0
----------------
Sisyph wrote:
These tests, in which op_sel is applied to 32-bit arguments, do not make sense, since op_sel should not be applied to 32-bit operands. However, I see they have existed for a while and were just ported in this patch, and we generally do not strictly reject such encodings in the disassembler.
https://github.com/llvm/llvm-project/pull/113634
More information about the llvm-commits mailing list