[clang] [llvm] [AMDGPU] Add support for `v_tanh_bf16` on gfx1250 (PR #147425)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 14 11:51:29 PDT 2025
================
@@ -2,169 +2,69 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
-0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00
-# GFX1250: v_cvt_f32_bf8_e64 v1, 3 ; encoding: [0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00]
+0xff,0x81,0xca,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00
+# GFX1250-REAL16: v_tanh_bf16_e64 v255.l, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xca,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
+# GFX1250-FAKE16: v_tanh_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xca,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
-0x01,0x10,0xed,0xd5,0x83,0x00,0x00,0x00
-# GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x83,0x00,0x00,0x00]
+0x05,0x00,0xca,0xd5,0xc1,0x00,0x00,0x00
+# GFX1250-REAL16: v_tanh_bf16_e64 v5.l, -1 ; encoding: [0x05,0x00,0xca,0xd5,0xc1,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_tanh_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xca,0xd5,0xc1,0x00,0x00,0x00]
-0x01,0x08,0xed,0xd5,0x83,0x00,0x00,0x00
-# GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x83,0x00,0x00,0x00]
+0x05,0x00,0xca,0xd5,0xf0,0x00,0x00,0x08
+# GFX1250-REAL16: v_tanh_bf16_e64 v5.l, 0.5 mul:2 ; encoding: [0x05,0x00,0xca,0xd5,0xf0,0x00,0x00,0x08]
+# GFX1250-FAKE16: v_tanh_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xca,0xd5,0xf0,0x00,0x00,0x08]
-0x01,0x18,0xed,0xd5,0x83,0x00,0x00,0x00
-# GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x83,0x00,0x00,0x00]
+0x05,0x00,0xca,0xd5,0x7f,0x00,0x00,0x00
+# GFX1250-REAL16: v_tanh_bf16_e64 v5.l, exec_hi ; encoding: [0x05,0x00,0xca,0xd5,0x7f,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_tanh_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xca,0xd5,0x7f,0x00,0x00,0x00]
-0x01,0x00,0xed,0xd5,0x03,0x00,0x00,0x00
-# GFX1250: v_cvt_f32_bf8_e64 v1, s3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x00,0x00,0x00]
+0x05,0x00,0xca,0xd5,0x7e,0x00,0x00,0x00
+# GFX1250-REAL16: v_tanh_bf16_e64 v5.l, exec_lo ; encoding: [0x05,0x00,0xca,0xd5,0x7e,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_tanh_bf16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xca,0xd5,0x7e,0x00,0x00,0x00]
-0x01,0x10,0xed,0xd5,0x03,0x00,0x00,0x00
-# GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x00,0x00,0x00]
+0x05,0x00,0xca,0xd5,0x7d,0x00,0x00,0x00
+# GFX1250-REAL16: v_tanh_bf16_e64 v5.l, m0 ; encoding: [0x05,0x00,0xca,0xd5,0x7d,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_tanh_bf16_e64 v5, m0 ; encoding: [0x05,0x00,0xca,0xd5,0x7d,0x00,0x00,0x00]
-0x01,0x08,0xed,0xd5,0x03,0x00,0x00,0x00
-# GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x00,0x00,0x00]
+0x05,0x00,0xca,0xd5,0x7c,0x00,0x00,0x00
+# GFX1250-REAL16: v_tanh_bf16_e64 v5.l, null ; encoding: [0x05,0x00,0xca,0xd5,0x7c,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_tanh_bf16_e64 v5, null ; encoding: [0x05,0x00,0xca,0xd5,0x7c,0x00,0x00,0x00]
-0x01,0x18,0xed,0xd5,0x03,0x00,0x00,0x00
-# GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x00,0x00,0x00]
+0x05,0x00,0xca,0xd5,0x01,0x00,0x00,0x00
+# GFX1250-REAL16: v_tanh_bf16_e64 v5.l, s1 ; encoding: [0x05,0x00,0xca,0xd5,0x01,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_tanh_bf16_e64 v5, s1 ; encoding: [0x05,0x00,0xca,0xd5,0x01,0x00,0x00,0x00]
-0x01,0x00,0xed,0xd5,0x03,0x01,0x00,0x00
-# GFX1250: v_cvt_f32_bf8_e64 v1, v3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x01,0x00,0x00]
+0x05,0x00,0xca,0xd5,0x69,0x00,0x00,0x00
+# GFX1250-REAL16: v_tanh_bf16_e64 v5.l, s105 ; encoding: [0x05,0x00,0xca,0xd5,0x69,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_tanh_bf16_e64 v5, s105 ; encoding: [0x05,0x00,0xca,0xd5,0x69,0x00,0x00,0x00]
-0x01,0x10,0xed,0xd5,0x03,0x01,0x00,0x00
-# GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x01,0x00,0x00]
+0x05,0x00,0xca,0xd5,0xfd,0x00,0x00,0x10
+# GFX1250-REAL16: v_tanh_bf16_e64 v5.l, src_scc mul:4 ; encoding: [0x05,0x00,0xca,0xd5,0xfd,0x00,0x00,0x10]
+# GFX1250-FAKE16: v_tanh_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xca,0xd5,0xfd,0x00,0x00,0x10]
-0x01,0x08,0xed,0xd5,0x03,0x01,0x00,0x00
-# GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x01,0x00,0x00]
+0x05,0x00,0xca,0xd5,0x7b,0x00,0x00,0x00
+# GFX1250-REAL16: v_tanh_bf16_e64 v5.l, ttmp15 ; encoding: [0x05,0x00,0xca,0xd5,0x7b,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_tanh_bf16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xca,0xd5,0x7b,0x00,0x00,0x00]
-0x01,0x18,0xed,0xd5,0x03,0x01,0x00,0x00
-# GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x01,0x00,0x00]
+0x05,0x00,0xca,0xd5,0x01,0x01,0x00,0x00
+# GFX1250-REAL16: v_tanh_bf16_e64 v5.l, v1.l ; encoding: [0x05,0x00,0xca,0xd5,0x01,0x01,0x00,0x00]
+# GFX1250-FAKE16: v_tanh_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xca,0xd5,0x01,0x01,0x00,0x00]
-0x01,0x00,0xec,0xd5,0x83,0x00,0x00,0x00
-# GFX1250: v_cvt_f32_fp8_e64 v1, 3 ; encoding: [0x01,0x00,0xec,0xd5,0x83,0x00,0x00,0x00]
+0x05,0x00,0xca,0xd5,0xff,0x01,0x00,0x00
+# GFX1250-REAL16: v_tanh_bf16_e64 v5.l, v255.l ; encoding: [0x05,0x00,0xca,0xd5,0xff,0x01,0x00,0x00]
+# GFX1250-FAKE16: v_tanh_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xca,0xd5,0xff,0x01,0x00,0x00]
-0x01,0x10,0xec,0xd5,0x83,0x00,0x00,0x00
-# GFX1250: v_cvt_f32_fp8_e64 v1, 3 byte_sel:1 ; encoding: [0x01,0x10,0xec,0xd5,0x83,0x00,0x00,0x00]
+0x05,0x00,0xca,0xd5,0x6b,0x00,0x00,0x00
+# GFX1250-REAL16: v_tanh_bf16_e64 v5.l, vcc_hi ; encoding: [0x05,0x00,0xca,0xd5,0x6b,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_tanh_bf16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xca,0xd5,0x6b,0x00,0x00,0x00]
-0x01,0x08,0xec,0xd5,0x83,0x00,0x00,0x00
-# GFX1250: v_cvt_f32_fp8_e64 v1, 3 byte_sel:2 ; encoding: [0x01,0x08,0xec,0xd5,0x83,0x00,0x00,0x00]
+0x05,0x00,0xca,0xd5,0x6a,0x00,0x00,0x00
+# GFX1250-REAL16: v_tanh_bf16_e64 v5.l, vcc_lo ; encoding: [0x05,0x00,0xca,0xd5,0x6a,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_tanh_bf16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xca,0xd5,0x6a,0x00,0x00,0x00]
-0x01,0x18,0xec,0xd5,0x83,0x00,0x00,0x00
-# GFX1250: v_cvt_f32_fp8_e64 v1, 3 byte_sel:3 ; encoding: [0x01,0x18,0xec,0xd5,0x83,0x00,0x00,0x00]
-
-0x01,0x00,0xec,0xd5,0x03,0x00,0x00,0x00
----------------
shiltian wrote:
Hmm, I thought I fixed all of them.
https://github.com/llvm/llvm-project/pull/147425
More information about the llvm-commits mailing list