[llvm] [AMDGPU][NFC] Run the general bf16 tests for GFX950. (PR #149796)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 21 04:07:32 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Ivan Kosarev (kosarev)
<details>
<summary>Changes</summary>
---
Patch is 679.83 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149796.diff
1 Files Affected:
- (modified) llvm/test/CodeGen/AMDGPU/bf16.ll (+8801-5328)
``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll
index cd6d741beeab3..7859fcdfe8a70 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16.ll
@@ -2,7 +2,8 @@
; RUN: llc < %s -mtriple=amdgcn | FileCheck %s -check-prefixes=GCN
; RUN: llc < %s -mtriple=amdgcn -mcpu=hawaii | FileCheck %s -check-prefixes=GFX7
; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga | FileCheck %s -check-prefixes=GFX8
-; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 | FileCheck %s -check-prefixes=GFX9
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 | FileCheck %s -check-prefixes=GFX9,GFX900
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx950 | FileCheck %s -check-prefixes=GFX9,GFX950
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 | FileCheck %s -check-prefixes=GFX10
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 | FileCheck %s -check-prefixes=GFX11,GFX11TRUE16
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 | FileCheck %s -check-prefixes=GFX11,GFX11FAKE16
@@ -967,12 +968,21 @@ define void @v_store_global_v2bf16(<2 x bfloat> %val, ptr addrspace(1) %ptr) {
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_store_global_v2bf16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: global_store_dword v[1:2], v0, off
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_store_global_v2bf16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: global_store_dword v[1:2], v0, off
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_store_global_v2bf16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v3, v2
+; GFX950-NEXT: v_mov_b32_e32 v2, v1
+; GFX950-NEXT: global_store_dword v[2:3], v0, off
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_store_global_v2bf16:
; GFX10: ; %bb.0:
@@ -2019,23 +2029,41 @@ define void @v_store_global_v64bf16(<64 x bfloat> %val, ptr addrspace(1) %ptr) {
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_store_global_v64bf16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: global_store_dwordx4 v[32:33], v[28:31], off offset:112
-; GFX9-NEXT: global_store_dwordx4 v[32:33], v[24:27], off offset:96
-; GFX9-NEXT: global_store_dwordx4 v[32:33], v[20:23], off offset:80
-; GFX9-NEXT: global_store_dwordx4 v[32:33], v[16:19], off offset:64
-; GFX9-NEXT: global_store_dwordx4 v[32:33], v[12:15], off offset:48
-; GFX9-NEXT: global_store_dwordx4 v[32:33], v[8:11], off offset:32
-; GFX9-NEXT: global_store_dwordx4 v[32:33], v[4:7], off offset:16
-; GFX9-NEXT: global_store_dwordx4 v[32:33], v[0:3], off
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_store_global_v64bf16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: global_store_dwordx4 v[32:33], v[28:31], off offset:112
+; GFX900-NEXT: global_store_dwordx4 v[32:33], v[24:27], off offset:96
+; GFX900-NEXT: global_store_dwordx4 v[32:33], v[20:23], off offset:80
+; GFX900-NEXT: global_store_dwordx4 v[32:33], v[16:19], off offset:64
+; GFX900-NEXT: global_store_dwordx4 v[32:33], v[12:15], off offset:48
+; GFX900-NEXT: global_store_dwordx4 v[32:33], v[8:11], off offset:32
+; GFX900-NEXT: global_store_dwordx4 v[32:33], v[4:7], off offset:16
+; GFX900-NEXT: global_store_dwordx4 v[32:33], v[0:3], off
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_store_global_v64bf16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:8
+; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:4
+; GFX950-NEXT: scratch_load_dword v31, off, s32
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: global_store_dwordx4 v[32:33], v[28:31], off offset:112
+; GFX950-NEXT: global_store_dwordx4 v[32:33], v[24:27], off offset:96
+; GFX950-NEXT: global_store_dwordx4 v[32:33], v[20:23], off offset:80
+; GFX950-NEXT: global_store_dwordx4 v[32:33], v[16:19], off offset:64
+; GFX950-NEXT: global_store_dwordx4 v[32:33], v[12:15], off offset:48
+; GFX950-NEXT: global_store_dwordx4 v[32:33], v[8:11], off offset:32
+; GFX950-NEXT: global_store_dwordx4 v[32:33], v[4:7], off offset:16
+; GFX950-NEXT: global_store_dwordx4 v[32:33], v[0:3], off
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_store_global_v64bf16:
; GFX10: ; %bb.0:
@@ -2204,20 +2232,30 @@ define void @test_load_store_f32_to_bf16(ptr addrspace(1) %in, ptr addrspace(1)
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: test_load_store_f32_to_bf16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: global_load_dword v0, v[0:1], off
-; GFX9-NEXT: s_movk_i32 s4, 0x7fff
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX9-NEXT: v_or_b32_e32 v4, 0x400000, v0
-; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
-; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc
-; GFX9-NEXT: global_store_short_d16_hi v[2:3], v0, off
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: test_load_store_f32_to_bf16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: global_load_dword v0, v[0:1], off
+; GFX900-NEXT: s_movk_i32 s4, 0x7fff
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX900-NEXT: v_or_b32_e32 v4, 0x400000, v0
+; GFX900-NEXT: v_add3_u32 v1, v1, v0, s4
+; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc
+; GFX900-NEXT: global_store_short_d16_hi v[2:3], v0, off
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: test_load_store_f32_to_bf16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: global_load_dword v0, v[0:1], off
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX950-NEXT: global_store_short v[2:3], v0, off
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_load_store_f32_to_bf16:
; GFX10: ; %bb.0:
@@ -2308,30 +2346,50 @@ define void @test_load_store_f64_to_bf16(ptr addrspace(1) %in, ptr addrspace(1)
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: test_load_store_f64_to_bf16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
-; GFX9-NEXT: s_movk_i32 s8, 0x7fff
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cvt_f32_f64_e32 v6, v[0:1]
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
-; GFX9-NEXT: v_and_b32_e32 v7, 1, v6
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v7
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[6:7], |v[0:1]|, |v[4:5]|
-; GFX9-NEXT: v_cmp_nlg_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[6:7]
-; GFX9-NEXT: v_add_u32_e32 v4, v6, v4
-; GFX9-NEXT: s_or_b64 vcc, vcc, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
-; GFX9-NEXT: v_bfe_u32 v5, v4, 16, 1
-; GFX9-NEXT: v_or_b32_e32 v6, 0x400000, v4
-; GFX9-NEXT: v_add3_u32 v4, v5, v4, s8
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc
-; GFX9-NEXT: global_store_short_d16_hi v[2:3], v0, off
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: test_load_store_f64_to_bf16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX900-NEXT: s_movk_i32 s8, 0x7fff
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cvt_f32_f64_e32 v6, v[0:1]
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
+; GFX900-NEXT: v_and_b32_e32 v7, 1, v6
+; GFX900-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v7
+; GFX900-NEXT: v_cmp_gt_f64_e64 s[6:7], |v[0:1]|, |v[4:5]|
+; GFX900-NEXT: v_cmp_nlg_f64_e32 vcc, v[0:1], v[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[6:7]
+; GFX900-NEXT: v_add_u32_e32 v4, v6, v4
+; GFX900-NEXT: s_or_b64 vcc, vcc, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT: v_bfe_u32 v5, v4, 16, 1
+; GFX900-NEXT: v_or_b32_e32 v6, 0x400000, v4
+; GFX900-NEXT: v_add3_u32 v4, v5, v4, s8
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc
+; GFX900-NEXT: global_store_short_d16_hi v[2:3], v0, off
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: test_load_store_f64_to_bf16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_cvt_f32_f64_e32 v6, v[0:1]
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
+; GFX950-NEXT: v_and_b32_e32 v7, 1, v6
+; GFX950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, |v[4:5]|
+; GFX950-NEXT: v_cmp_nlg_f64_e32 vcc, v[0:1], v[4:5]
+; GFX950-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v7
+; GFX950-NEXT: v_cndmask_b32_e64 v0, -1, 1, s[2:3]
+; GFX950-NEXT: v_add_u32_e32 v0, v6, v0
+; GFX950-NEXT: s_or_b64 vcc, vcc, s[0:1]
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX950-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX950-NEXT: global_store_short v[2:3], v0, off
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_load_store_f64_to_bf16:
; GFX10: ; %bb.0:
@@ -2858,12 +2916,21 @@ define void @test_arg_store(bfloat %in, ptr addrspace(1) %out) {
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: test_arg_store:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: global_store_short v[1:2], v0, off
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: test_arg_store:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: global_store_short v[1:2], v0, off
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: test_arg_store:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v3, v2
+; GFX950-NEXT: v_mov_b32_e32 v2, v1
+; GFX950-NEXT: global_store_short v[2:3], v0, off
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_arg_store:
; GFX10: ; %bb.0:
@@ -2918,12 +2985,21 @@ define void @test_arg_store_v2bf16(<2 x bfloat> %in, ptr addrspace(1) %out) {
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: test_arg_store_v2bf16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: global_store_dword v[1:2], v0, off
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: test_arg_store_v2bf16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: global_store_dword v[1:2], v0, off
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: test_arg_store_v2bf16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v3, v2
+; GFX950-NEXT: v_mov_b32_e32 v2, v1
+; GFX950-NEXT: global_store_dword v[2:3], v0, off
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_arg_store_v2bf16:
; GFX10: ; %bb.0:
@@ -3384,12 +3460,19 @@ define bfloat @test_byval(ptr addrspace(5) byval(bfloat) %bv, bfloat %val) {
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: test_byval:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: buffer_store_short v0, off, s[0:3], s32
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: test_byval:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: buffer_store_short v0, off, s[0:3], s32
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: test_byval:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: scratch_store_short off, v0, s32
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_byval:
; GFX10: ; %bb.0:
@@ -3440,12 +3523,19 @@ define void @test_sret(ptr addrspace(5) sret(bfloat) %sret, bfloat %val) {
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: test_sret:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: test_sret:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: test_sret:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: scratch_store_short v0, v1, off
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_sret:
; GFX10: ; %bb.0:
@@ -3907,34 +3997,63 @@ define void @test_call(bfloat %in, ptr addrspace(5) %out) {
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: test_call:
-; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s18, s33
-; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_xor_saveexec_b64 s[16:17], -1
-; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[16:17]
-; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: s_getpc_b64 s[16:17]
-; GFX9-NEXT: s_add_u32 s16, s16, test_arg_store@gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s17, s17, test_arg_store@gotpcrel32@hi+12
-; GFX9-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
-; GFX9-NEXT: v_writelane_b32 v2, s30, 0
-; GFX9-NEXT: v_writelane_b32 v2, s31, 1
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-NEXT: buffer_store_short v0, v1, s[0:3], 0 offen
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_readlane_b32 s31, v2, 1
-; GFX9-NEXT: v_readlane_b32 s30, v2, 0
-; GFX9-NEXT: s_mov_b32 s32, s33
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_mov_b32 s33, s18
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: test_call:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_mov_b32 s18, s33
+; GFX900-NEXT: s_mov_b32 s33, s32
+; GFX900-NEXT: s_xor_saveexec_b64 s[16:17], -1
+; GFX900-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX900-NEXT: s_mov_b64 exec, s[16:17]
+; GFX900-NEXT: s_addk_i32 s32, 0x400
+; GFX900-NEXT: s_getpc_b64 s[16:17]
+; GFX900-NEXT: s_add_u32 s16, s16, test_arg_store@gotpcrel32@lo+4
+; GFX900-NEXT: s_addc_u32 s17, s17, test_arg_store@gotpcrel32@hi+12
+; GFX900-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
+; GFX900-NEXT: v_writelane_b32 v2, s30, 0
+; GFX900-NEXT: v_writelane_b32 v2, s31, 1
+; GFX900-NEXT: s_waitcnt lgkmcnt(0)
+; GFX900-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX900-NEXT: buffer_store_short v0, v1, s[0:3], 0 offen
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_readlane_b32 s31, v2, 1
+; GFX900-NEXT: v_readlane_b32 s30, v2, 0
+; GFX900-NEXT: s_mov_b32 s32, s33
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: s_mov_b32 s33, s18
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: test_call:
+; GFX950: ; %bb.0: ; %entry
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: s_mov_b32 s2, s33
+; GFX950-NEXT: s_mov_b32 s33, s32
+; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX950-NEXT: scratch_store_dword off, v4, s33 ; 4-byte Folded Spill
+; GFX950-NEXT: s_mov_b64 exec, s[0:1]
+; GFX950-NEXT: s_add_i32 s32, s32, 16
+; GFX950-NEXT: s_getpc_b64 s[0:1]
+; GFX950-NEXT: s_add_u32 s0, s0, test_arg_store@gotpcrel32@lo+4
+; GFX950-NEXT: s_addc_u32 s1, s1, test_arg_store@gotpcrel32@hi+12
+; GFX950-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX950-NEXT: v_writelane_b32 v4, s30, 0
+; GFX950-NEXT: v_writelane_b32 v4, s31, 1
+; GFX950-NEXT: s_waitcnt lgkmcnt(0)
+; GFX950-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GFX950-NEXT: scratch_store_short v1, v0, off sc0 sc1
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_readlane_b32 s31, v4, 1
+; GFX950-NEXT: v_readlane_b32 s30, v4, 0
+; GFX950-NEXT: s_mov_b32 s32, s33
+; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX950-NEXT: scratch_load_dword v4, off, s33 ; 4-byte Folded Reload
+; GFX950-NEXT: s_mov_b64 exec, s[0:1]
+; GFX950-NEXT: s_mov_b32 s33, s2
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call:
; GFX10: ; %bb.0: ; %entry
@@ -4104,34 +4223,63 @@ define void @test_call_v2bf16(<2 x bfloat> %in, ptr addrspace(5) %out) {
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: test_call_v2bf16:
-; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s18, s33
-; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_xor_saveexec_b64 s[16:17], -1
-; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[16:17]
-; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: s_getpc_b64 s[16:17]
-; GFX9-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12
-; GFX9-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
-; GFX9-NEXT: v_writelane_b32 v2, s30, 0
-; GFX9-NEXT: v_writelane_b32 v2, s31, 1
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_readlane_b32 s31, v2, 1
-; GFX9-NEXT: v_readlane_b32 s30, v2, 0
-; GFX9-NEXT: s_mov_b32 s32, s33
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_mov_b32 s33, s18
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX9...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/149796
More information about the llvm-commits
mailing list