[llvm] AMDGPU: Remove FeatureCvtFP8VOP1Bug from gfx950 (PR #117827)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 26 22:24:49 PST 2024


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/117827

From 11464f05f7c18846ed6b4f19fd497fd11fa17e63 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 15 Nov 2024 08:13:53 -0800
Subject: [PATCH] AMDGPU: Remove FeatureCvtFP8VOP1Bug from gfx950

---
 llvm/lib/Target/AMDGPU/AMDGPU.td              |   1 -
 .../CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll     | 328 +++++++++---------
 2 files changed, 170 insertions(+), 159 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 6bac2d2b590ffa..5978f5b0bbae5f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1599,7 +1599,6 @@ def FeatureISAVersion9_5_Common : FeatureSet<
   [FeatureAddressableLocalMemorySize163840,
    FeatureFP8Insts,
    FeatureFP8ConversionInsts,
-   FeatureCvtFP8VOP1Bug,
    FeatureGFX950Insts,
    FeaturePrngInst,
    FeatureBF16ConversionInsts,
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
index bd35ee3f009736..3f418ee80f8771 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX940 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX940 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX950 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX950 %s
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
 
@@ -22,6 +22,12 @@ define float @test_cvt_f32_bf8_byte0(i32 %a) {
 ; GFX940-NEXT:    v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_0
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX950-LABEL: test_cvt_f32_bf8_byte0:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_cvt_f32_bf8_e32 v0, v0
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX12-LABEL: test_cvt_f32_bf8_byte0:
 ; GFX12:       ; %bb.0:
 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
@@ -36,11 +42,11 @@ define float @test_cvt_f32_bf8_byte0(i32 %a) {
 }
 
 define float @test_cvt_f32_bf8_byte1(i32 %a) {
-; GFX940-LABEL: test_cvt_f32_bf8_byte1:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_f32_bf8_byte1:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_cvt_f32_bf8_byte1:
 ; GFX12:       ; %bb.0:
@@ -56,11 +62,11 @@ define float @test_cvt_f32_bf8_byte1(i32 %a) {
 }
 
 define float @test_cvt_f32_bf8_byte2(i32 %a) {
-; GFX940-LABEL: test_cvt_f32_bf8_byte2:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_2
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_f32_bf8_byte2:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_2
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_cvt_f32_bf8_byte2:
 ; GFX12:       ; %bb.0:
@@ -76,11 +82,11 @@ define float @test_cvt_f32_bf8_byte2(i32 %a) {
 }
 
 define float @test_cvt_f32_bf8_byte3(i32 %a) {
-; GFX940-LABEL: test_cvt_f32_bf8_byte3:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_f32_bf8_byte3:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_3
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_cvt_f32_bf8_byte3:
 ; GFX12:       ; %bb.0:
@@ -102,6 +108,12 @@ define float @test_cvt_f32_fp8_byte0(i32 %a) {
 ; GFX940-NEXT:    v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_0
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX950-LABEL: test_cvt_f32_fp8_byte0:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_cvt_f32_fp8_e32 v0, v0
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX12-LABEL: test_cvt_f32_fp8_byte0:
 ; GFX12:       ; %bb.0:
 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
@@ -116,11 +128,11 @@ define float @test_cvt_f32_fp8_byte0(i32 %a) {
 }
 
 define float @test_cvt_f32_fp8_byte1(i32 %a) {
-; GFX940-LABEL: test_cvt_f32_fp8_byte1:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_f32_fp8_byte1:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_cvt_f32_fp8_byte1:
 ; GFX12:       ; %bb.0:
@@ -136,11 +148,11 @@ define float @test_cvt_f32_fp8_byte1(i32 %a) {
 }
 
 define float @test_cvt_f32_fp8_byte2(i32 %a) {
-; GFX940-LABEL: test_cvt_f32_fp8_byte2:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_2
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_f32_fp8_byte2:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_2
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_cvt_f32_fp8_byte2:
 ; GFX12:       ; %bb.0:
@@ -156,11 +168,11 @@ define float @test_cvt_f32_fp8_byte2(i32 %a) {
 }
 
 define float @test_cvt_f32_fp8_byte3(i32 %a) {
-; GFX940-LABEL: test_cvt_f32_fp8_byte3:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_f32_fp8_byte3:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_3
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_cvt_f32_fp8_byte3:
 ; GFX12:       ; %bb.0:
@@ -176,11 +188,11 @@ define float @test_cvt_f32_fp8_byte3(i32 %a) {
 }
 
 define <2 x float> @test_cvt_pk_f32_bf8_word0(i32 %a) {
-; GFX940-LABEL: test_cvt_pk_f32_bf8_word0:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_cvt_pk_f32_bf8_e32 v[0:1], v0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_pk_f32_bf8_word0:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_cvt_pk_f32_bf8_e32 v[0:1], v0
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_cvt_pk_f32_bf8_word0:
 ; GFX12:       ; %bb.0:
@@ -196,11 +208,11 @@ define <2 x float> @test_cvt_pk_f32_bf8_word0(i32 %a) {
 }
 
 define <2 x float> @test_cvt_pk_f32_bf8_word1(i32 %a) {
-; GFX940-LABEL: test_cvt_pk_f32_bf8_word1:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_pk_f32_bf8_word1:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_cvt_pk_f32_bf8_word1:
 ; GFX12:       ; %bb.0:
@@ -216,11 +228,11 @@ define <2 x float> @test_cvt_pk_f32_bf8_word1(i32 %a) {
 }
 
 define <2 x float> @test_cvt_pk_f32_fp8_word0(i32 %a) {
-; GFX940-LABEL: test_cvt_pk_f32_fp8_word0:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_cvt_pk_f32_fp8_e32 v[0:1], v0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_pk_f32_fp8_word0:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_cvt_pk_f32_fp8_e32 v[0:1], v0
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_cvt_pk_f32_fp8_word0:
 ; GFX12:       ; %bb.0:
@@ -236,11 +248,11 @@ define <2 x float> @test_cvt_pk_f32_fp8_word0(i32 %a) {
 }
 
 define <2 x float> @test_cvt_pk_f32_fp8_word1(i32 %a) {
-; GFX940-LABEL: test_cvt_pk_f32_fp8_word1:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_cvt_pk_f32_fp8_sdwa v[0:1], v0 src0_sel:WORD_1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_pk_f32_fp8_word1:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_cvt_pk_f32_fp8_sdwa v[0:1], v0 src0_sel:WORD_1
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_cvt_pk_f32_fp8_word1:
 ; GFX12:       ; %bb.0:
@@ -256,12 +268,12 @@ define <2 x float> @test_cvt_pk_f32_fp8_word1(i32 %a) {
 }
 
 define i32 @test_cvt_pk_bf8_f32_word0(float %x, float %y, i32 %old) {
-; GFX940-LABEL: test_cvt_pk_bf8_f32_word0:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_cvt_pk_bf8_f32 v2, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v0, v2
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_pk_bf8_f32_word0:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_cvt_pk_bf8_f32 v2, v0, v1
+; GFX9X-NEXT:    v_mov_b32_e32 v0, v2
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_cvt_pk_bf8_f32_word0:
 ; GFX12:       ; %bb.0:
@@ -279,13 +291,13 @@ define i32 @test_cvt_pk_bf8_f32_word0(float %x, float %y, i32 %old) {
 }
 
 define i32 @test_cvt_pk_bf8_f32_word1(float %x, float %y, i32 %old) {
-; GFX940-LABEL: test_cvt_pk_bf8_f32_word1:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_cvt_pk_bf8_f32 v2, v0, v1 op_sel:[0,0,1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_mov_b32_e32 v0, v2
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_pk_bf8_f32_word1:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_cvt_pk_bf8_f32 v2, v0, v1 op_sel:[0,0,1]
+; GFX9X-NEXT:    s_nop 0
+; GFX9X-NEXT:    v_mov_b32_e32 v0, v2
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_cvt_pk_bf8_f32_word1:
 ; GFX12:       ; %bb.0:
@@ -303,12 +315,12 @@ define i32 @test_cvt_pk_bf8_f32_word1(float %x, float %y, i32 %old) {
 }
 
 define i32 @test_cvt_pk_fp8_f32_word0(float %x, float %y, i32 %old) {
-; GFX940-LABEL: test_cvt_pk_fp8_f32_word0:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_cvt_pk_fp8_f32 v2, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v0, v2
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_pk_fp8_f32_word0:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_cvt_pk_fp8_f32 v2, v0, v1
+; GFX9X-NEXT:    v_mov_b32_e32 v0, v2
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_cvt_pk_fp8_f32_word0:
 ; GFX12:       ; %bb.0:
@@ -326,13 +338,13 @@ define i32 @test_cvt_pk_fp8_f32_word0(float %x, float %y, i32 %old) {
 }
 
 define i32 @test_cvt_pk_fp8_f32_word1(float %x, float %y, i32 %old) {
-; GFX940-LABEL: test_cvt_pk_fp8_f32_word1:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_mov_b32_e32 v0, v2
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_pk_fp8_f32_word1:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1]
+; GFX9X-NEXT:    s_nop 0
+; GFX9X-NEXT:    v_mov_b32_e32 v0, v2
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_cvt_pk_fp8_f32_word1:
 ; GFX12:       ; %bb.0:
@@ -350,12 +362,12 @@ define i32 @test_cvt_pk_fp8_f32_word1(float %x, float %y, i32 %old) {
 }
 
 define i32 @test_cvt_sr_bf8_f32_byte0(float %x, i32 %r, i32 %old) {
-; GFX940-LABEL: test_cvt_sr_bf8_f32_byte0:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_cvt_sr_bf8_f32 v2, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v0, v2
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_sr_bf8_f32_byte0:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_cvt_sr_bf8_f32 v2, v0, v1
+; GFX9X-NEXT:    v_mov_b32_e32 v0, v2
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_cvt_sr_bf8_f32_byte0:
 ; GFX12:       ; %bb.0:
@@ -373,13 +385,13 @@ define i32 @test_cvt_sr_bf8_f32_byte0(float %x, i32 %r, i32 %old) {
 }
 
 define i32 @test_cvt_sr_bf8_f32_byte1(float %x, i32 %r, i32 %old) {
-; GFX940-LABEL: test_cvt_sr_bf8_f32_byte1:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,0]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_mov_b32_e32 v0, v2
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_sr_bf8_f32_byte1:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,0]
+; GFX9X-NEXT:    s_nop 0
+; GFX9X-NEXT:    v_mov_b32_e32 v0, v2
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_cvt_sr_bf8_f32_byte1:
 ; GFX12:       ; %bb.0:
@@ -397,13 +409,13 @@ define i32 @test_cvt_sr_bf8_f32_byte1(float %x, i32 %r, i32 %old) {
 }
 
 define i32 @test_cvt_sr_bf8_f32_byte2(float %x, i32 %r, i32 %old) {
-; GFX940-LABEL: test_cvt_sr_bf8_f32_byte2:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,0,1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_mov_b32_e32 v0, v2
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_sr_bf8_f32_byte2:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,0,1]
+; GFX9X-NEXT:    s_nop 0
+; GFX9X-NEXT:    v_mov_b32_e32 v0, v2
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_cvt_sr_bf8_f32_byte2:
 ; GFX12:       ; %bb.0:
@@ -421,13 +433,13 @@ define i32 @test_cvt_sr_bf8_f32_byte2(float %x, i32 %r, i32 %old) {
 }
 
 define i32 @test_cvt_sr_bf8_f32_byte3(float %x, i32 %r, i32 %old) {
-; GFX940-LABEL: test_cvt_sr_bf8_f32_byte3:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_mov_b32_e32 v0, v2
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_sr_bf8_f32_byte3:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,1]
+; GFX9X-NEXT:    s_nop 0
+; GFX9X-NEXT:    v_mov_b32_e32 v0, v2
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_cvt_sr_bf8_f32_byte3:
 ; GFX12:       ; %bb.0:
@@ -445,12 +457,12 @@ define i32 @test_cvt_sr_bf8_f32_byte3(float %x, i32 %r, i32 %old) {
 }
 
 define i32 @test_cvt_sr_fp8_f32_byte0(float %x, i32 %r, i32 %old) {
-; GFX940-LABEL: test_cvt_sr_fp8_f32_byte0:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_cvt_sr_fp8_f32 v2, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v0, v2
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_sr_fp8_f32_byte0:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_cvt_sr_fp8_f32 v2, v0, v1
+; GFX9X-NEXT:    v_mov_b32_e32 v0, v2
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_cvt_sr_fp8_f32_byte0:
 ; GFX12:       ; %bb.0:
@@ -468,13 +480,13 @@ define i32 @test_cvt_sr_fp8_f32_byte0(float %x, i32 %r, i32 %old) {
 }
 
 define i32 @test_cvt_sr_fp8_f32_byte1(float %x, i32 %r, i32 %old) {
-; GFX940-LABEL: test_cvt_sr_fp8_f32_byte1:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,0]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_mov_b32_e32 v0, v2
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_sr_fp8_f32_byte1:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,0]
+; GFX9X-NEXT:    s_nop 0
+; GFX9X-NEXT:    v_mov_b32_e32 v0, v2
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_cvt_sr_fp8_f32_byte1:
 ; GFX12:       ; %bb.0:
@@ -492,13 +504,13 @@ define i32 @test_cvt_sr_fp8_f32_byte1(float %x, i32 %r, i32 %old) {
 }
 
 define i32 @test_cvt_sr_fp8_f32_byte2(float %x, i32 %r, i32 %old) {
-; GFX940-LABEL: test_cvt_sr_fp8_f32_byte2:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,0,1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_mov_b32_e32 v0, v2
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_sr_fp8_f32_byte2:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,0,1]
+; GFX9X-NEXT:    s_nop 0
+; GFX9X-NEXT:    v_mov_b32_e32 v0, v2
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_cvt_sr_fp8_f32_byte2:
 ; GFX12:       ; %bb.0:
@@ -516,13 +528,13 @@ define i32 @test_cvt_sr_fp8_f32_byte2(float %x, i32 %r, i32 %old) {
 }
 
 define i32 @test_cvt_sr_fp8_f32_byte3(float %x, i32 %r, i32 %old) {
-; GFX940-LABEL: test_cvt_sr_fp8_f32_byte3:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_mov_b32_e32 v0, v2
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_sr_fp8_f32_byte3:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,1]
+; GFX9X-NEXT:    s_nop 0
+; GFX9X-NEXT:    v_mov_b32_e32 v0, v2
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_cvt_sr_fp8_f32_byte3:
 ; GFX12:       ; %bb.0:
@@ -540,12 +552,12 @@ define i32 @test_cvt_sr_fp8_f32_byte3(float %x, i32 %r, i32 %old) {
 }
 
 define float @test_sext_cvt_f32_fp8(i16 %a) {
-; GFX940-LABEL: test_sext_cvt_f32_fp8:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_bfe_i32 v0, v0, 0, 16
-; GFX940-NEXT:    v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_sext_cvt_f32_fp8:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX9X-NEXT:    v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_sext_cvt_f32_fp8:
 ; GFX12:       ; %bb.0:
@@ -564,12 +576,12 @@ define float @test_sext_cvt_f32_fp8(i16 %a) {
 }
 
 define float @test_sext_cvt_f32_bf8(i16 %a) {
-; GFX940-LABEL: test_sext_cvt_f32_bf8:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_bfe_i32 v0, v0, 0, 16
-; GFX940-NEXT:    v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_sext_cvt_f32_bf8:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX9X-NEXT:    v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_sext_cvt_f32_bf8:
 ; GFX12:       ; %bb.0:
@@ -588,12 +600,12 @@ define float @test_sext_cvt_f32_bf8(i16 %a) {
 }
 
 define <2 x float> @test_sext_cvt_pk_f32_bf8_word1(i16 %a) {
-; GFX940-LABEL: test_sext_cvt_pk_f32_bf8_word1:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_bfe_i32 v0, v0, 0, 16
-; GFX940-NEXT:    v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_sext_cvt_pk_f32_bf8_word1:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX9X-NEXT:    v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_sext_cvt_pk_f32_bf8_word1:
 ; GFX12:       ; %bb.0:
@@ -612,12 +624,12 @@ define <2 x float> @test_sext_cvt_pk_f32_bf8_word1(i16 %a) {
 }
 
 define <2 x float> @test_sext_cvt_pk_f32_fp8_word0(i16 %a) {
-; GFX940-LABEL: test_sext_cvt_pk_f32_fp8_word0:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_bfe_i32 v0, v0, 0, 16
-; GFX940-NEXT:    v_cvt_pk_f32_fp8_e32 v[0:1], v0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_sext_cvt_pk_f32_fp8_word0:
+; GFX9X:       ; %bb.0:
+; GFX9X-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX9X-NEXT:    v_cvt_pk_f32_fp8_e32 v[0:1], v0
+; GFX9X-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_sext_cvt_pk_f32_fp8_word0:
 ; GFX12:       ; %bb.0:



More information about the llvm-commits mailing list