[llvm-branch-commits] [llvm] AMDGPU: Remove FeatureCvtFP8VOP1Bug from gfx950 (PR #117827)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Nov 26 16:53:30 PST 2024
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/117827
(No pull request description was provided.)
From 5f45ccd6f0f867fe087a9ace290c41b2f57fc760 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 15 Nov 2024 08:13:53 -0800
Subject: [PATCH] AMDGPU: Remove FeatureCvtFP8VOP1Bug from gfx950
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 1 -
.../CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll | 328 +++++++++---------
2 files changed, 170 insertions(+), 159 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 6bac2d2b590ffa..5978f5b0bbae5f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1599,7 +1599,6 @@ def FeatureISAVersion9_5_Common : FeatureSet<
[FeatureAddressableLocalMemorySize163840,
FeatureFP8Insts,
FeatureFP8ConversionInsts,
- FeatureCvtFP8VOP1Bug,
FeatureGFX950Insts,
FeaturePrngInst,
FeatureBF16ConversionInsts,
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
index bd35ee3f009736..3f418ee80f8771 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX940 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX940 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX950 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX950 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
@@ -22,6 +22,12 @@ define float @test_cvt_f32_bf8_byte0(i32 %a) {
; GFX940-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_0
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
+; GFX950-LABEL: test_cvt_f32_bf8_byte0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_cvt_f32_bf8_e32 v0, v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_cvt_f32_bf8_byte0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -36,11 +42,11 @@ define float @test_cvt_f32_bf8_byte0(i32 %a) {
}
define float @test_cvt_f32_bf8_byte1(i32 %a) {
-; GFX940-LABEL: test_cvt_f32_bf8_byte1:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_f32_bf8_byte1:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_f32_bf8_byte1:
; GFX12: ; %bb.0:
@@ -56,11 +62,11 @@ define float @test_cvt_f32_bf8_byte1(i32 %a) {
}
define float @test_cvt_f32_bf8_byte2(i32 %a) {
-; GFX940-LABEL: test_cvt_f32_bf8_byte2:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_2
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_f32_bf8_byte2:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_2
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_f32_bf8_byte2:
; GFX12: ; %bb.0:
@@ -76,11 +82,11 @@ define float @test_cvt_f32_bf8_byte2(i32 %a) {
}
define float @test_cvt_f32_bf8_byte3(i32 %a) {
-; GFX940-LABEL: test_cvt_f32_bf8_byte3:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_f32_bf8_byte3:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_3
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_f32_bf8_byte3:
; GFX12: ; %bb.0:
@@ -102,6 +108,12 @@ define float @test_cvt_f32_fp8_byte0(i32 %a) {
; GFX940-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_0
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
+; GFX950-LABEL: test_cvt_f32_fp8_byte0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_cvt_f32_fp8_e32 v0, v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_cvt_f32_fp8_byte0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -116,11 +128,11 @@ define float @test_cvt_f32_fp8_byte0(i32 %a) {
}
define float @test_cvt_f32_fp8_byte1(i32 %a) {
-; GFX940-LABEL: test_cvt_f32_fp8_byte1:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_f32_fp8_byte1:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_f32_fp8_byte1:
; GFX12: ; %bb.0:
@@ -136,11 +148,11 @@ define float @test_cvt_f32_fp8_byte1(i32 %a) {
}
define float @test_cvt_f32_fp8_byte2(i32 %a) {
-; GFX940-LABEL: test_cvt_f32_fp8_byte2:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_2
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_f32_fp8_byte2:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_2
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_f32_fp8_byte2:
; GFX12: ; %bb.0:
@@ -156,11 +168,11 @@ define float @test_cvt_f32_fp8_byte2(i32 %a) {
}
define float @test_cvt_f32_fp8_byte3(i32 %a) {
-; GFX940-LABEL: test_cvt_f32_fp8_byte3:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_f32_fp8_byte3:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_3
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_f32_fp8_byte3:
; GFX12: ; %bb.0:
@@ -176,11 +188,11 @@ define float @test_cvt_f32_fp8_byte3(i32 %a) {
}
define <2 x float> @test_cvt_pk_f32_bf8_word0(i32 %a) {
-; GFX940-LABEL: test_cvt_pk_f32_bf8_word0:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cvt_pk_f32_bf8_e32 v[0:1], v0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_pk_f32_bf8_word0:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_cvt_pk_f32_bf8_e32 v[0:1], v0
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_pk_f32_bf8_word0:
; GFX12: ; %bb.0:
@@ -196,11 +208,11 @@ define <2 x float> @test_cvt_pk_f32_bf8_word0(i32 %a) {
}
define <2 x float> @test_cvt_pk_f32_bf8_word1(i32 %a) {
-; GFX940-LABEL: test_cvt_pk_f32_bf8_word1:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_pk_f32_bf8_word1:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_pk_f32_bf8_word1:
; GFX12: ; %bb.0:
@@ -216,11 +228,11 @@ define <2 x float> @test_cvt_pk_f32_bf8_word1(i32 %a) {
}
define <2 x float> @test_cvt_pk_f32_fp8_word0(i32 %a) {
-; GFX940-LABEL: test_cvt_pk_f32_fp8_word0:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_pk_f32_fp8_word0:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_pk_f32_fp8_word0:
; GFX12: ; %bb.0:
@@ -236,11 +248,11 @@ define <2 x float> @test_cvt_pk_f32_fp8_word0(i32 %a) {
}
define <2 x float> @test_cvt_pk_f32_fp8_word1(i32 %a) {
-; GFX940-LABEL: test_cvt_pk_f32_fp8_word1:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cvt_pk_f32_fp8_sdwa v[0:1], v0 src0_sel:WORD_1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_pk_f32_fp8_word1:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_cvt_pk_f32_fp8_sdwa v[0:1], v0 src0_sel:WORD_1
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_pk_f32_fp8_word1:
; GFX12: ; %bb.0:
@@ -256,12 +268,12 @@ define <2 x float> @test_cvt_pk_f32_fp8_word1(i32 %a) {
}
define i32 @test_cvt_pk_bf8_f32_word0(float %x, float %y, i32 %old) {
-; GFX940-LABEL: test_cvt_pk_bf8_f32_word0:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cvt_pk_bf8_f32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v0, v2
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_pk_bf8_f32_word0:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_cvt_pk_bf8_f32 v2, v0, v1
+; GFX9X-NEXT: v_mov_b32_e32 v0, v2
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_pk_bf8_f32_word0:
; GFX12: ; %bb.0:
@@ -279,13 +291,13 @@ define i32 @test_cvt_pk_bf8_f32_word0(float %x, float %y, i32 %old) {
}
define i32 @test_cvt_pk_bf8_f32_word1(float %x, float %y, i32 %old) {
-; GFX940-LABEL: test_cvt_pk_bf8_f32_word1:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cvt_pk_bf8_f32 v2, v0, v1 op_sel:[0,0,1]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_mov_b32_e32 v0, v2
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_pk_bf8_f32_word1:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_cvt_pk_bf8_f32 v2, v0, v1 op_sel:[0,0,1]
+; GFX9X-NEXT: s_nop 0
+; GFX9X-NEXT: v_mov_b32_e32 v0, v2
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_pk_bf8_f32_word1:
; GFX12: ; %bb.0:
@@ -303,12 +315,12 @@ define i32 @test_cvt_pk_bf8_f32_word1(float %x, float %y, i32 %old) {
}
define i32 @test_cvt_pk_fp8_f32_word0(float %x, float %y, i32 %old) {
-; GFX940-LABEL: test_cvt_pk_fp8_f32_word0:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v0, v2
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_pk_fp8_f32_word0:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1
+; GFX9X-NEXT: v_mov_b32_e32 v0, v2
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_pk_fp8_f32_word0:
; GFX12: ; %bb.0:
@@ -326,13 +338,13 @@ define i32 @test_cvt_pk_fp8_f32_word0(float %x, float %y, i32 %old) {
}
define i32 @test_cvt_pk_fp8_f32_word1(float %x, float %y, i32 %old) {
-; GFX940-LABEL: test_cvt_pk_fp8_f32_word1:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_mov_b32_e32 v0, v2
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_pk_fp8_f32_word1:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1]
+; GFX9X-NEXT: s_nop 0
+; GFX9X-NEXT: v_mov_b32_e32 v0, v2
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_pk_fp8_f32_word1:
; GFX12: ; %bb.0:
@@ -350,12 +362,12 @@ define i32 @test_cvt_pk_fp8_f32_word1(float %x, float %y, i32 %old) {
}
define i32 @test_cvt_sr_bf8_f32_byte0(float %x, i32 %r, i32 %old) {
-; GFX940-LABEL: test_cvt_sr_bf8_f32_byte0:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v0, v2
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_sr_bf8_f32_byte0:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1
+; GFX9X-NEXT: v_mov_b32_e32 v0, v2
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_sr_bf8_f32_byte0:
; GFX12: ; %bb.0:
@@ -373,13 +385,13 @@ define i32 @test_cvt_sr_bf8_f32_byte0(float %x, i32 %r, i32 %old) {
}
define i32 @test_cvt_sr_bf8_f32_byte1(float %x, i32 %r, i32 %old) {
-; GFX940-LABEL: test_cvt_sr_bf8_f32_byte1:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,0]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_mov_b32_e32 v0, v2
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_sr_bf8_f32_byte1:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,0]
+; GFX9X-NEXT: s_nop 0
+; GFX9X-NEXT: v_mov_b32_e32 v0, v2
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_sr_bf8_f32_byte1:
; GFX12: ; %bb.0:
@@ -397,13 +409,13 @@ define i32 @test_cvt_sr_bf8_f32_byte1(float %x, i32 %r, i32 %old) {
}
define i32 @test_cvt_sr_bf8_f32_byte2(float %x, i32 %r, i32 %old) {
-; GFX940-LABEL: test_cvt_sr_bf8_f32_byte2:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,0,1]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_mov_b32_e32 v0, v2
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_sr_bf8_f32_byte2:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,0,1]
+; GFX9X-NEXT: s_nop 0
+; GFX9X-NEXT: v_mov_b32_e32 v0, v2
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_sr_bf8_f32_byte2:
; GFX12: ; %bb.0:
@@ -421,13 +433,13 @@ define i32 @test_cvt_sr_bf8_f32_byte2(float %x, i32 %r, i32 %old) {
}
define i32 @test_cvt_sr_bf8_f32_byte3(float %x, i32 %r, i32 %old) {
-; GFX940-LABEL: test_cvt_sr_bf8_f32_byte3:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,1]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_mov_b32_e32 v0, v2
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_sr_bf8_f32_byte3:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,1]
+; GFX9X-NEXT: s_nop 0
+; GFX9X-NEXT: v_mov_b32_e32 v0, v2
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_sr_bf8_f32_byte3:
; GFX12: ; %bb.0:
@@ -445,12 +457,12 @@ define i32 @test_cvt_sr_bf8_f32_byte3(float %x, i32 %r, i32 %old) {
}
define i32 @test_cvt_sr_fp8_f32_byte0(float %x, i32 %r, i32 %old) {
-; GFX940-LABEL: test_cvt_sr_fp8_f32_byte0:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v0, v2
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_sr_fp8_f32_byte0:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1
+; GFX9X-NEXT: v_mov_b32_e32 v0, v2
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_sr_fp8_f32_byte0:
; GFX12: ; %bb.0:
@@ -468,13 +480,13 @@ define i32 @test_cvt_sr_fp8_f32_byte0(float %x, i32 %r, i32 %old) {
}
define i32 @test_cvt_sr_fp8_f32_byte1(float %x, i32 %r, i32 %old) {
-; GFX940-LABEL: test_cvt_sr_fp8_f32_byte1:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,0]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_mov_b32_e32 v0, v2
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_sr_fp8_f32_byte1:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,0]
+; GFX9X-NEXT: s_nop 0
+; GFX9X-NEXT: v_mov_b32_e32 v0, v2
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_sr_fp8_f32_byte1:
; GFX12: ; %bb.0:
@@ -492,13 +504,13 @@ define i32 @test_cvt_sr_fp8_f32_byte1(float %x, i32 %r, i32 %old) {
}
define i32 @test_cvt_sr_fp8_f32_byte2(float %x, i32 %r, i32 %old) {
-; GFX940-LABEL: test_cvt_sr_fp8_f32_byte2:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,0,1]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_mov_b32_e32 v0, v2
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_sr_fp8_f32_byte2:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,0,1]
+; GFX9X-NEXT: s_nop 0
+; GFX9X-NEXT: v_mov_b32_e32 v0, v2
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_sr_fp8_f32_byte2:
; GFX12: ; %bb.0:
@@ -516,13 +528,13 @@ define i32 @test_cvt_sr_fp8_f32_byte2(float %x, i32 %r, i32 %old) {
}
define i32 @test_cvt_sr_fp8_f32_byte3(float %x, i32 %r, i32 %old) {
-; GFX940-LABEL: test_cvt_sr_fp8_f32_byte3:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,1]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_mov_b32_e32 v0, v2
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_cvt_sr_fp8_f32_byte3:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,1]
+; GFX9X-NEXT: s_nop 0
+; GFX9X-NEXT: v_mov_b32_e32 v0, v2
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_sr_fp8_f32_byte3:
; GFX12: ; %bb.0:
@@ -540,12 +552,12 @@ define i32 @test_cvt_sr_fp8_f32_byte3(float %x, i32 %r, i32 %old) {
}
define float @test_sext_cvt_f32_fp8(i16 %a) {
-; GFX940-LABEL: test_sext_cvt_f32_fp8:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX940-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_sext_cvt_f32_fp8:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX9X-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_sext_cvt_f32_fp8:
; GFX12: ; %bb.0:
@@ -564,12 +576,12 @@ define float @test_sext_cvt_f32_fp8(i16 %a) {
}
define float @test_sext_cvt_f32_bf8(i16 %a) {
-; GFX940-LABEL: test_sext_cvt_f32_bf8:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX940-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_sext_cvt_f32_bf8:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX9X-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_sext_cvt_f32_bf8:
; GFX12: ; %bb.0:
@@ -588,12 +600,12 @@ define float @test_sext_cvt_f32_bf8(i16 %a) {
}
define <2 x float> @test_sext_cvt_pk_f32_bf8_word1(i16 %a) {
-; GFX940-LABEL: test_sext_cvt_pk_f32_bf8_word1:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX940-NEXT: v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_sext_cvt_pk_f32_bf8_word1:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX9X-NEXT: v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_sext_cvt_pk_f32_bf8_word1:
; GFX12: ; %bb.0:
@@ -612,12 +624,12 @@ define <2 x float> @test_sext_cvt_pk_f32_bf8_word1(i16 %a) {
}
define <2 x float> @test_sext_cvt_pk_f32_fp8_word0(i16 %a) {
-; GFX940-LABEL: test_sext_cvt_pk_f32_fp8_word0:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX940-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX9X-LABEL: test_sext_cvt_pk_f32_fp8_word0:
+; GFX9X: ; %bb.0:
+; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9X-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX9X-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
+; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_sext_cvt_pk_f32_fp8_word0:
; GFX12: ; %bb.0:
More information about the llvm-branch-commits mailing list