[llvm] 55d3a55 - [AMDGPU][True16][CodeGen]disable true16 on fneg test (#132221)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 20 07:32:53 PDT 2025
Author: Brox Chen
Date: 2025-03-20T10:32:49-04:00
New Revision: 55d3a55cc14d77db1c5d254aeb925b2340b11f91
URL: https://github.com/llvm/llvm-project/commit/55d3a55cc14d77db1c5d254aeb925b2340b11f91
DIFF: https://github.com/llvm/llvm-project/commit/55d3a55cc14d77db1c5d254aeb925b2340b11f91.diff
LOG: [AMDGPU][True16][CodeGen]disable true16 on fneg test (#132221)
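This is an NFC (no functional change), test-only change.
Revert the failing test changes by disabling the +real-true16 RUN lines that were enabled for this test in
https://github.com/llvm/llvm-project/pull/131206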
Added:
Modified:
llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
index 5ea39997938ad..cdb31534674de 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
@@ -5,9 +5,10 @@
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=VI,VI-SAFE %s
; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=VI,VI-NSZ %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-SAFE,GFX11-SAFE-TRUE16 %s
+; FIXME-TRUE16. fix true16 test
+; XUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-SAFE,GFX11-SAFE-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-SAFE,GFX11-SAFE-FAKE16 %s
-; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-NSZ,GFX11-NSZ-TRUE16 %s
+; XUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-NSZ,GFX11-NSZ-TRUE16 %s
; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-NSZ,GFX11-NSZ-FAKE16 %s
; --------------------------------------------------------------------------------
@@ -49,6 +50,19 @@ define half @v_fneg_add_f16(half %a, half %b) #0 {
; VI-NSZ-NEXT: v_sub_f16_e64 v0, -v0, v1
; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-SAFE-LABEL: v_fneg_add_f16:
+; GFX11-SAFE: ; %bb.0:
+; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-NEXT: v_add_f16_e32 v0, v0, v1
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-LABEL: v_fneg_add_f16:
+; GFX11-NSZ: ; %bb.0:
+; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-NEXT: v_sub_f16_e64 v0, -v0, v1
+; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_add_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -56,26 +70,11 @@ define half @v_fneg_add_f16(half %a, half %b) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_add_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_add_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_sub_f16_e64 v0.l, -v0.l, v1.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_add_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_sub_f16_e64 v0, -v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%add = fadd half %a, %b
%fneg = fneg half %add
ret half %fneg
@@ -100,6 +99,13 @@ define { half, half } @v_fneg_add_store_use_add_f16(half %a, half %b) #0 {
; VI-NEXT: v_xor_b32_e32 v0, 0x8000, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_add_store_use_add_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_f16_e32 v1, v0, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_add_store_use_add_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -107,15 +113,6 @@ define { half, half } @v_fneg_add_store_use_add_f16(half %a, half %b) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v1.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_add_store_use_add_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_add_f16_e32 v1, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_add_store_use_add_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -123,14 +120,6 @@ define { half, half } @v_fneg_add_store_use_add_f16(half %a, half %b) #0 {
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v1.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_add_store_use_add_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_add_f16_e32 v1, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%add = fadd half %a, %b
%fneg = fneg half %add
%insert.0 = insertvalue { half, half } poison, half %fneg, 0
@@ -177,6 +166,22 @@ define { half, half } @v_fneg_add_multi_use_add_f16(half %a, half %b) #0 {
; VI-NSZ-NEXT: v_mul_f16_e32 v1, -4.0, v0
; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-SAFE-LABEL: v_fneg_add_multi_use_add_f16:
+; GFX11-SAFE: ; %bb.0:
+; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-NEXT: v_add_f16_e32 v1, v0, v1
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v1
+; GFX11-SAFE-NEXT: v_mul_f16_e32 v1, 4.0, v1
+; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-LABEL: v_fneg_add_multi_use_add_f16:
+; GFX11-NSZ: ; %bb.0:
+; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-NEXT: v_sub_f16_e64 v0, -v0, v1
+; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-NEXT: v_mul_f16_e32 v1, -4.0, v0
+; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_add_multi_use_add_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -185,16 +190,6 @@ define { half, half } @v_fneg_add_multi_use_add_f16(half %a, half %b) #0 {
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.h
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e32 v1.l, 4.0, v0.h
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_add_multi_use_add_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_add_f16_e32 v1, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v1
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e32 v1, 4.0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_add_multi_use_add_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -202,14 +197,6 @@ define { half, half } @v_fneg_add_multi_use_add_f16(half %a, half %b) #0 {
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v1.l, -4.0, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_add_multi_use_add_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_sub_f16_e64 v0, -v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e32 v1, -4.0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%add = fadd half %a, %b
%fneg = fneg half %add
%use1 = fmul half %add, 4.0
@@ -254,6 +241,19 @@ define half @v_fneg_add_fneg_x_f16(half %a, half %b) #0 {
; VI-NSZ-NEXT: v_sub_f16_e32 v0, v0, v1
; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-SAFE-LABEL: v_fneg_add_fneg_x_f16:
+; GFX11-SAFE: ; %bb.0:
+; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-NEXT: v_sub_f16_e32 v0, v1, v0
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-LABEL: v_fneg_add_fneg_x_f16:
+; GFX11-NSZ: ; %bb.0:
+; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-NEXT: v_sub_f16_e32 v0, v0, v1
+; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_add_fneg_x_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -261,26 +261,11 @@ define half @v_fneg_add_fneg_x_f16(half %a, half %b) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_add_fneg_x_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_sub_f16_e32 v0, v1, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_add_fneg_x_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_sub_f16_e32 v0.l, v0.l, v1.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_add_fneg_x_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_sub_f16_e32 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
%add = fadd half %fneg.a, %b
%fneg = fneg half %add
@@ -322,6 +307,19 @@ define half @v_fneg_add_x_fneg_f16(half %a, half %b) #0 {
; VI-NSZ-NEXT: v_sub_f16_e32 v0, v1, v0
; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-SAFE-LABEL: v_fneg_add_x_fneg_f16:
+; GFX11-SAFE: ; %bb.0:
+; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-NEXT: v_sub_f16_e32 v0, v0, v1
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-LABEL: v_fneg_add_x_fneg_f16:
+; GFX11-NSZ: ; %bb.0:
+; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-NEXT: v_sub_f16_e32 v0, v1, v0
+; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_add_x_fneg_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -329,26 +327,11 @@ define half @v_fneg_add_x_fneg_f16(half %a, half %b) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_add_x_fneg_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_sub_f16_e32 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_add_x_fneg_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_sub_f16_e32 v0.l, v1.l, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_add_x_fneg_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_sub_f16_e32 v0, v1, v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fneg.b = fneg half %b
%add = fadd half %a, %fneg.b
%fneg = fneg half %add
@@ -390,6 +373,19 @@ define half @v_fneg_add_fneg_fneg_f16(half %a, half %b) #0 {
; VI-NSZ-NEXT: v_add_f16_e32 v0, v0, v1
; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-SAFE-LABEL: v_fneg_add_fneg_fneg_f16:
+; GFX11-SAFE: ; %bb.0:
+; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-NEXT: v_sub_f16_e64 v0, -v0, v1
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-LABEL: v_fneg_add_fneg_fneg_f16:
+; GFX11-NSZ: ; %bb.0:
+; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-NEXT: v_add_f16_e32 v0, v0, v1
+; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_add_fneg_fneg_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -397,26 +393,11 @@ define half @v_fneg_add_fneg_fneg_f16(half %a, half %b) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_add_fneg_fneg_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_sub_f16_e64 v0, -v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_add_fneg_fneg_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_add_fneg_fneg_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
%fneg.b = fneg half %b
%add = fadd half %fneg.a, %fneg.b
@@ -465,6 +446,24 @@ define { half, half } @v_fneg_add_store_use_fneg_x_f16(half %a, half %b) #0 {
; VI-NSZ-NEXT: v_mov_b32_e32 v1, v2
; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-SAFE-LABEL: v_fneg_add_store_use_fneg_x_f16:
+; GFX11-SAFE: ; %bb.0:
+; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-NEXT: v_sub_f16_e32 v1, v1, v0
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SAFE-NEXT: v_xor_b32_e32 v2, 0x8000, v1
+; GFX11-SAFE-NEXT: v_xor_b32_e32 v1, 0x8000, v0
+; GFX11-SAFE-NEXT: v_mov_b32_e32 v0, v2
+; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-LABEL: v_fneg_add_store_use_fneg_x_f16:
+; GFX11-NSZ: ; %bb.0:
+; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-NEXT: v_sub_f16_e32 v2, v0, v1
+; GFX11-NSZ-NEXT: v_xor_b32_e32 v1, 0x8000, v0
+; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-NEXT: v_mov_b32_e32 v0, v2
+; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_add_store_use_fneg_x_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -474,17 +473,6 @@ define { half, half } @v_fneg_add_store_use_fneg_x_f16(half %a, half %b) #0 {
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v1.l, 0x8000, v0.h
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_add_store_use_fneg_x_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_sub_f16_e32 v1, v1, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x8000, v1
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v1, 0x8000, v0
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v0, v2
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_add_store_use_fneg_x_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -493,15 +481,6 @@ define { half, half } @v_fneg_add_store_use_fneg_x_f16(half %a, half %b) #0 {
; GFX11-NSZ-TRUE16-NEXT: v_sub_f16_e32 v0.l, v0.h, v1.l
; GFX11-NSZ-TRUE16-NEXT: v_xor_b16 v1.l, 0x8000, v0.h
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_add_store_use_fneg_x_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_sub_f16_e32 v2, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v1, 0x8000, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v0, v2
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
%add = fadd half %fneg.a, %b
%fneg = fneg half %add
@@ -557,6 +536,24 @@ define { half, half } @v_fneg_add_multi_use_fneg_x_f16(half %a, half %b, half %c
; VI-NSZ-NEXT: v_mov_b32_e32 v0, v3
; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-SAFE-LABEL: v_fneg_add_multi_use_fneg_x_f16:
+; GFX11-SAFE: ; %bb.0:
+; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-NEXT: v_sub_f16_e32 v1, v1, v0
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SAFE-NEXT: v_xor_b32_e32 v3, 0x8000, v1
+; GFX11-SAFE-NEXT: v_mul_f16_e64 v1, -v0, v2
+; GFX11-SAFE-NEXT: v_mov_b32_e32 v0, v3
+; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-LABEL: v_fneg_add_multi_use_fneg_x_f16:
+; GFX11-NSZ: ; %bb.0:
+; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-NEXT: v_sub_f16_e32 v3, v0, v1
+; GFX11-NSZ-NEXT: v_mul_f16_e64 v1, -v0, v2
+; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-NEXT: v_mov_b32_e32 v0, v3
+; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_add_multi_use_fneg_x_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -566,17 +563,6 @@ define { half, half } @v_fneg_add_multi_use_fneg_x_f16(half %a, half %b, half %c
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e64 v1.l, -v0.h, v2.l
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_add_multi_use_fneg_x_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_sub_f16_e32 v1, v1, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v3, 0x8000, v1
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e64 v1, -v0, v2
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_add_multi_use_fneg_x_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -585,15 +571,6 @@ define { half, half } @v_fneg_add_multi_use_fneg_x_f16(half %a, half %b, half %c
; GFX11-NSZ-TRUE16-NEXT: v_sub_f16_e32 v0.l, v0.h, v1.l
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e64 v1.l, -v0.h, v2.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_add_multi_use_fneg_x_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_sub_f16_e32 v3, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e64 v1, -v0, v2
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
%add = fadd half %fneg.a, %b
%fneg = fneg half %add
@@ -688,6 +665,33 @@ define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x
; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
; VI-NSZ-NEXT: ; return to shader part epilog
;
+; GFX11-SAFE-LABEL: fneg_fadd_0_f16:
+; GFX11-SAFE: ; %bb.0: ; %.entry
+; GFX11-SAFE-NEXT: v_rcp_f16_e32 v0, s1
+; GFX11-SAFE-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-SAFE-NEXT: v_mul_f16_e32 v0, 0, v0
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-NEXT: v_add_f16_e32 v0, 0, v0
+; GFX11-SAFE-NEXT: v_xor_b32_e32 v1, 0x8000, v0
+; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, s0, v0
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
+; GFX11-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
+; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
+; GFX11-SAFE-NEXT: ; return to shader part epilog
+;
+; GFX11-NSZ-LABEL: fneg_fadd_0_f16:
+; GFX11-NSZ: ; %bb.0: ; %.entry
+; GFX11-NSZ-NEXT: v_rcp_f16_e32 v0, s1
+; GFX11-NSZ-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NSZ-NEXT: v_mul_f16_e32 v0, 0x8000, v0
+; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e64 s1, -v0, s0
+; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, v0, s0, s1
+; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
+; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
+; GFX11-NSZ-NEXT: ; return to shader part epilog
; GFX11-SAFE-TRUE16-LABEL: fneg_fadd_0_f16:
; GFX11-SAFE-TRUE16: ; %bb.0: ; %.entry
; GFX11-SAFE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
@@ -704,22 +708,6 @@ define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x
; GFX11-SAFE-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, 0, vcc_lo
; GFX11-SAFE-TRUE16-NEXT: ; return to shader part epilog
-;
-; GFX11-SAFE-FAKE16-LABEL: fneg_fadd_0_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0: ; %.entry
-; GFX11-SAFE-FAKE16-NEXT: v_rcp_f16_e32 v0, s1
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e32 v0, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_add_f16_e32 v0, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v1, 0x8000, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, s0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: ; return to shader part epilog
-;
; GFX11-NSZ-TRUE16-LABEL: fneg_fadd_0_f16:
; GFX11-NSZ-TRUE16: ; %bb.0: ; %.entry
; GFX11-NSZ-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
@@ -732,19 +720,6 @@ define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x
; GFX11-NSZ-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0.l
; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, 0, vcc_lo
; GFX11-NSZ-TRUE16-NEXT: ; return to shader part epilog
-;
-; GFX11-NSZ-FAKE16-LABEL: fneg_fadd_0_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0: ; %.entry
-; GFX11-NSZ-FAKE16-NEXT: v_rcp_f16_e32 v0, s1
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e32 v0, 0x8000, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_nlt_f16_e64 s1, -v0, s0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e64 v0, v0, s0, s1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: ; return to shader part epilog
.entry:
%tmp7 = fdiv half 1.000000e+00, %tmp6
%tmp8 = fmul half 0.000000e+00, %tmp7
@@ -811,6 +786,29 @@ define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, <
; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
; VI-NSZ-NEXT: ; return to shader part epilog
;
+; GFX11-SAFE-LABEL: fneg_fadd_0_nsz_f16:
+; GFX11-SAFE: ; %bb.0: ; %.entry
+; GFX11-SAFE-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e64 vcc_lo, s0, 0
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, 0x8000, v0, vcc_lo
+; GFX11-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
+; GFX11-SAFE-NEXT: ; return to shader part epilog
+;
+; GFX11-NSZ-LABEL: fneg_fadd_0_nsz_f16:
+; GFX11-NSZ: ; %bb.0: ; %.entry
+; GFX11-NSZ-NEXT: v_rcp_f16_e32 v0, s1
+; GFX11-NSZ-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NSZ-NEXT: v_mul_f16_e32 v0, 0x8000, v0
+; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e64 s1, -v0, s0
+; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, v0, s0, s1
+; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
+; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
+; GFX11-NSZ-NEXT: ; return to shader part epilog
; GFX11-SAFE-TRUE16-LABEL: fneg_fadd_0_nsz_f16:
; GFX11-SAFE-TRUE16: ; %bb.0: ; %.entry
; GFX11-SAFE-TRUE16-NEXT: v_cmp_ngt_f16_e64 s1, s0, 0
@@ -820,18 +818,6 @@ define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, <
; GFX11-SAFE-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, 0, vcc_lo
; GFX11-SAFE-TRUE16-NEXT: ; return to shader part epilog
-;
-; GFX11-SAFE-FAKE16-LABEL: fneg_fadd_0_nsz_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0: ; %.entry
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v0, s0
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ngt_f16_e64 vcc_lo, s0, 0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x8000, v0, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: ; return to shader part epilog
-;
; GFX11-NSZ-TRUE16-LABEL: fneg_fadd_0_nsz_f16:
; GFX11-NSZ-TRUE16: ; %bb.0: ; %.entry
; GFX11-NSZ-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
@@ -844,19 +830,6 @@ define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, <
; GFX11-NSZ-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0.l
; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, 0, vcc_lo
; GFX11-NSZ-TRUE16-NEXT: ; return to shader part epilog
-;
-; GFX11-NSZ-FAKE16-LABEL: fneg_fadd_0_nsz_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0: ; %.entry
-; GFX11-NSZ-FAKE16-NEXT: v_rcp_f16_e32 v0, s1
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e32 v0, 0x8000, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_nlt_f16_e64 s1, -v0, s0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e64 v0, v0, s0, s1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: ; return to shader part epilog
.entry:
%tmp7 = fdiv afn half 1.000000e+00, %tmp6
%tmp8 = fmul half 0.000000e+00, %tmp7
@@ -891,29 +864,21 @@ define half @v_fneg_mul_f16(half %a, half %b) #0 {
; VI-NEXT: v_mul_f16_e64 v0, v0, -v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_mul_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_mul_f16_e64 v0, v0, -v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_mul_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e64 v0.l, v0.l, -v1.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_mul_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e64 v0, v0, -v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_mul_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e64 v0.l, v0.l, -v1.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_mul_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e64 v0, v0, -v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%mul = fmul half %a, %b
%fneg = fneg half %mul
ret half %fneg
@@ -938,6 +903,13 @@ define { half, half } @v_fneg_mul_store_use_mul_f16(half %a, half %b) #0 {
; VI-NEXT: v_xor_b32_e32 v0, 0x8000, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_mul_store_use_mul_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_mul_f16_e32 v1, v0, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_mul_store_use_mul_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -945,15 +917,6 @@ define { half, half } @v_fneg_mul_store_use_mul_f16(half %a, half %b) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v1.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_mul_store_use_mul_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e32 v1, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_mul_store_use_mul_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -961,14 +924,6 @@ define { half, half } @v_fneg_mul_store_use_mul_f16(half %a, half %b) #0 {
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v1.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_mul_store_use_mul_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e32 v1, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%mul = fmul half %a, %b
%fneg = fneg half %mul
%insert.0 = insertvalue { half, half } poison, half %fneg, 0
@@ -995,6 +950,13 @@ define { half, half } @v_fneg_mul_multi_use_mul_f16(half %a, half %b) #0 {
; VI-NEXT: v_mul_f16_e32 v1, -4.0, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_mul_multi_use_mul_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_mul_f16_e64 v0, v0, -v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f16_e32 v1, -4.0, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_mul_multi_use_mul_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1002,15 +964,6 @@ define { half, half } @v_fneg_mul_multi_use_mul_f16(half %a, half %b) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e32 v1.l, -4.0, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_mul_multi_use_mul_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e64 v0, v0, -v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e32 v1, -4.0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_mul_multi_use_mul_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1018,14 +971,6 @@ define { half, half } @v_fneg_mul_multi_use_mul_f16(half %a, half %b) #0 {
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v1.l, -4.0, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_mul_multi_use_mul_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e64 v0, v0, -v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e32 v1, -4.0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%mul = fmul half %a, %b
%fneg = fneg half %mul
%use1 = fmul half %mul, 4.0
@@ -1051,29 +996,21 @@ define half @v_fneg_mul_fneg_x_f16(half %a, half %b) #0 {
; VI-NEXT: v_mul_f16_e32 v0, v0, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_mul_fneg_x_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_mul_fneg_x_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_mul_fneg_x_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_mul_fneg_x_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_mul_fneg_x_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
%mul = fmul half %fneg.a, %b
%fneg = fneg half %mul
@@ -1097,29 +1034,21 @@ define half @v_fneg_mul_x_fneg_f16(half %a, half %b) #0 {
; VI-NEXT: v_mul_f16_e32 v0, v0, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_mul_x_fneg_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_mul_x_fneg_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_mul_x_fneg_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_mul_x_fneg_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_mul_x_fneg_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fneg.b = fneg half %b
%mul = fmul half %a, %fneg.b
%fneg = fneg half %mul
@@ -1143,29 +1072,21 @@ define half @v_fneg_mul_fneg_fneg_f16(half %a, half %b) #0 {
; VI-NEXT: v_mul_f16_e64 v0, v0, -v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_mul_fneg_fneg_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_mul_f16_e64 v0, v0, -v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_mul_fneg_fneg_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e64 v0.l, v0.l, -v1.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_mul_fneg_fneg_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e64 v0, v0, -v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_mul_fneg_fneg_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e64 v0.l, v0.l, -v1.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_mul_fneg_fneg_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e64 v0, v0, -v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
%fneg.b = fneg half %b
%mul = fmul half %fneg.a, %fneg.b
@@ -1193,6 +1114,14 @@ define { half, half } @v_fneg_mul_store_use_fneg_x_f16(half %a, half %b) #0 {
; VI-NEXT: v_mov_b32_e32 v1, v2
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_mul_store_use_fneg_x_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_mul_f16_e32 v2, v0, v1
+; GFX11-NEXT: v_xor_b32_e32 v1, 0x8000, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_mul_store_use_fneg_x_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1201,16 +1130,6 @@ define { half, half } @v_fneg_mul_store_use_fneg_x_f16(half %a, half %b) #0 {
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.h, v1.l
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v1.l, 0x8000, v0.h
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_mul_store_use_fneg_x_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e32 v2, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v1, 0x8000, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v0, v2
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_mul_store_use_fneg_x_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1219,15 +1138,6 @@ define { half, half } @v_fneg_mul_store_use_fneg_x_f16(half %a, half %b) #0 {
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.h, v1.l
; GFX11-NSZ-TRUE16-NEXT: v_xor_b16 v1.l, 0x8000, v0.h
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_mul_store_use_fneg_x_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e32 v2, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v1, 0x8000, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v0, v2
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
%mul = fmul half %fneg.a, %b
%fneg = fneg half %mul
@@ -1259,6 +1169,14 @@ define { half, half } @v_fneg_mul_multi_use_fneg_x_f16(half %a, half %b, half %c
; VI-NEXT: v_mov_b32_e32 v0, v3
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_mul_multi_use_fneg_x_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_mul_f16_e32 v3, v0, v1
+; GFX11-NEXT: v_mul_f16_e64 v1, -v0, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, v3
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_mul_multi_use_fneg_x_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1267,16 +1185,6 @@ define { half, half } @v_fneg_mul_multi_use_fneg_x_f16(half %a, half %b, half %c
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.h, v1.l
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e64 v1.l, -v0.h, v2.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_mul_multi_use_fneg_x_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e32 v3, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e64 v1, -v0, v2
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_mul_multi_use_fneg_x_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1285,15 +1193,6 @@ define { half, half } @v_fneg_mul_multi_use_fneg_x_f16(half %a, half %b, half %c
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.h, v1.l
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e64 v1.l, -v0.h, v2.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_mul_multi_use_fneg_x_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e32 v3, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e64 v1, -v0, v2
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
%mul = fmul half %fneg.a, %b
%fneg = fneg half %mul
@@ -1326,6 +1225,14 @@ define half @v_fneg_minnum_f16_ieee(half %a, half %b) #0 {
; VI-NEXT: v_max_f16_e32 v0, v0, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_minnum_f16_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_minnum_f16_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1334,16 +1241,6 @@ define half @v_fneg_minnum_f16_ieee(half %a, half %b) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_minnum_f16_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e64 v1, -v1, -v1
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_minnum_f16_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1352,15 +1249,6 @@ define half @v_fneg_minnum_f16_ieee(half %a, half %b) #0 {
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_minnum_f16_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e64 v1, -v1, -v1
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half %a, half %b)
%fneg = fneg half %min
ret half %fneg
@@ -1383,29 +1271,21 @@ define half @v_fneg_minnum_f16_no_ieee(half %a, half %b) #4 {
; VI-NEXT: v_max_f16_e64 v0, -v0, -v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_minnum_f16_no_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_minnum_f16_no_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_max_f16_e64 v0.l, -v0.l, -v1.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_minnum_f16_no_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_minnum_f16_no_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_max_f16_e64 v0.l, -v0.l, -v1.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_minnum_f16_no_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half %a, half %b)
%fneg = fneg half %min
ret half %fneg
@@ -1424,29 +1304,21 @@ define half @v_fneg_self_minnum_f16_ieee(half %a) #0 {
; VI-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_self_minnum_f16_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_self_minnum_f16_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_self_minnum_f16_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_self_minnum_f16_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_self_minnum_f16_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half %a, half %a)
%min.fneg = fneg half %min
ret half %min.fneg
@@ -1465,29 +1337,21 @@ define half @v_fneg_self_minnum_f16_no_ieee(half %a) #4 {
; VI-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_self_minnum_f16_no_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_self_minnum_f16_no_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_self_minnum_f16_no_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_self_minnum_f16_no_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_self_minnum_f16_no_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half %a, half %a)
%min.fneg = fneg half %min
ret half %min.fneg
@@ -1509,6 +1373,13 @@ define half @v_fneg_posk_minnum_f16_ieee(half %a) #0 {
; VI-NEXT: v_max_f16_e32 v0, -4.0, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_posk_minnum_f16_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_max_f16_e32 v0, -4.0, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_posk_minnum_f16_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1516,15 +1387,6 @@ define half @v_fneg_posk_minnum_f16_ieee(half %a) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_max_f16_e32 v0.l, -4.0, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_posk_minnum_f16_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e32 v0, -4.0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_posk_minnum_f16_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1532,14 +1394,6 @@ define half @v_fneg_posk_minnum_f16_ieee(half %a) #0 {
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_max_f16_e32 v0.l, -4.0, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_posk_minnum_f16_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e32 v0, -4.0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half 4.0, half %a)
%fneg = fneg half %min
ret half %fneg
@@ -1560,29 +1414,21 @@ define half @v_fneg_posk_minnum_f16_no_ieee(half %a) #4 {
; VI-NEXT: v_max_f16_e64 v0, -v0, -4.0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_posk_minnum_f16_no_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e64 v0, -v0, -4.0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_posk_minnum_f16_no_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_max_f16_e64 v0.l, -v0.l, -4.0
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_posk_minnum_f16_no_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -4.0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_posk_minnum_f16_no_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_max_f16_e64 v0.l, -v0.l, -4.0
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_posk_minnum_f16_no_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -4.0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half 4.0, half %a)
%fneg = fneg half %min
ret half %fneg
@@ -1604,6 +1450,13 @@ define half @v_fneg_negk_minnum_f16_ieee(half %a) #0 {
; VI-NEXT: v_max_f16_e32 v0, 4.0, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_negk_minnum_f16_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_max_f16_e32 v0, 4.0, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_negk_minnum_f16_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1611,15 +1464,6 @@ define half @v_fneg_negk_minnum_f16_ieee(half %a) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_max_f16_e32 v0.l, 4.0, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_negk_minnum_f16_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e32 v0, 4.0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_negk_minnum_f16_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1627,14 +1471,6 @@ define half @v_fneg_negk_minnum_f16_ieee(half %a) #0 {
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_max_f16_e32 v0.l, 4.0, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_negk_minnum_f16_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e32 v0, 4.0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half -4.0, half %a)
%fneg = fneg half %min
ret half %fneg
@@ -1655,29 +1491,21 @@ define half @v_fneg_negk_minnum_f16_no_ieee(half %a) #4 {
; VI-NEXT: v_max_f16_e64 v0, -v0, 4.0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_negk_minnum_f16_no_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e64 v0, -v0, 4.0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_negk_minnum_f16_no_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_max_f16_e64 v0.l, -v0.l, 4.0
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_negk_minnum_f16_no_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e64 v0, -v0, 4.0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_negk_minnum_f16_no_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_max_f16_e64 v0.l, -v0.l, 4.0
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_negk_minnum_f16_no_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e64 v0, -v0, 4.0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half -4.0, half %a)
%fneg = fneg half %min
ret half %fneg
@@ -1700,6 +1528,13 @@ define half @v_fneg_0_minnum_f16(half %a) #0 {
; VI-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_0_minnum_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_min_f16_e32 v0, 0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_0_minnum_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1707,15 +1542,6 @@ define half @v_fneg_0_minnum_f16(half %a) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_0_minnum_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_min_f16_e32 v0, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_0_minnum_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1723,14 +1549,6 @@ define half @v_fneg_0_minnum_f16(half %a) #0 {
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_0_minnum_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_min_f16_e32 v0, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%min = call nnan half @llvm.minnum.f16(half 0.0, half %a)
%fneg = fneg half %min
ret half %fneg
@@ -1752,6 +1570,13 @@ define half @v_fneg_neg0_minnum_f16_ieee(half %a) #0 {
; VI-NEXT: v_max_f16_e32 v0, 0, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_neg0_minnum_f16_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_max_f16_e32 v0, 0, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_neg0_minnum_f16_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1759,15 +1584,6 @@ define half @v_fneg_neg0_minnum_f16_ieee(half %a) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_max_f16_e32 v0.l, 0, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_neg0_minnum_f16_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e32 v0, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_neg0_minnum_f16_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1775,14 +1591,6 @@ define half @v_fneg_neg0_minnum_f16_ieee(half %a) #0 {
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_max_f16_e32 v0.l, 0, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_neg0_minnum_f16_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e32 v0, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half -0.0, half %a)
%fneg = fneg half %min
ret half %fneg
@@ -1805,6 +1613,14 @@ define half @v_fneg_inv2pi_minnum_f16(half %a) #0 {
; VI-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_inv2pi_minnum_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_min_f16_e32 v0, 0.15915494, v0
+; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_inv2pi_minnum_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1813,16 +1629,6 @@ define half @v_fneg_inv2pi_minnum_f16(half %a) #0 {
; GFX11-SAFE-TRUE16-NEXT: v_min_f16_e32 v0.l, 0.15915494, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_inv2pi_minnum_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_min_f16_e32 v0, 0.15915494, v0
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_inv2pi_minnum_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1831,15 +1637,6 @@ define half @v_fneg_inv2pi_minnum_f16(half %a) #0 {
; GFX11-NSZ-TRUE16-NEXT: v_min_f16_e32 v0.l, 0.15915494, v0.l
; GFX11-NSZ-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_inv2pi_minnum_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_min_f16_e32 v0, 0.15915494, v0
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half 0xH3118, half %a)
%fneg = fneg half %min
ret half %fneg
@@ -1862,6 +1659,14 @@ define half @v_fneg_neg_inv2pi_minnum_f16(half %a) #0 {
; VI-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_neg_inv2pi_minnum_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_min_f16_e32 v0, 0.15915494, v0
+; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_neg_inv2pi_minnum_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1870,16 +1675,6 @@ define half @v_fneg_neg_inv2pi_minnum_f16(half %a) #0 {
; GFX11-SAFE-TRUE16-NEXT: v_min_f16_e32 v0.l, 0.15915494, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_neg_inv2pi_minnum_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_min_f16_e32 v0, 0.15915494, v0
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_neg_inv2pi_minnum_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1888,15 +1683,6 @@ define half @v_fneg_neg_inv2pi_minnum_f16(half %a) #0 {
; GFX11-NSZ-TRUE16-NEXT: v_min_f16_e32 v0.l, 0.15915494, v0.l
; GFX11-NSZ-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_neg_inv2pi_minnum_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_min_f16_e32 v0, 0.15915494, v0
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half 0xH3118, half %a)
%fneg = fneg half %min
ret half %fneg
@@ -1917,29 +1703,21 @@ define half @v_fneg_neg0_minnum_f16_no_ieee(half %a) #4 {
; VI-NEXT: v_max_f16_e64 v0, -v0, 0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_neg0_minnum_f16_no_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e64 v0, -v0, 0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_neg0_minnum_f16_no_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_max_f16_e64 v0.l, -v0.l, 0
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_neg0_minnum_f16_no_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e64 v0, -v0, 0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_neg0_minnum_f16_no_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_max_f16_e64 v0.l, -v0.l, 0
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_neg0_minnum_f16_no_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e64 v0, -v0, 0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half -0.0, half %a)
%fneg = fneg half %min
ret half %fneg
@@ -1965,6 +1743,14 @@ define half @v_fneg_0_minnum_foldable_use_f16_ieee(half %a, half %b) #0 {
; VI-NEXT: v_mul_f16_e64 v0, -v0, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_0_minnum_foldable_use_f16_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_min_f16_e32 v0, 0, v0
+; GFX11-NEXT: v_mul_f16_e64 v0, -v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_0_minnum_foldable_use_f16_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1973,16 +1759,6 @@ define half @v_fneg_0_minnum_foldable_use_f16_ieee(half %a, half %b) #0 {
; GFX11-SAFE-TRUE16-NEXT: v_min_f16_e32 v0.l, 0, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e64 v0.l, -v0.l, v1.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_0_minnum_foldable_use_f16_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_min_f16_e32 v0, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e64 v0, -v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_0_minnum_foldable_use_f16_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1991,15 +1767,6 @@ define half @v_fneg_0_minnum_foldable_use_f16_ieee(half %a, half %b) #0 {
; GFX11-NSZ-TRUE16-NEXT: v_min_f16_e32 v0.l, 0, v0.l
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e64 v0.l, -v0.l, v1.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_0_minnum_foldable_use_f16_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_min_f16_e32 v0, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e64 v0, -v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half 0.0, half %a)
%fneg = fneg half %min
%mul = fmul half %fneg, %b
@@ -2026,6 +1793,14 @@ define half @v_fneg_inv2pi_minnum_foldable_use_f16(half %a, half %b) #0 {
; VI-NEXT: v_mul_f16_e64 v0, -v0, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_inv2pi_minnum_foldable_use_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_min_f16_e32 v0, 0.15915494, v0
+; GFX11-NEXT: v_mul_f16_e64 v0, -v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_inv2pi_minnum_foldable_use_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2034,16 +1809,6 @@ define half @v_fneg_inv2pi_minnum_foldable_use_f16(half %a, half %b) #0 {
; GFX11-SAFE-TRUE16-NEXT: v_min_f16_e32 v0.l, 0.15915494, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e64 v0.l, -v0.l, v1.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_inv2pi_minnum_foldable_use_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_min_f16_e32 v0, 0.15915494, v0
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e64 v0, -v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_inv2pi_minnum_foldable_use_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2052,15 +1817,6 @@ define half @v_fneg_inv2pi_minnum_foldable_use_f16(half %a, half %b) #0 {
; GFX11-NSZ-TRUE16-NEXT: v_min_f16_e32 v0.l, 0.15915494, v0.l
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e64 v0.l, -v0.l, v1.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_inv2pi_minnum_foldable_use_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_min_f16_e32 v0, 0.15915494, v0
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e64 v0, -v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half 0xH3118, half %a)
%fneg = fneg half %min
%mul = fmul half %fneg, %b
@@ -2086,6 +1842,13 @@ define half @v_fneg_0_minnum_foldable_use_f16_no_ieee(half %a, half %b) #4 {
; VI-NEXT: v_mul_f16_e64 v0, -v0, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_0_minnum_foldable_use_f16_no_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_min_f16_e32 v0, 0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f16_e64 v0, -v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_0_minnum_foldable_use_f16_no_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2093,15 +1856,6 @@ define half @v_fneg_0_minnum_foldable_use_f16_no_ieee(half %a, half %b) #4 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e64 v0.l, -v0.l, v1.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_0_minnum_foldable_use_f16_no_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_min_f16_e32 v0, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e64 v0, -v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_0_minnum_foldable_use_f16_no_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2109,14 +1863,6 @@ define half @v_fneg_0_minnum_foldable_use_f16_no_ieee(half %a, half %b) #4 {
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e64 v0.l, -v0.l, v1.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_0_minnum_foldable_use_f16_no_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_min_f16_e32 v0, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e64 v0, -v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half 0.0, half %a)
%fneg = fneg half %min
%mul = fmul half %fneg, %b
@@ -2144,6 +1890,15 @@ define { half, half } @v_fneg_minnum_multi_use_minnum_f16_ieee(half %a, half %b)
; VI-NEXT: v_mul_f16_e32 v1, -4.0, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_minnum_multi_use_minnum_f16_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX11-NEXT: v_mul_f16_e32 v1, -4.0, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_minnum_multi_use_minnum_f16_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2153,17 +1908,6 @@ define { half, half } @v_fneg_minnum_multi_use_minnum_f16_ieee(half %a, half %b)
; GFX11-SAFE-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e32 v1.l, -4.0, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_minnum_multi_use_minnum_f16_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e64 v1, -v1, -v1
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e32 v1, -4.0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_minnum_multi_use_minnum_f16_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2173,16 +1917,6 @@ define { half, half } @v_fneg_minnum_multi_use_minnum_f16_ieee(half %a, half %b)
; GFX11-NSZ-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v1.l, -4.0, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_minnum_multi_use_minnum_f16_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e64 v1, -v1, -v1
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e32 v1, -4.0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half %a, half %b)
%fneg = fneg half %min
%use1 = fmul half %min, 4.0
@@ -2212,6 +1946,14 @@ define <2 x half> @v_fneg_minnum_multi_use_minnum_f16_no_ieee(half %a, half %b)
; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_minnum_multi_use_minnum_f16_no_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f16_e32 v1, 4.0, v0
+; GFX11-NEXT: v_pack_b32_f16 v0, -v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_minnum_multi_use_minnum_f16_no_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2220,16 +1962,6 @@ define <2 x half> @v_fneg_minnum_multi_use_minnum_f16_no_ieee(half %a, half %b)
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e32 v0.h, 4.0, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, -v0.l, v0.h
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_minnum_multi_use_minnum_f16_no_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e32 v1, 4.0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, -v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_minnum_multi_use_minnum_f16_no_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2238,15 +1970,6 @@ define <2 x half> @v_fneg_minnum_multi_use_minnum_f16_no_ieee(half %a, half %b)
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v0.h, 4.0, v0.l
; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, -v0.l, v0.h
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_minnum_multi_use_minnum_f16_no_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e32 v1, 4.0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, -v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half %a, half %b)
%fneg = fneg half %min
%use1 = fmul half %min, 4.0
@@ -2278,6 +2001,14 @@ define half @v_fneg_maxnum_f16_ieee(half %a, half %b) #0 {
; VI-NEXT: v_min_f16_e32 v0, v0, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_maxnum_f16_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_maxnum_f16_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2286,16 +2017,6 @@ define half @v_fneg_maxnum_f16_ieee(half %a, half %b) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_maxnum_f16_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e64 v1, -v1, -v1
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_maxnum_f16_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2304,15 +2025,6 @@ define half @v_fneg_maxnum_f16_ieee(half %a, half %b) #0 {
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_maxnum_f16_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e64 v1, -v1, -v1
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%max = call half @llvm.maxnum.f16(half %a, half %b)
%fneg = fneg half %max
ret half %fneg
@@ -2335,29 +2047,21 @@ define half @v_fneg_maxnum_f16_no_ieee(half %a, half %b) #4 {
; VI-NEXT: v_min_f16_e64 v0, -v0, -v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_maxnum_f16_no_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_min_f16_e64 v0, -v0, -v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_maxnum_f16_no_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_min_f16_e64 v0.l, -v0.l, -v1.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_maxnum_f16_no_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_min_f16_e64 v0, -v0, -v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_maxnum_f16_no_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_min_f16_e64 v0.l, -v0.l, -v1.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_maxnum_f16_no_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_min_f16_e64 v0, -v0, -v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%max = call half @llvm.maxnum.f16(half %a, half %b)
%fneg = fneg half %max
ret half %fneg
@@ -2376,29 +2080,21 @@ define half @v_fneg_self_maxnum_f16_ieee(half %a) #0 {
; VI-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_self_maxnum_f16_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_self_maxnum_f16_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_self_maxnum_f16_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_self_maxnum_f16_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_self_maxnum_f16_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%max = call half @llvm.maxnum.f16(half %a, half %a)
%max.fneg = fneg half %max
ret half %max.fneg
@@ -2417,29 +2113,21 @@ define half @v_fneg_self_maxnum_f16_no_ieee(half %a) #4 {
; VI-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_self_maxnum_f16_no_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_self_maxnum_f16_no_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_self_maxnum_f16_no_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_self_maxnum_f16_no_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_self_maxnum_f16_no_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%max = call half @llvm.maxnum.f16(half %a, half %a)
%max.fneg = fneg half %max
ret half %max.fneg
@@ -2461,6 +2149,13 @@ define half @v_fneg_posk_maxnum_f16_ieee(half %a) #0 {
; VI-NEXT: v_min_f16_e32 v0, -4.0, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_posk_maxnum_f16_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_min_f16_e32 v0, -4.0, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_posk_maxnum_f16_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2468,15 +2163,6 @@ define half @v_fneg_posk_maxnum_f16_ieee(half %a) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_min_f16_e32 v0.l, -4.0, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_posk_maxnum_f16_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_min_f16_e32 v0, -4.0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_posk_maxnum_f16_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2484,14 +2170,6 @@ define half @v_fneg_posk_maxnum_f16_ieee(half %a) #0 {
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_min_f16_e32 v0.l, -4.0, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_posk_maxnum_f16_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_min_f16_e32 v0, -4.0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%max = call half @llvm.maxnum.f16(half 4.0, half %a)
%fneg = fneg half %max
ret half %fneg
@@ -2512,29 +2190,21 @@ define half @v_fneg_posk_maxnum_f16_no_ieee(half %a) #4 {
; VI-NEXT: v_min_f16_e64 v0, -v0, -4.0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_posk_maxnum_f16_no_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_min_f16_e64 v0, -v0, -4.0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_posk_maxnum_f16_no_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_min_f16_e64 v0.l, -v0.l, -4.0
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_posk_maxnum_f16_no_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_min_f16_e64 v0, -v0, -4.0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_posk_maxnum_f16_no_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_min_f16_e64 v0.l, -v0.l, -4.0
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_posk_maxnum_f16_no_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_min_f16_e64 v0, -v0, -4.0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%max = call half @llvm.maxnum.f16(half 4.0, half %a)
%fneg = fneg half %max
ret half %fneg
@@ -2556,6 +2226,13 @@ define half @v_fneg_negk_maxnum_f16_ieee(half %a) #0 {
; VI-NEXT: v_min_f16_e32 v0, 4.0, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_negk_maxnum_f16_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_min_f16_e32 v0, 4.0, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_negk_maxnum_f16_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2563,15 +2240,6 @@ define half @v_fneg_negk_maxnum_f16_ieee(half %a) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_min_f16_e32 v0.l, 4.0, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_negk_maxnum_f16_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_min_f16_e32 v0, 4.0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_negk_maxnum_f16_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2579,14 +2247,6 @@ define half @v_fneg_negk_maxnum_f16_ieee(half %a) #0 {
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_min_f16_e32 v0.l, 4.0, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_negk_maxnum_f16_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_min_f16_e32 v0, 4.0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%max = call half @llvm.maxnum.f16(half -4.0, half %a)
%fneg = fneg half %max
ret half %fneg
@@ -2607,29 +2267,21 @@ define half @v_fneg_negk_maxnum_f16_no_ieee(half %a) #4 {
; VI-NEXT: v_min_f16_e64 v0, -v0, 4.0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_negk_maxnum_f16_no_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_min_f16_e64 v0, -v0, 4.0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_negk_maxnum_f16_no_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_min_f16_e64 v0.l, -v0.l, 4.0
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_negk_maxnum_f16_no_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_min_f16_e64 v0, -v0, 4.0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_negk_maxnum_f16_no_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_min_f16_e64 v0.l, -v0.l, 4.0
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_negk_maxnum_f16_no_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_min_f16_e64 v0, -v0, 4.0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%max = call half @llvm.maxnum.f16(half -4.0, half %a)
%fneg = fneg half %max
ret half %fneg
@@ -2652,6 +2304,13 @@ define half @v_fneg_0_maxnum_f16(half %a) #0 {
; VI-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_0_maxnum_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e32 v0, 0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_0_maxnum_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2659,15 +2318,6 @@ define half @v_fneg_0_maxnum_f16(half %a) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_0_maxnum_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e32 v0, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_0_maxnum_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2675,14 +2325,6 @@ define half @v_fneg_0_maxnum_f16(half %a) #0 {
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_0_maxnum_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e32 v0, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%max = call nnan half @llvm.maxnum.f16(half 0.0, half %a)
%fneg = fneg half %max
ret half %fneg
@@ -2704,6 +2346,13 @@ define half @v_fneg_neg0_maxnum_f16_ieee(half %a) #0 {
; VI-NEXT: v_min_f16_e32 v0, 0, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_neg0_maxnum_f16_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_min_f16_e32 v0, 0, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_neg0_maxnum_f16_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2711,15 +2360,6 @@ define half @v_fneg_neg0_maxnum_f16_ieee(half %a) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_min_f16_e32 v0.l, 0, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_neg0_maxnum_f16_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_min_f16_e32 v0, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_neg0_maxnum_f16_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2727,14 +2367,6 @@ define half @v_fneg_neg0_maxnum_f16_ieee(half %a) #0 {
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_min_f16_e32 v0.l, 0, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_neg0_maxnum_f16_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_min_f16_e32 v0, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%max = call half @llvm.maxnum.f16(half -0.0, half %a)
%fneg = fneg half %max
ret half %fneg
@@ -2755,29 +2387,21 @@ define half @v_fneg_neg0_maxnum_f16_no_ieee(half %a) #4 {
; VI-NEXT: v_min_f16_e64 v0, -v0, 0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_neg0_maxnum_f16_no_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_min_f16_e64 v0, -v0, 0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_neg0_maxnum_f16_no_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_min_f16_e64 v0.l, -v0.l, 0
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_neg0_maxnum_f16_no_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_min_f16_e64 v0, -v0, 0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_neg0_maxnum_f16_no_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_min_f16_e64 v0.l, -v0.l, 0
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_neg0_maxnum_f16_no_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_min_f16_e64 v0, -v0, 0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%max = call half @llvm.maxnum.f16(half -0.0, half %a)
%fneg = fneg half %max
ret half %fneg
@@ -2803,6 +2427,14 @@ define half @v_fneg_0_maxnum_foldable_use_f16_ieee(half %a, half %b) #0 {
; VI-NEXT: v_mul_f16_e64 v0, -v0, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_0_maxnum_foldable_use_f16_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_max_f16_e32 v0, 0, v0
+; GFX11-NEXT: v_mul_f16_e64 v0, -v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_0_maxnum_foldable_use_f16_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2811,16 +2443,6 @@ define half @v_fneg_0_maxnum_foldable_use_f16_ieee(half %a, half %b) #0 {
; GFX11-SAFE-TRUE16-NEXT: v_max_f16_e32 v0.l, 0, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e64 v0.l, -v0.l, v1.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_0_maxnum_foldable_use_f16_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e32 v0, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e64 v0, -v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_0_maxnum_foldable_use_f16_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2829,15 +2451,6 @@ define half @v_fneg_0_maxnum_foldable_use_f16_ieee(half %a, half %b) #0 {
; GFX11-NSZ-TRUE16-NEXT: v_max_f16_e32 v0.l, 0, v0.l
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e64 v0.l, -v0.l, v1.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_0_maxnum_foldable_use_f16_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e32 v0, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e64 v0, -v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%max = call half @llvm.maxnum.f16(half 0.0, half %a)
%fneg = fneg half %max
%mul = fmul half %fneg, %b
@@ -2863,6 +2476,13 @@ define half @v_fneg_0_maxnum_foldable_use_f16_no_ieee(half %a, half %b) #4 {
; VI-NEXT: v_mul_f16_e64 v0, -v0, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_0_maxnum_foldable_use_f16_no_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e32 v0, 0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f16_e64 v0, -v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_0_maxnum_foldable_use_f16_no_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2870,15 +2490,6 @@ define half @v_fneg_0_maxnum_foldable_use_f16_no_ieee(half %a, half %b) #4 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e64 v0.l, -v0.l, v1.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_0_maxnum_foldable_use_f16_no_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e32 v0, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e64 v0, -v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_0_maxnum_foldable_use_f16_no_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2886,14 +2497,6 @@ define half @v_fneg_0_maxnum_foldable_use_f16_no_ieee(half %a, half %b) #4 {
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e64 v0.l, -v0.l, v1.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_0_maxnum_foldable_use_f16_no_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e32 v0, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e64 v0, -v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%max = call half @llvm.maxnum.f16(half 0.0, half %a)
%fneg = fneg half %max
%mul = fmul half %fneg, %b
@@ -2921,6 +2524,15 @@ define { half, half } @v_fneg_maxnum_multi_use_maxnum_f16_ieee(half %a, half %b)
; VI-NEXT: v_mul_f16_e32 v1, -4.0, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_maxnum_multi_use_maxnum_f16_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX11-NEXT: v_mul_f16_e32 v1, -4.0, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_maxnum_multi_use_maxnum_f16_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2930,17 +2542,6 @@ define { half, half } @v_fneg_maxnum_multi_use_maxnum_f16_ieee(half %a, half %b)
; GFX11-SAFE-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e32 v1.l, -4.0, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_maxnum_multi_use_maxnum_f16_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e64 v1, -v1, -v1
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e32 v1, -4.0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_maxnum_multi_use_maxnum_f16_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2950,16 +2551,6 @@ define { half, half } @v_fneg_maxnum_multi_use_maxnum_f16_ieee(half %a, half %b)
; GFX11-NSZ-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v1.l, -4.0, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_maxnum_multi_use_maxnum_f16_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e64 v1, -v1, -v1
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e32 v1, -4.0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%max = call half @llvm.maxnum.f16(half %a, half %b)
%fneg = fneg half %max
%use1 = fmul half %max, 4.0
@@ -2989,6 +2580,14 @@ define <2 x half> @v_fneg_maxnum_multi_use_maxnum_f16_no_ieee(half %a, half %b)
; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_maxnum_multi_use_maxnum_f16_no_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f16_e32 v1, 4.0, v0
+; GFX11-NEXT: v_pack_b32_f16 v0, -v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_maxnum_multi_use_maxnum_f16_no_ieee:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2997,16 +2596,6 @@ define <2 x half> @v_fneg_maxnum_multi_use_maxnum_f16_no_ieee(half %a, half %b)
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e32 v0.h, 4.0, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, -v0.l, v0.h
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_maxnum_multi_use_maxnum_f16_no_ieee:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e32 v1, 4.0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, -v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_maxnum_multi_use_maxnum_f16_no_ieee:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3015,15 +2604,6 @@ define <2 x half> @v_fneg_maxnum_multi_use_maxnum_f16_no_ieee(half %a, half %b)
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v0.h, 4.0, v0.l
; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, -v0.l, v0.h
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_maxnum_multi_use_maxnum_f16_no_ieee:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e32 v1, 4.0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, -v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%max = call half @llvm.maxnum.f16(half %a, half %b)
%fneg = fneg half %max
%use1 = fmul half %max, 4.0
@@ -3075,6 +2655,19 @@ define half @v_fneg_fma_f16(half %a, half %b, half %c) #0 {
; VI-NSZ-NEXT: v_fma_f16 v0, v0, -v1, -v2
; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-SAFE-LABEL: v_fneg_fma_f16:
+; GFX11-SAFE: ; %bb.0:
+; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-NEXT: v_fmac_f16_e32 v2, v0, v1
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v2
+; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-LABEL: v_fneg_fma_f16:
+; GFX11-NSZ: ; %bb.0:
+; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, -v1, -v2
+; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_fma_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3082,26 +2675,11 @@ define half @v_fneg_fma_f16(half %a, half %b, half %c) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v2.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_fma_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_fmac_f16_e32 v2, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v2
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_fma_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, -v1.l, -v2.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_fma_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_fma_f16 v0, v0, -v1, -v2
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fma = call half @llvm.fma.f16(half %a, half %b, half %c)
%fneg = fneg half %fma
ret half %fneg
@@ -3128,6 +2706,13 @@ define { half, half } @v_fneg_fma_store_use_fma_f16(half %a, half %b, half %c) #
; VI-NEXT: v_xor_b32_e32 v0, 0x8000, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_fma_store_use_fma_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_fma_f16 v1, v0, v1, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_fma_store_use_fma_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3135,15 +2720,6 @@ define { half, half } @v_fneg_fma_store_use_fma_f16(half %a, half %b, half %c) #
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v1.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_fma_store_use_fma_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_fma_f16 v1, v0, v1, v2
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_fma_store_use_fma_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3151,14 +2727,6 @@ define { half, half } @v_fneg_fma_store_use_fma_f16(half %a, half %b, half %c) #
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v1.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_fma_store_use_fma_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_fma_f16 v1, v0, v1, v2
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fma = call half @llvm.fma.f16(half %a, half %b, half %c)
%fneg = fneg half %fma
%insert.0 = insertvalue { half, half } poison, half %fneg, 0
@@ -3209,6 +2777,22 @@ define { half, half } @v_fneg_fma_multi_use_fma_f16(half %a, half %b, half %c) #
; VI-NSZ-NEXT: v_mul_f16_e32 v1, -4.0, v0
; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-SAFE-LABEL: v_fneg_fma_multi_use_fma_f16:
+; GFX11-SAFE: ; %bb.0:
+; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-NEXT: v_fmac_f16_e32 v2, v0, v1
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v2
+; GFX11-SAFE-NEXT: v_mul_f16_e32 v1, 4.0, v2
+; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-LABEL: v_fneg_fma_multi_use_fma_f16:
+; GFX11-NSZ: ; %bb.0:
+; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, -v1, -v2
+; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-NEXT: v_mul_f16_e32 v1, -4.0, v0
+; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_fma_multi_use_fma_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3217,16 +2801,6 @@ define { half, half } @v_fneg_fma_multi_use_fma_f16(half %a, half %b, half %c) #
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v2.l
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e32 v1.l, 4.0, v2.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_fma_multi_use_fma_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_fmac_f16_e32 v2, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e32 v1, 4.0, v2
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_fma_multi_use_fma_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3234,14 +2808,6 @@ define { half, half } @v_fneg_fma_multi_use_fma_f16(half %a, half %b, half %c) #
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v1.l, -4.0, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_fma_multi_use_fma_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_fma_f16 v0, v0, -v1, -v2
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e32 v1, -4.0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fma = call half @llvm.fma.f16(half %a, half %b, half %c)
%fneg = fneg half %fma
%use1 = fmul half %fma, 4.0
@@ -3289,6 +2855,19 @@ define half @v_fneg_fma_fneg_x_y_f16(half %a, half %b, half %c) #0 {
; VI-NSZ-NEXT: v_fma_f16 v0, v0, v1, -v2
; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-SAFE-LABEL: v_fneg_fma_fneg_x_y_f16:
+; GFX11-SAFE: ; %bb.0:
+; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-NEXT: v_fma_f16 v0, -v0, v1, v2
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-LABEL: v_fneg_fma_fneg_x_y_f16:
+; GFX11-NSZ: ; %bb.0:
+; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, v1, -v2
+; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_fma_fneg_x_y_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3296,26 +2875,11 @@ define half @v_fneg_fma_fneg_x_y_f16(half %a, half %b, half %c) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_fma_fneg_x_y_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_fma_f16 v0, -v0, v1, v2
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_fma_fneg_x_y_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, v1.l, -v2.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_fma_fneg_x_y_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_fma_f16 v0, v0, v1, -v2
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
%fma = call half @llvm.fma.f16(half %fneg.a, half %b, half %c)
%fneg = fneg half %fma
@@ -3361,6 +2925,19 @@ define half @v_fneg_fma_x_fneg_y_f16(half %a, half %b, half %c) #0 {
; VI-NSZ-NEXT: v_fma_f16 v0, v0, v1, -v2
; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-SAFE-LABEL: v_fneg_fma_x_fneg_y_f16:
+; GFX11-SAFE: ; %bb.0:
+; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-NEXT: v_fma_f16 v0, v0, -v1, v2
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-LABEL: v_fneg_fma_x_fneg_y_f16:
+; GFX11-NSZ: ; %bb.0:
+; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, v1, -v2
+; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_fma_x_fneg_y_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3368,26 +2945,11 @@ define half @v_fneg_fma_x_fneg_y_f16(half %a, half %b, half %c) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_fma_x_fneg_y_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_fma_f16 v0, v0, -v1, v2
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_fma_x_fneg_y_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, v1.l, -v2.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_fma_x_fneg_y_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_fma_f16 v0, v0, v1, -v2
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fneg.b = fneg half %b
%fma = call half @llvm.fma.f16(half %a, half %fneg.b, half %c)
%fneg = fneg half %fma
@@ -3433,6 +2995,19 @@ define half @v_fneg_fma_fneg_fneg_y_f16(half %a, half %b, half %c) #0 {
; VI-NSZ-NEXT: v_fma_f16 v0, v0, -v1, -v2
; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-SAFE-LABEL: v_fneg_fma_fneg_fneg_y_f16:
+; GFX11-SAFE: ; %bb.0:
+; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-NEXT: v_fmac_f16_e32 v2, v0, v1
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v2
+; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-LABEL: v_fneg_fma_fneg_fneg_y_f16:
+; GFX11-NSZ: ; %bb.0:
+; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, -v1, -v2
+; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_fma_fneg_fneg_y_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3440,26 +3015,11 @@ define half @v_fneg_fma_fneg_fneg_y_f16(half %a, half %b, half %c) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v2.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_fma_fneg_fneg_y_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_fmac_f16_e32 v2, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v2
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_fma_fneg_fneg_y_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, -v1.l, -v2.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_fma_fneg_fneg_y_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_fma_f16 v0, v0, -v1, -v2
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
%fneg.b = fneg half %b
%fma = call half @llvm.fma.f16(half %fneg.a, half %fneg.b, half %c)
@@ -3506,6 +3066,19 @@ define half @v_fneg_fma_fneg_x_fneg_f16(half %a, half %b, half %c) #0 {
; VI-NSZ-NEXT: v_fma_f16 v0, v0, v1, v2
; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-SAFE-LABEL: v_fneg_fma_fneg_x_fneg_f16:
+; GFX11-SAFE: ; %bb.0:
+; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-NEXT: v_fma_f16 v0, -v0, v1, -v2
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-LABEL: v_fneg_fma_fneg_x_fneg_f16:
+; GFX11-NSZ: ; %bb.0:
+; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_fma_fneg_x_fneg_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3513,26 +3086,11 @@ define half @v_fneg_fma_fneg_x_fneg_f16(half %a, half %b, half %c) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_fma_fneg_x_fneg_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_fma_f16 v0, -v0, v1, -v2
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_fma_fneg_x_fneg_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, v1.l, v2.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_fma_fneg_x_fneg_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_fma_f16 v0, v0, v1, v2
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
%fneg.c = fneg half %c
%fma = call half @llvm.fma.f16(half %fneg.a, half %b, half %fneg.c)
@@ -3579,6 +3137,19 @@ define half @v_fneg_fma_x_y_fneg_f16(half %a, half %b, half %c) #0 {
; VI-NSZ-NEXT: v_fma_f16 v0, v0, -v1, v2
; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-SAFE-LABEL: v_fneg_fma_x_y_fneg_f16:
+; GFX11-SAFE: ; %bb.0:
+; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-NEXT: v_fma_f16 v0, v0, v1, -v2
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-LABEL: v_fneg_fma_x_y_fneg_f16:
+; GFX11-NSZ: ; %bb.0:
+; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, -v1, v2
+; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_fma_x_y_fneg_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3586,26 +3157,11 @@ define half @v_fneg_fma_x_y_fneg_f16(half %a, half %b, half %c) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_fma_x_y_fneg_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_fma_f16 v0, v0, v1, -v2
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_fma_x_y_fneg_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, -v1.l, v2.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_fma_x_y_fneg_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_fma_f16 v0, v0, -v1, v2
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fneg.c = fneg half %c
%fma = call half @llvm.fma.f16(half %a, half %b, half %fneg.c)
%fneg = fneg half %fma
@@ -3657,6 +3213,24 @@ define { half, half } @v_fneg_fma_store_use_fneg_x_y_f16(half %a, half %b, half
; VI-NSZ-NEXT: v_mov_b32_e32 v1, v3
; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-SAFE-LABEL: v_fneg_fma_store_use_fneg_x_y_f16:
+; GFX11-SAFE: ; %bb.0:
+; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-NEXT: v_fma_f16 v1, -v0, v1, v2
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SAFE-NEXT: v_xor_b32_e32 v2, 0x8000, v1
+; GFX11-SAFE-NEXT: v_xor_b32_e32 v1, 0x8000, v0
+; GFX11-SAFE-NEXT: v_mov_b32_e32 v0, v2
+; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-LABEL: v_fneg_fma_store_use_fneg_x_y_f16:
+; GFX11-NSZ: ; %bb.0:
+; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-NEXT: v_fma_f16 v2, v0, v1, -v2
+; GFX11-NSZ-NEXT: v_xor_b32_e32 v1, 0x8000, v0
+; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-NEXT: v_mov_b32_e32 v0, v2
+; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_fma_store_use_fneg_x_y_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3666,17 +3240,6 @@ define { half, half } @v_fneg_fma_store_use_fneg_x_y_f16(half %a, half %b, half
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v1.l, 0x8000, v0.h
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_fma_store_use_fneg_x_y_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_fma_f16 v1, -v0, v1, v2
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x8000, v1
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v1, 0x8000, v0
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v0, v2
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_fma_store_use_fneg_x_y_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3685,15 +3248,6 @@ define { half, half } @v_fneg_fma_store_use_fneg_x_y_f16(half %a, half %b, half
; GFX11-NSZ-TRUE16-NEXT: v_fma_f16 v0.l, v0.h, v1.l, -v2.l
; GFX11-NSZ-TRUE16-NEXT: v_xor_b16 v1.l, 0x8000, v0.h
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_fma_store_use_fneg_x_y_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_fma_f16 v2, v0, v1, -v2
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v1, 0x8000, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v0, v2
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
%fma = call half @llvm.fma.f16(half %fneg.a, half %b, half %c)
%fneg = fneg half %fma
@@ -3752,6 +3306,24 @@ define { half, half } @v_fneg_fma_multi_use_fneg_x_y_f16(half %a, half %b, half
; VI-NSZ-NEXT: v_mov_b32_e32 v0, v2
; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-SAFE-LABEL: v_fneg_fma_multi_use_fneg_x_y_f16:
+; GFX11-SAFE: ; %bb.0:
+; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-NEXT: v_fma_f16 v1, -v0, v1, v2
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SAFE-NEXT: v_xor_b32_e32 v2, 0x8000, v1
+; GFX11-SAFE-NEXT: v_mul_f16_e64 v1, -v0, v3
+; GFX11-SAFE-NEXT: v_mov_b32_e32 v0, v2
+; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-LABEL: v_fneg_fma_multi_use_fneg_x_y_f16:
+; GFX11-NSZ: ; %bb.0:
+; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-NEXT: v_fma_f16 v2, v0, v1, -v2
+; GFX11-NSZ-NEXT: v_mul_f16_e64 v1, -v0, v3
+; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-NEXT: v_mov_b32_e32 v0, v2
+; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_fma_multi_use_fneg_x_y_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3761,17 +3333,6 @@ define { half, half } @v_fneg_fma_multi_use_fneg_x_y_f16(half %a, half %b, half
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e64 v1.l, -v0.h, v3.l
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_fma_multi_use_fneg_x_y_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_fma_f16 v1, -v0, v1, v2
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x8000, v1
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e64 v1, -v0, v3
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v0, v2
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_fma_multi_use_fneg_x_y_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3780,15 +3341,6 @@ define { half, half } @v_fneg_fma_multi_use_fneg_x_y_f16(half %a, half %b, half
; GFX11-NSZ-TRUE16-NEXT: v_fma_f16 v0.l, v0.h, v1.l, -v2.l
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e64 v1.l, -v0.h, v3.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_fma_multi_use_fneg_x_y_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_fma_f16 v2, v0, v1, -v2
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e64 v1, -v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v0, v2
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
%fma = call half @llvm.fma.f16(half %fneg.a, half %b, half %c)
%fneg = fneg half %fma
@@ -3841,6 +3393,19 @@ define half @v_fneg_fmad_f16(half %a, half %b, half %c) #0 {
; VI-NSZ-NEXT: v_fma_f16 v0, v0, -v1, -v2
; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-SAFE-LABEL: v_fneg_fmad_f16:
+; GFX11-SAFE: ; %bb.0:
+; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-NEXT: v_fmac_f16_e32 v2, v0, v1
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v2
+; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-LABEL: v_fneg_fmad_f16:
+; GFX11-NSZ: ; %bb.0:
+; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, -v1, -v2
+; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_fmad_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3848,26 +3413,11 @@ define half @v_fneg_fmad_f16(half %a, half %b, half %c) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v2.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_fmad_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_fmac_f16_e32 v2, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v2
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_fmad_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, -v1.l, -v2.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_fmad_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_fma_f16 v0, v0, -v1, -v2
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fma = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
%fneg = fneg half %fma
ret half %fneg
@@ -4048,6 +3598,22 @@ define { half, half } @v_fneg_fmad_multi_use_fmad_f16(half %a, half %b, half %c)
; VI-NSZ-NEXT: v_mul_f16_e32 v1, -4.0, v0
; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-SAFE-LABEL: v_fneg_fmad_multi_use_fmad_f16:
+; GFX11-SAFE: ; %bb.0:
+; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-NEXT: v_fmac_f16_e32 v2, v0, v1
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v2
+; GFX11-SAFE-NEXT: v_mul_f16_e32 v1, 4.0, v2
+; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-LABEL: v_fneg_fmad_multi_use_fmad_f16:
+; GFX11-NSZ: ; %bb.0:
+; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, -v1, -v2
+; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-NEXT: v_mul_f16_e32 v1, -4.0, v0
+; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_fmad_multi_use_fmad_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4056,16 +3622,6 @@ define { half, half } @v_fneg_fmad_multi_use_fmad_f16(half %a, half %b, half %c)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v2.l
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e32 v1.l, 4.0, v2.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_fmad_multi_use_fmad_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_fmac_f16_e32 v2, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e32 v1, 4.0, v2
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_fmad_multi_use_fmad_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4073,14 +3629,6 @@ define { half, half } @v_fneg_fmad_multi_use_fmad_f16(half %a, half %b, half %c)
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v1.l, -4.0, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_fmad_multi_use_fmad_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_fma_f16 v0, v0, -v1, -v2
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e32 v1, -4.0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fma = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
%fneg = fneg half %fma
%use1 = fmul half %fma, 4.0
@@ -4108,6 +3656,14 @@ define double @v_fneg_fp_extend_f16_to_f64(half %a) #0 {
; VI-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_fp_extend_f16_to_f64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_fp_extend_f16_to_f64:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4116,16 +3672,6 @@ define double @v_fneg_fp_extend_f16_to_f64(half %a) #0 {
; GFX11-SAFE-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_fp_extend_f16_to_f64:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_fp_extend_f16_to_f64:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4134,15 +3680,6 @@ define double @v_fneg_fp_extend_f16_to_f64(half %a) #0 {
; GFX11-NSZ-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
; GFX11-NSZ-TRUE16-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_fp_extend_f16_to_f64:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fpext = fpext half %a to double
%fneg = fneg double %fpext
ret double %fneg
@@ -4162,6 +3699,13 @@ define double @v_fneg_fp_extend_fneg_f16_to_f64(half %a) #0 {
; VI-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_fp_extend_fneg_f16_to_f64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_fp_extend_fneg_f16_to_f64:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4169,15 +3713,6 @@ define double @v_fneg_fp_extend_fneg_f16_to_f64(half %a) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_fp_extend_fneg_f16_to_f64:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_fp_extend_fneg_f16_to_f64:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4185,14 +3720,6 @@ define double @v_fneg_fp_extend_fneg_f16_to_f64(half %a) #0 {
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_fp_extend_fneg_f16_to_f64:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
%fpext = fpext half %fneg.a to double
%fneg = fneg double %fpext
@@ -4217,6 +3744,15 @@ define { double, half } @v_fneg_fp_extend_store_use_fneg_f16_to_f64(half %a) #0
; VI-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_fp_extend_store_use_fneg_f16_to_f64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_mov_b32_e32 v2, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GFX11-NEXT: v_xor_b32_e32 v2, 0x8000, v2
+; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_fp_extend_store_use_fneg_f16_to_f64:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4226,17 +3762,6 @@ define { double, half } @v_fneg_fp_extend_store_use_fneg_f16_to_f64(half %a) #0
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v2.l, 0x8000, v2.l
; GFX11-SAFE-TRUE16-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_fp_extend_store_use_fneg_f16_to_f64:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v2, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v2
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x8000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_fp_extend_store_use_fneg_f16_to_f64:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4246,16 +3771,6 @@ define { double, half } @v_fneg_fp_extend_store_use_fneg_f16_to_f64(half %a) #0
; GFX11-NSZ-TRUE16-NEXT: v_xor_b16 v2.l, 0x8000, v2.l
; GFX11-NSZ-TRUE16-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_fp_extend_store_use_fneg_f16_to_f64:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v2, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v2
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v2, 0x8000, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
%fpext = fpext half %fneg.a to double
%fneg = fneg double %fpext
@@ -4282,6 +3797,16 @@ define { double, double } @v_fneg_multi_use_fp_extend_fneg_f16_to_f64(half %a) #
; VI-NEXT: v_mov_b32_e32 v0, v2
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cvt_f64_f32_e32 v[2:3], v0
+; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f64:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4292,18 +3817,6 @@ define { double, double } @v_fneg_multi_use_fp_extend_fneg_f16_to_f64(half %a) #
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-SAFE-TRUE16-NEXT: v_mov_b32_e32 v0, v2
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f64:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cvt_f64_f32_e32 v[2:3], v0
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v1, 0x80000000, v3
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v0, v2
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f64:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4314,17 +3827,6 @@ define { double, double } @v_fneg_multi_use_fp_extend_fneg_f16_to_f64(half %a) #
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NSZ-TRUE16-NEXT: v_mov_b32_e32 v0, v2
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f64:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cvt_f64_f32_e32 v[2:3], v0
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v1, 0x80000000, v3
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v0, v2
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fpext = fpext half %a to double
%fneg = fneg double %fpext
%insert.0 = insertvalue { double, double } poison, double %fneg, 0
@@ -4351,6 +3853,15 @@ define { double, double } @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f64(h
; VI-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
+; GFX11-NEXT: v_mul_f64 v[2:3], v[0:1], 4.0
+; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f64:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4360,17 +3871,6 @@ define { double, double } @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f64(h
; GFX11-SAFE-TRUE16-NEXT: v_mul_f64 v[2:3], v[0:1], 4.0
; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f64:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f64 v[2:3], v[0:1], 4.0
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f64:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4380,16 +3880,6 @@ define { double, double } @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f64(h
; GFX11-NSZ-TRUE16-NEXT: v_mul_f64 v[2:3], v[0:1], 4.0
; GFX11-NSZ-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f64:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f64 v[2:3], v[0:1], 4.0
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fpext = fpext half %a to double
%fneg = fneg double %fpext
%mul = fmul double %fpext, 4.0
@@ -4413,6 +3903,13 @@ define { float, float } @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(half %a) #0
; VI-NEXT: v_xor_b32_e32 v0, 0x80000000, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_xor_b32_e32 v0, 0x80000000, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4420,15 +3917,6 @@ define { float, float } @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(half %a) #0
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80000000, v1
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_cvt_f32_f16_e32 v1, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x80000000, v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4436,14 +3924,6 @@ define { float, float } @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(half %a) #0
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80000000, v1
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_cvt_f32_f16_e32 v1, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v0, 0x80000000, v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fpext = fpext half %a to float
%fneg = fneg float %fpext
%insert.0 = insertvalue { float, float } poison, float %fneg, 0
@@ -4863,6 +4343,56 @@ define { half, double } @v_fneg_fp_round_store_use_fneg_f64_to_f16(double %a) #0
; VI-NEXT: v_mov_b32_e32 v1, v3
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_fp_round_store_use_fneg_f64_to_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_and_or_b32 v2, 0x1ff, v1, v0
+; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1
+; GFX11-NEXT: v_bfe_u32 v4, v1, 20, 11
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2
+; GFX11-NEXT: v_sub_nc_u32_e32 v5, 0x3f1, v4
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_and_or_b32 v2, 0xffe, v3, v2
+; GFX11-NEXT: v_med3_i32 v3, v5, 0, 13
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_or_b32_e32 v5, 0x1000, v2
+; GFX11-NEXT: v_lshrrev_b32_e32 v6, v3, v5
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v3, v3, v6
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, v3, v5
+; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_or_b32_e32 v3, v6, v3
+; GFX11-NEXT: v_add_nc_u32_e32 v4, 0xfffffc10, v4
+; GFX11-NEXT: v_lshl_or_b32 v5, v4, 12, v2
+; GFX11-NEXT: v_cmp_gt_i32_e32 vcc_lo, 1, v4
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc_lo
+; GFX11-NEXT: v_and_b32_e32 v5, 7, v3
+; GFX11-NEXT: v_lshrrev_b32_e32 v3, 2, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_cmp_lt_i32_e32 vcc_lo, 5, v5
+; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v5
+; GFX11-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2
+; GFX11-NEXT: v_or_b32_e32 v5, v5, v6
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_dual_mov_b32 v6, 0x7e00 :: v_dual_add_nc_u32 v3, v3, v5
+; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7c00, v6, vcc_lo
+; GFX11-NEXT: v_cmp_gt_i32_e32 vcc_lo, 31, v4
+; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7c00, v3, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0x40f, v4
+; GFX11-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_and_or_b32 v3, 0x8000, v5, v2
+; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v1
+; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, v3
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_fp_round_store_use_fneg_f64_to_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4914,58 +4444,6 @@ define { half, double } @v_fneg_fp_round_store_use_fneg_f64_to_f16(double %a) #0
; GFX11-SAFE-TRUE16-NEXT: v_mov_b32_e32 v1, v0
; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_fp_round_store_use_fneg_f64_to_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_or_b32 v2, 0x1ff, v1, v0
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 8, v1
-; GFX11-SAFE-FAKE16-NEXT: v_bfe_u32 v4, v1, 20, 11
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2
-; GFX11-SAFE-FAKE16-NEXT: v_sub_nc_u32_e32 v5, 0x3f1, v4
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_and_or_b32 v2, 0xffe, v3, v2
-; GFX11-SAFE-FAKE16-NEXT: v_med3_i32 v3, v5, 0, 13
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v5, 0x1000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, v3, v5
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshlrev_b32_e32 v3, v3, v6
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, v3, v5
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v3, v6, v3
-; GFX11-SAFE-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0xfffffc10, v4
-; GFX11-SAFE-FAKE16-NEXT: v_lshl_or_b32 v5, v4, 12, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 1, v4
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v5, 7, v3
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 2, v3
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_lt_i32_e32 vcc_lo, 5, v5
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v5
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2
-; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v5, v5, v6
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_dual_mov_b32 v6, 0x7e00 :: v_dual_add_nc_u32 v3, v3, v5
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v2, 0x7c00, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 31, v4
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v3, 0x7c00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0x40f, v4
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_and_or_b32 v3, 0x8000, v5, v2
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80000000, v1
-; GFX11-SAFE-FAKE16-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_fp_round_store_use_fneg_f64_to_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5017,57 +4495,6 @@ define { half, double } @v_fneg_fp_round_store_use_fneg_f64_to_f16(double %a) #0
; GFX11-NSZ-TRUE16-NEXT: v_mov_b32_e32 v1, v0
; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_fp_round_store_use_fneg_f64_to_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_or_b32 v2, 0x1ff, v1, v0
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 8, v1
-; GFX11-NSZ-FAKE16-NEXT: v_bfe_u32 v4, v1, 20, 11
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2
-; GFX11-NSZ-FAKE16-NEXT: v_sub_nc_u32_e32 v5, 0x3f1, v4
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_and_or_b32 v2, 0xffe, v3, v2
-; GFX11-NSZ-FAKE16-NEXT: v_med3_i32 v3, v5, 0, 13
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v5, 0x1000, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, v3, v5
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshlrev_b32_e32 v3, v3, v6
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, v3, v5
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v3, v6, v3
-; GFX11-NSZ-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0xfffffc10, v4
-; GFX11-NSZ-FAKE16-NEXT: v_lshl_or_b32 v5, v4, 12, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 1, v4
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v5, 7, v3
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 2, v3
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_lt_i32_e32 vcc_lo, 5, v5
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v5
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2
-; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v5, v5, v6
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_dual_mov_b32 v6, 0x7e00 :: v_dual_add_nc_u32 v3, v3, v5
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v2, 0x7c00, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 31, v4
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v3, 0x7c00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0x40f, v4
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_and_or_b32 v3, 0x8000, v5, v2
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80000000, v1
-; GFX11-NSZ-FAKE16-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg double %a
%fpround = fptrunc double %fneg.a to half
%fneg = fneg half %fpround
@@ -5336,6 +4763,55 @@ define { half, half } @v_fneg_multi_use_fp_round_fneg_f64_to_f16(double %a) #0 {
; VI-NEXT: v_xor_b32_e32 v0, 0x8000, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_multi_use_fp_round_fneg_f64_to_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_and_or_b32 v0, 0x1ff, v1, v0
+; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v1
+; GFX11-NEXT: v_bfe_u32 v3, v1, 20, 11
+; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_sub_nc_u32_e32 v4, 0x3f1, v3
+; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_and_or_b32 v0, 0xffe, v2, v0
+; GFX11-NEXT: v_med3_i32 v2, v4, 0, 13
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_or_b32_e32 v4, 0x1000, v0
+; GFX11-NEXT: v_lshrrev_b32_e32 v5, v2, v4
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v2, v2, v5
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v4
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_or_b32_e32 v2, v5, v2
+; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xfffffc10, v3
+; GFX11-NEXT: v_lshl_or_b32 v4, v3, 12, v0
+; GFX11-NEXT: v_cmp_gt_i32_e32 vcc_lo, 1, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc_lo
+; GFX11-NEXT: v_and_b32_e32 v4, 7, v2
+; GFX11-NEXT: v_lshrrev_b32_e32 v2, 2, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_cmp_lt_i32_e32 vcc_lo, 5, v4
+; GFX11-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v4
+; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_or_b32_e32 v4, v4, v5
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_dual_mov_b32 v5, 0x7e00 :: v_dual_add_nc_u32 v2, v2, v4
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7c00, v5, vcc_lo
+; GFX11-NEXT: v_cmp_gt_i32_e32 vcc_lo, 31, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7c00, v2, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0x40f, v3
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_and_or_b32 v1, 0x8000, v1, v0
+; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_multi_use_fp_round_fneg_f64_to_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5385,57 +4861,6 @@ define { half, half } @v_fneg_multi_use_fp_round_fneg_f64_to_f16(double %a) #0 {
; GFX11-SAFE-TRUE16-NEXT: v_and_or_b32 v1, 0x8000, v1, v0
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v1.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_multi_use_fp_round_fneg_f64_to_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_or_b32 v0, 0x1ff, v1, v0
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 8, v1
-; GFX11-SAFE-FAKE16-NEXT: v_bfe_u32 v3, v1, 20, 11
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_sub_nc_u32_e32 v4, 0x3f1, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_and_or_b32 v0, 0xffe, v2, v0
-; GFX11-SAFE-FAKE16-NEXT: v_med3_i32 v2, v4, 0, 13
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v4, 0x1000, v0
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, v2, v4
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshlrev_b32_e32 v2, v2, v5
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v4
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, v5, v2
-; GFX11-SAFE-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0xfffffc10, v3
-; GFX11-SAFE-FAKE16-NEXT: v_lshl_or_b32 v4, v3, 12, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 1, v3
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v4, 7, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 2, v2
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_lt_i32_e32 vcc_lo, 5, v4
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v4
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v4, v4, v5
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_dual_mov_b32 v5, 0x7e00 :: v_dual_add_nc_u32 v2, v2, v4
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7c00, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 31, v3
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v2, 0x7c00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0x40f, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_and_or_b32 v1, 0x8000, v1, v0
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_multi_use_fp_round_fneg_f64_to_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5485,56 +4910,6 @@ define { half, half } @v_fneg_multi_use_fp_round_fneg_f64_to_f16(double %a) #0 {
; GFX11-NSZ-TRUE16-NEXT: v_and_or_b32 v1, 0x8000, v1, v0
; GFX11-NSZ-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v1.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_multi_use_fp_round_fneg_f64_to_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_or_b32 v0, 0x1ff, v1, v0
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 8, v1
-; GFX11-NSZ-FAKE16-NEXT: v_bfe_u32 v3, v1, 20, 11
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_sub_nc_u32_e32 v4, 0x3f1, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_and_or_b32 v0, 0xffe, v2, v0
-; GFX11-NSZ-FAKE16-NEXT: v_med3_i32 v2, v4, 0, 13
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v4, 0x1000, v0
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, v2, v4
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshlrev_b32_e32 v2, v2, v5
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v4
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, v5, v2
-; GFX11-NSZ-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0xfffffc10, v3
-; GFX11-NSZ-FAKE16-NEXT: v_lshl_or_b32 v4, v3, 12, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 1, v3
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v4, 7, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 2, v2
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_lt_i32_e32 vcc_lo, 5, v4
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v4
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v4, v4, v5
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_dual_mov_b32 v5, 0x7e00 :: v_dual_add_nc_u32 v2, v2, v4
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7c00, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 31, v3
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v2, 0x7c00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0x40f, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_and_or_b32 v1, 0x8000, v1, v0
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fpround = fptrunc double %a to half
%fneg = fneg half %fpround
%insert.0 = insertvalue { half, half } poison, half %fneg, 0
@@ -5561,29 +4936,21 @@ define half @v_fneg_trunc_f16(half %a) #0 {
; VI-NEXT: v_trunc_f16_e64 v0, -v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_trunc_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_trunc_f16_e64 v0, -v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_trunc_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_trunc_f16_e64 v0.l, -v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_trunc_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_trunc_f16_e64 v0, -v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_trunc_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_trunc_f16_e64 v0.l, -v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_trunc_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_trunc_f16_e64 v0, -v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%trunc = call half @llvm.trunc.f16(half %a)
%fneg = fneg half %trunc
ret half %fneg
@@ -5650,6 +5017,34 @@ define half @v_fneg_round_f16(half %a) #0 {
; VI-NSZ-NEXT: v_sub_f16_e64 v0, -v1, v0
; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-SAFE-LABEL: v_fneg_round_f16:
+; GFX11-SAFE: ; %bb.0:
+; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-NEXT: v_trunc_f16_e32 v1, v0
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-NEXT: v_sub_f16_e32 v2, v0, v1
+; GFX11-SAFE-NEXT: v_cmp_ge_f16_e64 s0, |v2|, 0.5
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v2, 0, 0x3c00, s0
+; GFX11-SAFE-NEXT: v_bfi_b32 v0, 0x7fff, v2, v0
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-NEXT: v_add_f16_e32 v0, v1, v0
+; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-LABEL: v_fneg_round_f16:
+; GFX11-NSZ: ; %bb.0:
+; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-NEXT: v_trunc_f16_e32 v1, v0
+; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-NEXT: v_sub_f16_e32 v2, v0, v1
+; GFX11-NSZ-NEXT: v_cmp_ge_f16_e64 s0, |v2|, 0.5
+; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v2, 0, 0x3c00, s0
+; GFX11-NSZ-NEXT: v_bfi_b32 v0, 0x7fff, v2, v0
+; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-NEXT: v_sub_f16_e64 v0, -v1, v0
+; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_round_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5667,22 +5062,6 @@ define half @v_fneg_round_f16(half %a) #0 {
; GFX11-SAFE-TRUE16-NEXT: v_add_f16_e32 v0.l, v1.h, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_round_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_trunc_f16_e32 v1, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_sub_f16_e32 v2, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ge_f16_e64 s0, |v2|, 0.5
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 0x3c00, s0
-; GFX11-SAFE-FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, v2, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_add_f16_e32 v0, v1, v0
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_round_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5699,20 +5078,6 @@ define half @v_fneg_round_f16(half %a) #0 {
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_sub_f16_e64 v0.l, -v1.h, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_round_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_trunc_f16_e32 v1, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_sub_f16_e32 v2, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ge_f16_e64 s0, |v2|, 0.5
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 0x3c00, s0
-; GFX11-NSZ-FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, v2, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_sub_f16_e64 v0, -v1, v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%round = call half @llvm.round.f16(half %a)
%fneg = fneg half %round
ret half %fneg
@@ -5737,29 +5102,21 @@ define half @v_fneg_rint_f16(half %a) #0 {
; VI-NEXT: v_rndne_f16_e64 v0, -v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_rint_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_rndne_f16_e64 v0, -v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_rint_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_rndne_f16_e64 v0.l, -v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_rint_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_rndne_f16_e64 v0, -v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_rint_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_rndne_f16_e64 v0.l, -v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_rint_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_rndne_f16_e64 v0, -v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%rint = call half @llvm.rint.f16(half %a)
%fneg = fneg half %rint
ret half %fneg
@@ -5784,29 +5141,21 @@ define half @v_fneg_nearbyint_f16(half %a) #0 {
; VI-NEXT: v_rndne_f16_e64 v0, -v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_nearbyint_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_rndne_f16_e64 v0, -v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_nearbyint_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_rndne_f16_e64 v0.l, -v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_nearbyint_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_rndne_f16_e64 v0, -v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_nearbyint_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_rndne_f16_e64 v0.l, -v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_nearbyint_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_rndne_f16_e64 v0, -v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%nearbyint = call half @llvm.nearbyint.f16(half %a)
%fneg = fneg half %nearbyint
ret half %fneg
@@ -5835,6 +5184,13 @@ define half @v_fneg_sin_f16(half %a) #0 {
; VI-NEXT: v_sin_f16_e32 v0, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_sin_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_mul_f16_e32 v0, 0xb118, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_sin_f16_e32 v0, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_sin_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5842,15 +5198,6 @@ define half @v_fneg_sin_f16(half %a) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_sin_f16_e32 v0.l, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_sin_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e32 v0, 0xb118, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_sin_f16_e32 v0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_sin_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5858,14 +5205,6 @@ define half @v_fneg_sin_f16(half %a) #0 {
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_sin_f16_e32 v0.l, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_sin_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e32 v0, 0xb118, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_sin_f16_e32 v0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%sin = call half @llvm.sin.f16(half %a)
%fneg = fneg half %sin
ret half %fneg
@@ -5889,29 +5228,21 @@ define half @v_fneg_canonicalize_f16(half %a) #0 {
; VI-NEXT: v_max_f16_e64 v0, -v0, -v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_canonicalize_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_canonicalize_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_max_f16_e64 v0.l, -v0.l, -v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_canonicalize_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_canonicalize_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_max_f16_e64 v0.l, -v0.l, -v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_canonicalize_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%trunc = call half @llvm.canonicalize.f16(half %a)
%fneg = fneg half %trunc
ret half %fneg
@@ -5974,6 +5305,27 @@ define void @v_fneg_copytoreg_f16(ptr addrspace(1) %out, half %a, half %b, half
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_copytoreg_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_and_b32_e32 v6, 0x3ff, v31
+; GFX11-NEXT: v_mul_f16_e32 v2, v2, v3
+; GFX11-NEXT: s_mov_b32 s0, exec_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v6, 1, v6
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v6
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v5
+; GFX11-NEXT: s_cbranch_execz .LBB81_2
+; GFX11-NEXT: ; %bb.1: ; %if
+; GFX11-NEXT: v_mul_f16_e64 v3, -v2, v4
+; GFX11-NEXT: global_store_b16 v[0:1], v3, off dlc
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: .LBB81_2: ; %endif
+; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX11-NEXT: global_store_b16 v[0:1], v2, off dlc
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_copytoreg_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5995,29 +5347,6 @@ define void @v_fneg_copytoreg_f16(ptr addrspace(1) %out, half %a, half %b, half
; GFX11-SAFE-TRUE16-NEXT: global_store_b16 v[0:1], v2, off dlc
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_copytoreg_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v6, 0x3ff, v31
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e32 v2, v2, v3
-; GFX11-SAFE-FAKE16-NEXT: s_mov_b32 s0, exec_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 1, v6
-; GFX11-SAFE-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, v0, v6
-; GFX11-SAFE-FAKE16-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmpx_eq_u32_e32 0, v5
-; GFX11-SAFE-FAKE16-NEXT: s_cbranch_execz .LBB81_2
-; GFX11-SAFE-FAKE16-NEXT: ; %bb.1: ; %if
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e64 v3, -v2, v4
-; GFX11-SAFE-FAKE16-NEXT: global_store_b16 v[0:1], v3, off dlc
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-SAFE-FAKE16-NEXT: .LBB81_2: ; %endif
-; GFX11-SAFE-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX11-SAFE-FAKE16-NEXT: global_store_b16 v[0:1], v2, off dlc
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_copytoreg_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6039,28 +5368,6 @@ define void @v_fneg_copytoreg_f16(ptr addrspace(1) %out, half %a, half %b, half
; GFX11-NSZ-TRUE16-NEXT: global_store_b16 v[0:1], v2, off dlc
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_copytoreg_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v6, 0x3ff, v31
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e32 v2, v2, v3
-; GFX11-NSZ-FAKE16-NEXT: s_mov_b32 s0, exec_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 1, v6
-; GFX11-NSZ-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, v0, v6
-; GFX11-NSZ-FAKE16-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmpx_eq_u32_e32 0, v5
-; GFX11-NSZ-FAKE16-NEXT: s_cbranch_execz .LBB81_2
-; GFX11-NSZ-FAKE16-NEXT: ; %bb.1: ; %if
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e64 v3, -v2, v4
-; GFX11-NSZ-FAKE16-NEXT: global_store_b16 v[0:1], v3, off dlc
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NSZ-FAKE16-NEXT: .LBB81_2: ; %endif
-; GFX11-NSZ-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX11-NSZ-FAKE16-NEXT: global_store_b16 v[0:1], v2, off dlc
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
@@ -6108,6 +5415,14 @@ define half @v_fneg_inlineasm_f16(half %a, half %b, half %c, i32 %d) #0 {
; VI-NEXT: ;;#ASMEND
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_inlineasm_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_mul_f16_e64 v0, v0, -v1
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: ; use v0
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_inlineasm_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6116,16 +5431,6 @@ define half @v_fneg_inlineasm_f16(half %a, half %b, half %c, i32 %d) #0 {
; GFX11-SAFE-TRUE16-NEXT: ; use v0
; GFX11-SAFE-TRUE16-NEXT: ;;#ASMEND
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_inlineasm_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e64 v0, v0, -v1
-; GFX11-SAFE-FAKE16-NEXT: ;;#ASMSTART
-; GFX11-SAFE-FAKE16-NEXT: ; use v0
-; GFX11-SAFE-FAKE16-NEXT: ;;#ASMEND
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_inlineasm_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6134,15 +5439,6 @@ define half @v_fneg_inlineasm_f16(half %a, half %b, half %c, i32 %d) #0 {
; GFX11-NSZ-TRUE16-NEXT: ; use v0
; GFX11-NSZ-TRUE16-NEXT: ;;#ASMEND
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_inlineasm_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e64 v0, v0, -v1
-; GFX11-NSZ-FAKE16-NEXT: ;;#ASMSTART
-; GFX11-NSZ-FAKE16-NEXT: ; use v0
-; GFX11-NSZ-FAKE16-NEXT: ;;#ASMEND
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%mul = fmul half %a, %b
%fneg = fneg half %mul
call void asm sideeffect "; use $0", "v"(half %fneg) #0
@@ -6179,6 +5475,16 @@ define half @v_fneg_inlineasm_multi_use_src_f16(ptr addrspace(1) %out, half %a,
; VI-NEXT: ;;#ASMEND
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: v_fneg_inlineasm_multi_use_src_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_mul_f16_e32 v0, v2, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_xor_b32_e32 v1, 0x8000, v0
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: ; use v1
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_inlineasm_multi_use_src_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6189,18 +5495,6 @@ define half @v_fneg_inlineasm_multi_use_src_f16(ptr addrspace(1) %out, half %a,
; GFX11-SAFE-TRUE16-NEXT: ; use v1
; GFX11-SAFE-TRUE16-NEXT: ;;#ASMEND
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: v_fneg_inlineasm_multi_use_src_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e32 v0, v2, v3
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v1, 0x8000, v0
-; GFX11-SAFE-FAKE16-NEXT: ;;#ASMSTART
-; GFX11-SAFE-FAKE16-NEXT: ; use v1
-; GFX11-SAFE-FAKE16-NEXT: ;;#ASMEND
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: v_fneg_inlineasm_multi_use_src_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6211,17 +5505,6 @@ define half @v_fneg_inlineasm_multi_use_src_f16(ptr addrspace(1) %out, half %a,
; GFX11-NSZ-TRUE16-NEXT: ; use v1
; GFX11-NSZ-TRUE16-NEXT: ;;#ASMEND
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: v_fneg_inlineasm_multi_use_src_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e32 v0, v2, v3
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v1, 0x8000, v0
-; GFX11-NSZ-FAKE16-NEXT: ;;#ASMSTART
-; GFX11-NSZ-FAKE16-NEXT: ; use v1
-; GFX11-NSZ-FAKE16-NEXT: ;;#ASMEND
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
@@ -6259,6 +5542,14 @@ define { half, half } @multiuse_fneg_2_vop3_users_f16(half %a, half %b, half %c)
; VI-NEXT: v_mov_b32_e32 v0, v3
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: multiuse_fneg_2_vop3_users_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_fma_f16 v3, -v0, v1, v2
+; GFX11-NEXT: v_fma_f16 v1, -v0, v2, 2.0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, v3
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: multiuse_fneg_2_vop3_users_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6267,16 +5558,6 @@ define { half, half } @multiuse_fneg_2_vop3_users_f16(half %a, half %b, half %c)
; GFX11-SAFE-TRUE16-NEXT: v_fma_f16 v0.l, -v0.h, v1.l, v2.l
; GFX11-SAFE-TRUE16-NEXT: v_fma_f16 v1.l, -v0.h, v2.l, 2.0
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: multiuse_fneg_2_vop3_users_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_fma_f16 v3, -v0, v1, v2
-; GFX11-SAFE-FAKE16-NEXT: v_fma_f16 v1, -v0, v2, 2.0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: multiuse_fneg_2_vop3_users_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6285,15 +5566,6 @@ define { half, half } @multiuse_fneg_2_vop3_users_f16(half %a, half %b, half %c)
; GFX11-NSZ-TRUE16-NEXT: v_fma_f16 v0.l, -v0.h, v1.l, v2.l
; GFX11-NSZ-TRUE16-NEXT: v_fma_f16 v1.l, -v0.h, v2.l, 2.0
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: multiuse_fneg_2_vop3_users_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_fma_f16 v3, -v0, v1, v2
-; GFX11-NSZ-FAKE16-NEXT: v_fma_f16 v1, -v0, v2, 2.0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
%fma0 = call half @llvm.fma.f16(half %fneg.a, half %b, half %c)
%fma1 = call half @llvm.fma.f16(half %fneg.a, half %c, half 2.0)
@@ -6326,6 +5598,14 @@ define { half, half } @multiuse_fneg_2_vop2_users_f16(half %a, half %b, half %c)
; VI-NEXT: v_mov_b32_e32 v0, v3
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: multiuse_fneg_2_vop2_users_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_mul_f16_e64 v3, -v0, v1
+; GFX11-NEXT: v_mul_f16_e64 v1, -v0, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, v3
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: multiuse_fneg_2_vop2_users_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6334,16 +5614,6 @@ define { half, half } @multiuse_fneg_2_vop2_users_f16(half %a, half %b, half %c)
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e64 v0.l, -v0.h, v1.l
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e64 v1.l, -v0.h, v2.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: multiuse_fneg_2_vop2_users_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e64 v3, -v0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e64 v1, -v0, v2
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: multiuse_fneg_2_vop2_users_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6352,15 +5622,6 @@ define { half, half } @multiuse_fneg_2_vop2_users_f16(half %a, half %b, half %c)
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e64 v0.l, -v0.h, v1.l
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e64 v1.l, -v0.h, v2.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: multiuse_fneg_2_vop2_users_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e64 v3, -v0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e64 v1, -v0, v2
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
%mul0 = fmul half %fneg.a, %b
%mul1 = fmul half %fneg.a, %c
@@ -6391,33 +5652,24 @@ define { half, half } @multiuse_fneg_vop2_vop3_users_f16(ptr addrspace(1) %out,
; VI-NEXT: v_mul_f16_e64 v1, -v2, v4
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: multiuse_fneg_vop2_vop3_users_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_fma_f16 v0, -v2, v3, 2.0
+; GFX11-NEXT: v_mul_f16_e64 v1, -v2, v4
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: multiuse_fneg_vop2_vop3_users_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_fma_f16 v0.l, -v2.l, v3.l, 2.0
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e64 v1.l, -v2.l, v4.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: multiuse_fneg_vop2_vop3_users_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_fma_f16 v0, -v2, v3, 2.0
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e64 v1, -v2, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: multiuse_fneg_vop2_vop3_users_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_fma_f16 v0.l, -v2.l, v3.l, 2.0
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e64 v1.l, -v2.l, v4.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: multiuse_fneg_vop2_vop3_users_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_fma_f16 v0, -v2, v3, 2.0
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e64 v1, -v2, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
@@ -6482,6 +5734,23 @@ define { half, half } @free_fold_src_code_size_cost_use_f16(ptr addrspace(1) %ou
; VI-NSZ-NEXT: v_mul_f16_e32 v1, v1, v5
; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-SAFE-LABEL: free_fold_src_code_size_cost_use_f16:
+; GFX11-SAFE: ; %bb.0:
+; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-NEXT: v_fma_f16 v1, v2, v3, 2.0
+; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-NEXT: v_mul_f16_e64 v0, -v1, v4
+; GFX11-SAFE-NEXT: v_mul_f16_e64 v1, -v1, v5
+; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-LABEL: free_fold_src_code_size_cost_use_f16:
+; GFX11-NSZ: ; %bb.0:
+; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-NEXT: v_fma_f16 v1, v2, -v3, -2.0
+; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-NEXT: v_mul_f16_e32 v0, v1, v4
+; GFX11-NSZ-NEXT: v_mul_f16_e32 v1, v1, v5
+; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: free_fold_src_code_size_cost_use_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6490,16 +5759,6 @@ define { half, half } @free_fold_src_code_size_cost_use_f16(ptr addrspace(1) %ou
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e64 v0.l, -v0.h, v4.l
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e64 v1.l, -v0.h, v5.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: free_fold_src_code_size_cost_use_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_fma_f16 v1, v2, v3, 2.0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e64 v0, -v1, v4
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e64 v1, -v1, v5
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: free_fold_src_code_size_cost_use_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6508,15 +5767,6 @@ define { half, half } @free_fold_src_code_size_cost_use_f16(ptr addrspace(1) %ou
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.h, v4.l
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v1.l, v0.h, v5.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: free_fold_src_code_size_cost_use_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_fma_f16 v1, v2, -v3, -2.0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e32 v0, v1, v4
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e32 v1, v1, v5
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
@@ -6554,6 +5804,13 @@ define half @one_use_cost_to_fold_into_src_f16(ptr addrspace(1) %out, half %a, h
; VI-NEXT: v_fma_f16 v0, -v0, v3, v4
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: one_use_cost_to_fold_into_src_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_trunc_f16_e32 v0, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_fma_f16 v0, -v0, v3, v4
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: one_use_cost_to_fold_into_src_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6561,15 +5818,6 @@ define half @one_use_cost_to_fold_into_src_f16(ptr addrspace(1) %out, half %a, h
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_fma_f16 v0.l, -v0.l, v3.l, v4.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: one_use_cost_to_fold_into_src_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_trunc_f16_e32 v0, v2
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_fma_f16 v0, -v0, v3, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: one_use_cost_to_fold_into_src_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6577,14 +5825,6 @@ define half @one_use_cost_to_fold_into_src_f16(ptr addrspace(1) %out, half %a, h
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_fma_f16 v0.l, -v0.l, v3.l, v4.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: one_use_cost_to_fold_into_src_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_trunc_f16_e32 v0, v2
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_fma_f16 v0, -v0, v3, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
@@ -6620,6 +5860,14 @@ define { half, half } @multi_use_cost_to_fold_into_src(ptr addrspace(1) %out, ha
; VI-NEXT: v_mul_f16_e32 v1, v1, v5
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: multi_use_cost_to_fold_into_src:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_trunc_f16_e32 v1, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_fma_f16 v0, -v1, v3, v4
+; GFX11-NEXT: v_mul_f16_e32 v1, v1, v5
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: multi_use_cost_to_fold_into_src:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6628,16 +5876,6 @@ define { half, half } @multi_use_cost_to_fold_into_src(ptr addrspace(1) %out, ha
; GFX11-SAFE-TRUE16-NEXT: v_fma_f16 v0.l, -v0.h, v3.l, v4.l
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e32 v1.l, v0.h, v5.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: multi_use_cost_to_fold_into_src:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_trunc_f16_e32 v1, v2
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_fma_f16 v0, -v1, v3, v4
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e32 v1, v1, v5
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: multi_use_cost_to_fold_into_src:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6646,15 +5884,6 @@ define { half, half } @multi_use_cost_to_fold_into_src(ptr addrspace(1) %out, ha
; GFX11-NSZ-TRUE16-NEXT: v_fma_f16 v0.l, -v0.h, v3.l, v4.l
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v1.l, v0.h, v5.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: multi_use_cost_to_fold_into_src:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_trunc_f16_e32 v1, v2
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_fma_f16 v0, -v1, v3, v4
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e32 v1, v1, v5
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
@@ -6752,29 +5981,21 @@ define half @nnan_fmul_neg1_to_fneg(half %x, half %y) #0 {
; VI-NEXT: v_mul_f16_e64 v0, -v0, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: nnan_fmul_neg1_to_fneg:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_mul_f16_e64 v0, -v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: nnan_fmul_neg1_to_fneg:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e64 v0.l, -v0.l, v1.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: nnan_fmul_neg1_to_fneg:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e64 v0, -v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: nnan_fmul_neg1_to_fneg:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e64 v0.l, -v0.l, v1.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: nnan_fmul_neg1_to_fneg:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e64 v0, -v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%mul = fmul half %x, -1.0
%add = fmul nnan half %mul, %y
ret half %add
@@ -6799,29 +6020,21 @@ define half @denormal_fmul_neg1_to_fneg(half %x, half %y) {
; VI-NEXT: v_mul_f16_e64 v0, -v0, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: denormal_fmul_neg1_to_fneg:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_mul_f16_e64 v0, -v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: denormal_fmul_neg1_to_fneg:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e64 v0.l, -v0.l, v1.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: denormal_fmul_neg1_to_fneg:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e64 v0, -v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: denormal_fmul_neg1_to_fneg:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e64 v0.l, -v0.l, v1.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: denormal_fmul_neg1_to_fneg:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e64 v0, -v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%mul = fmul nnan half %x, -1.0
%add = fmul half %mul, %y
ret half %add
@@ -6848,6 +6061,13 @@ define half @denorm_snan_fmul_neg1_to_fneg(half %x, half %y) {
; VI-NEXT: v_mul_f16_e32 v0, v0, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: denorm_snan_fmul_neg1_to_fneg:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_mul_f16_e64 v0, v0, -v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: denorm_snan_fmul_neg1_to_fneg:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6855,15 +6075,6 @@ define half @denorm_snan_fmul_neg1_to_fneg(half %x, half %y) {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: denorm_snan_fmul_neg1_to_fneg:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e64 v0, v0, -v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: denorm_snan_fmul_neg1_to_fneg:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6871,14 +6082,6 @@ define half @denorm_snan_fmul_neg1_to_fneg(half %x, half %y) {
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: denorm_snan_fmul_neg1_to_fneg:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e64 v0, v0, -v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%canonical = fmul half %x, %x
%mul = fmul half %canonical, -1.0
%add = fmul half %mul, %y
@@ -6903,6 +6106,13 @@ define half @flush_snan_fmul_neg1_to_fneg(half %x, half %y) #0 {
; VI-NEXT: v_mul_f16_e32 v0, v0, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: flush_snan_fmul_neg1_to_fneg:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: flush_snan_fmul_neg1_to_fneg:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6910,15 +6120,6 @@ define half @flush_snan_fmul_neg1_to_fneg(half %x, half %y) #0 {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: flush_snan_fmul_neg1_to_fneg:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: flush_snan_fmul_neg1_to_fneg:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6926,14 +6127,6 @@ define half @flush_snan_fmul_neg1_to_fneg(half %x, half %y) #0 {
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: flush_snan_fmul_neg1_to_fneg:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%quiet = call half @llvm.canonicalize.f16(half %x)
%mul = fmul half %quiet, -1.0
%add = fmul half %mul, %y
@@ -6963,6 +6156,14 @@ define half @fadd_select_fneg_fneg_f16(i32 %arg0, half %x, half %y, half %z) {
; VI-NEXT: v_sub_f16_e32 v0, v3, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-LABEL: fadd_select_fneg_fneg_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_sub_f16_e32 v0, v3, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: fadd_select_fneg_fneg_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6971,16 +6172,6 @@ define half @fadd_select_fneg_fneg_f16(i32 %arg0, half %x, half %y, half %z) {
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_sub_f16_e32 v0.l, v3.l, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: fadd_select_fneg_fneg_f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_sub_f16_e32 v0, v3, v0
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-NSZ-TRUE16-LABEL: fadd_select_fneg_fneg_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6989,15 +6180,6 @@ define half @fadd_select_fneg_fneg_f16(i32 %arg0, half %x, half %y, half %z) {
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_sub_f16_e32 v0.l, v3.l, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: fadd_select_fneg_fneg_f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_sub_f16_e32 v0, v3, v0
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %arg0, 0
%neg.x = fneg half %x
%neg.y = fneg half %y
@@ -7078,3 +6260,6 @@ attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "unsafe-fp-math"="true" }
attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }
attributes #4 = { nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11-NSZ-FAKE16: {{.*}}
+; GFX11-SAFE-FAKE16: {{.*}}
More information about the llvm-commits mailing list