[llvm] [AMDGPU][True16][CodeGen] optimize codegen for mad-mix in true16 (PR #124995)
Brox Chen via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 17 14:46:11 PDT 2025
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/124995
>From d4343302992b2276e87178b7ac76efdcef2c1f14 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Thu, 27 Feb 2025 14:05:32 -0500
Subject: [PATCH 1/4] enable fdiv test in GISEL
---
.../CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll | 2613 ++++++++++++++++-
1 file changed, 2569 insertions(+), 44 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll
index 5ba036c386a40..ac5ea85d4f83f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll
@@ -9,11 +9,13 @@
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX89,GFX9,GFX9-IEEE %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX89,GFX9,GFX9-FLUSH %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-IEEE %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-FLUSH %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10,GFX10-IEEE %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10,GFX10-FLUSH %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-IEEE %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-FLUSH %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+real-true16 -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-IEEE,GFX11-IEEE-TRUE16 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=-real-true16 -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-IEEE,GFX11-IEEE-FAKE16 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+real-true16 -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-FLUSH,GFX11-FLUSH-TRUE16 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=-real-true16 -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-FLUSH,GFX11-FLUSH-FAKE16 %s
define half @v_fdiv_f16(half %a, half %b) {
; GFX6-IEEE-LABEL: v_fdiv_f16:
@@ -168,6 +170,81 @@ define half @v_fdiv_f16(half %a, half %b) {
; GFX10-FLUSH-NEXT: v_div_fixup_f16 v0, v2, v1, v0
; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_fdiv_f16:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v6, -v4, v3, v5 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v6, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v3, v5 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v2
+; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
+; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
+; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.h, v1.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_fdiv_f16:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v1
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v3, v4, v2
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v2, v4, v2
+; GFX11-IEEE-FAKE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
+; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v1, v0
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_fdiv_f16:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v6, -v4, v3, v5 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v6, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v3, v5 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.h, v1.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_fdiv_f16:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v3, v4, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v2, v4, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v1, v0
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_fdiv_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -214,6 +291,37 @@ define half @v_fdiv_f16_afn(half %a, half %b) {
; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_fdiv_f16_afn:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v1.l
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_fdiv_f16_afn:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_fdiv_f16_afn:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v1.l
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_fdiv_f16_afn:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_fdiv_f16_afn:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -378,6 +486,81 @@ define half @v_fdiv_f16_ulp25(half %a, half %b) {
; GFX10-FLUSH-NEXT: v_div_fixup_f16 v0, v2, v1, v0
; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_fdiv_f16_ulp25:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v6, -v4, v3, v5 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v6, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v3, v5 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v2
+; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
+; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
+; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.h, v1.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_fdiv_f16_ulp25:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v1
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v3, v4, v2
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v2, v4, v2
+; GFX11-IEEE-FAKE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
+; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v1, v0
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_fdiv_f16_ulp25:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v6, -v4, v3, v5 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v6, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v3, v5 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.h, v1.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_fdiv_f16_ulp25:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v3, v4, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v2, v4, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v1, v0
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_fdiv_f16_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -447,11 +630,35 @@ define half @v_neg_rcp_f16(half %x) {
; GFX89-NEXT: v_rcp_f16_e64 v0, -v0
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10PLUS-LABEL: v_neg_rcp_f16:
-; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: v_rcp_f16_e64 v0, -v0
-; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_neg_rcp_f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rcp_f16_e64 v0, -v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-TRUE16-LABEL: v_neg_rcp_f16:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_neg_rcp_f16:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_neg_rcp_f16:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_neg_rcp_f16:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv half -1.0, %x
ret half %fdiv
}
@@ -504,11 +711,35 @@ define half @v_rcp_f16(half %x) {
; GFX89-NEXT: v_rcp_f16_e32 v0, v0
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10PLUS-LABEL: v_rcp_f16:
-; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: v_rcp_f16_e32 v0, v0
-; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_rcp_f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rcp_f16_e32 v0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-TRUE16-LABEL: v_rcp_f16:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_rcp_f16:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_rcp_f16:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_rcp_f16:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv half 1.0, %x
ret half %fdiv
}
@@ -561,11 +792,35 @@ define half @v_rcp_f16_arcp(half %x) {
; GFX89-NEXT: v_rcp_f16_e32 v0, v0
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10PLUS-LABEL: v_rcp_f16_arcp:
-; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: v_rcp_f16_e32 v0, v0
-; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_rcp_f16_arcp:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rcp_f16_e32 v0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-TRUE16-LABEL: v_rcp_f16_arcp:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_rcp_f16_arcp:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_rcp_f16_arcp:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_rcp_f16_arcp:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv arcp half 1.0, %x
ret half %fdiv
}
@@ -587,11 +842,35 @@ define half @v_rcp_f16_arcp_afn(half %x) {
; GFX89-NEXT: v_rcp_f16_e32 v0, v0
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10PLUS-LABEL: v_rcp_f16_arcp_afn:
-; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: v_rcp_f16_e32 v0, v0
-; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_rcp_f16_arcp_afn:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rcp_f16_e32 v0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-TRUE16-LABEL: v_rcp_f16_arcp_afn:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_rcp_f16_arcp_afn:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_rcp_f16_arcp_afn:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_rcp_f16_arcp_afn:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv arcp afn half 1.0, %x
ret half %fdiv
}
@@ -644,11 +923,35 @@ define half @v_rcp_f16_ulp25(half %x) {
; GFX89-NEXT: v_rcp_f16_e32 v0, v0
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10PLUS-LABEL: v_rcp_f16_ulp25:
-; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: v_rcp_f16_e32 v0, v0
-; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_rcp_f16_ulp25:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rcp_f16_e32 v0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-TRUE16-LABEL: v_rcp_f16_ulp25:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_rcp_f16_ulp25:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_rcp_f16_ulp25:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_rcp_f16_ulp25:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv half 1.0, %x
ret half %fdiv
}
@@ -678,6 +981,37 @@ define half @v_fdiv_f16_afn_ulp25(half %a, half %b) {
; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_fdiv_f16_afn_ulp25:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v1.l
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_fdiv_f16_afn_ulp25:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_fdiv_f16_afn_ulp25:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v1.l
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_fdiv_f16_afn_ulp25:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_fdiv_f16_afn_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -745,6 +1079,37 @@ define half @v_fdiv_f16_arcp_ulp25(half %a, half %b) {
; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_fdiv_f16_arcp_ulp25:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v1.l
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_fdiv_f16_arcp_ulp25:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_fdiv_f16_arcp_ulp25:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v1.l
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_fdiv_f16_arcp_ulp25:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_fdiv_f16_arcp_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1040,6 +1405,129 @@ define <2 x half> @v_fdiv_v2f16(<2 x half> %a, <2 x half> %b) {
; GFX10-FLUSH-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_fdiv_v2f16:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v1.h
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v4, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v6, v0.h
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.l
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v8.l, v1.h
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v9.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v4, v4, v2
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v6, v6, v3
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v10, -v5, v4, v7 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v11, -v8, v6, v9 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v4, v10, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v6, v11, v3
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v4, v7 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v7, -v8, v6, v9 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_dual_mul_f32 v2, v5, v2 :: v_dual_mul_f32 v3, v7, v3
+; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
+; GFX11-IEEE-TRUE16-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v3, v3, v6
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.l, v2
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.h, v3
+; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v2.l, v1.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.h, v2.h, v1.h, v0.h
+; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_fdiv_v2f16:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v6, v0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, v2
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v7, v5
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v4, v4
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v6, v6, v3 :: v_dual_mul_f32 v7, v7, v4
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v8, -v1, v6, v0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v9, -v1, v7, v0 op_sel:[1,0,1] op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_dual_fmac_f32 v6, v8, v3 :: v_dual_fmac_f32 v7, v9, v4
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v8, -v1, v6, v0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v9, -v1, v7, v0 op_sel:[1,0,1] op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v3, v8, v3 :: v_dual_mul_f32 v4, v9, v4
+; GFX11-IEEE-FAKE16-NEXT: v_and_b32_e32 v3, 0xff800000, v3
+; GFX11-IEEE-FAKE16-NEXT: v_dual_add_f32 v3, v3, v6 :: v_dual_and_b32 v4, 0xff800000, v4
+; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v4, v4, v7
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v3, v1, v0
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v1, v4, v2, v5
+; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_fdiv_v2f16:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v1.h
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v4, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v6, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.l
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v8.l, v1.h
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v9.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v4, v4, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v6, v6, v3
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v10, -v5, v4, v7 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v11, -v8, v6, v9 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v4, v10, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v6, v11, v3
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v4, v7 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v7, -v8, v6, v9 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_dual_mul_f32 v2, v5, v2 :: v_dual_mul_f32 v3, v7, v3
+; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v3, v3, v6
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.l, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.h, v3
+; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v2.l, v1.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.h, v2.h, v1.h, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_fdiv_v2f16:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v6, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v7, v5
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v4, v4
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v6, v6, v3 :: v_dual_mul_f32 v7, v7, v4
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v8, -v1, v6, v0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v9, -v1, v7, v0 op_sel:[1,0,1] op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_fmac_f32 v6, v8, v3 :: v_dual_fmac_f32 v7, v9, v4
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v8, -v1, v6, v0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v9, -v1, v7, v0 op_sel:[1,0,1] op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v3, v8, v3 :: v_dual_mul_f32 v4, v9, v4
+; GFX11-FLUSH-FAKE16-NEXT: v_and_b32_e32 v3, 0xff800000, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_add_f32 v3, v3, v6 :: v_dual_and_b32 v4, 0xff800000, v4
+; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v4, v4, v7
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v3, v1, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v1, v4, v2, v5
+; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_fdiv_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1118,6 +1606,53 @@ define <2 x half> @v_fdiv_v2f16_afn(<2 x half> %a, <2 x half> %b) {
; GFX10-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_fdiv_v2f16_afn:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v1.l, v1.l
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v1.h, v1.h
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.h, v0.h, v1.h
+; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_fdiv_v2f16_afn:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v2, v2
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v1, v3, v2
+; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_fdiv_v2f16_afn:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v1.l, v1.l
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v1.h, v1.h
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.h, v0.h, v1.h
+; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_fdiv_v2f16_afn:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v2, v2
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v1, v3, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_fdiv_v2f16_afn:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1418,6 +1953,129 @@ define <2 x half> @v_fdiv_v2f16_ulp25(<2 x half> %a, <2 x half> %b) {
; GFX10-FLUSH-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_fdiv_v2f16_ulp25:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v1.h
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v4, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v6, v0.h
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.l
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v8.l, v1.h
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v9.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v4, v4, v2
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v6, v6, v3
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v10, -v5, v4, v7 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v11, -v8, v6, v9 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v4, v10, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v6, v11, v3
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v4, v7 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v7, -v8, v6, v9 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_dual_mul_f32 v2, v5, v2 :: v_dual_mul_f32 v3, v7, v3
+; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
+; GFX11-IEEE-TRUE16-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v3, v3, v6
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.l, v2
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.h, v3
+; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v2.l, v1.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.h, v2.h, v1.h, v0.h
+; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_fdiv_v2f16_ulp25:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v6, v0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, v2
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v7, v5
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v4, v4
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v6, v6, v3 :: v_dual_mul_f32 v7, v7, v4
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v8, -v1, v6, v0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v9, -v1, v7, v0 op_sel:[1,0,1] op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_dual_fmac_f32 v6, v8, v3 :: v_dual_fmac_f32 v7, v9, v4
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v8, -v1, v6, v0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v9, -v1, v7, v0 op_sel:[1,0,1] op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v3, v8, v3 :: v_dual_mul_f32 v4, v9, v4
+; GFX11-IEEE-FAKE16-NEXT: v_and_b32_e32 v3, 0xff800000, v3
+; GFX11-IEEE-FAKE16-NEXT: v_dual_add_f32 v3, v3, v6 :: v_dual_and_b32 v4, 0xff800000, v4
+; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v4, v4, v7
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v3, v1, v0
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v1, v4, v2, v5
+; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_fdiv_v2f16_ulp25:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v1.h
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v4, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v6, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.l
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v8.l, v1.h
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v9.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v4, v4, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v6, v6, v3
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v10, -v5, v4, v7 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v11, -v8, v6, v9 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v4, v10, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v6, v11, v3
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v4, v7 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v7, -v8, v6, v9 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_dual_mul_f32 v2, v5, v2 :: v_dual_mul_f32 v3, v7, v3
+; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v3, v3, v6
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.l, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.h, v3
+; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v2.l, v1.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.h, v2.h, v1.h, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_fdiv_v2f16_ulp25:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v6, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v7, v5
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v4, v4
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v6, v6, v3 :: v_dual_mul_f32 v7, v7, v4
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v8, -v1, v6, v0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v9, -v1, v7, v0 op_sel:[1,0,1] op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_fmac_f32 v6, v8, v3 :: v_dual_fmac_f32 v7, v9, v4
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v8, -v1, v6, v0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v9, -v1, v7, v0 op_sel:[1,0,1] op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v3, v8, v3 :: v_dual_mul_f32 v4, v9, v4
+; GFX11-FLUSH-FAKE16-NEXT: v_and_b32_e32 v3, 0xff800000, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_add_f32 v3, v3, v6 :: v_dual_and_b32 v4, 0xff800000, v4
+; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v4, v4, v7
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v3, v1, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v1, v4, v2, v5
+; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_fdiv_v2f16_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1721,6 +2379,123 @@ define <2 x half> @v_rcp_v2f16(<2 x half> %x) {
; GFX10-FLUSH-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_rcp_v2f16:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.h
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, 1.0
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v1
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v1
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_dual_mul_f32 v1, v4, v1 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-IEEE-TRUE16-NEXT: v_dual_add_f32 v2, v2, v3 :: v_dual_and_b32 v1, 0xff800000, v1
+; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v1, v1, v6
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v2
+; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, 1.0
+; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v0.h, 1.0
+; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_rcp_v2f16:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, 1.0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-IEEE-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, 1.0
+; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_rcp_v2f16:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, 1.0
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_dual_mul_f32 v1, v4, v1 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_dual_add_f32 v2, v2, v3 :: v_dual_and_b32 v1, 0xff800000, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v1, v1, v6
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, 1.0
+; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v0.h, 1.0
+; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_rcp_v2f16:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, 1.0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, 1.0
+; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_rcp_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2024,6 +2799,123 @@ define <2 x half> @v_neg_rcp_v2f16(<2 x half> %x) {
; GFX10-FLUSH-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_neg_rcp_v2f16:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.h
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, -1.0
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v1
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v1
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_dual_mul_f32 v1, v4, v1 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-IEEE-TRUE16-NEXT: v_dual_add_f32 v2, v2, v3 :: v_dual_and_b32 v1, 0xff800000, v1
+; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v1, v1, v6
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v2
+; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, -1.0
+; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v0.h, -1.0
+; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_neg_rcp_v2f16:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, -1.0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-IEEE-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, -1.0
+; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_neg_rcp_v2f16:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, -1.0
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_dual_mul_f32 v1, v4, v1 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_dual_add_f32 v2, v2, v3 :: v_dual_and_b32 v1, 0xff800000, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v1, v1, v6
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, -1.0
+; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v0.h, -1.0
+; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_neg_rcp_v2f16:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, -1.0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, -1.0
+; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_neg_rcp_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2343,6 +3235,133 @@ define <2 x half> @v_rcp_v2f16_fabs(<2 x half> %x) {
; GFX10-FLUSH-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_rcp_v2f16_fabs:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, 1.0
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v1.l
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.h
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.h
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v0, v0
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v0
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
+; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v0
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
+; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
+; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v0, v0, v6
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
+; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, v1.l, 1.0
+; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.h, v0.h, v1.h, 1.0
+; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_rcp_v2f16_fabs:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v5, 1.0
+; GFX11-IEEE-FAKE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v6, v5, v3
+; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, v2
+; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v6, v7, v3
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v4, v4
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v5, v5, v4
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v8, -|v0|, v5, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v5, v8, v4
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v0, -|v0|, v5, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v3, v7, v3 :: v_dual_mul_f32 v0, v0, v4
+; GFX11-IEEE-FAKE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
+; GFX11-IEEE-FAKE16-NEXT: v_dual_add_f32 v0, v0, v5 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v3, v3, v6
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v0, v2, 1.0
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0
+; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_rcp_v2f16_fabs:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, 1.0
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v1.l
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.h
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.h
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v0, v0
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v0
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
+; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v0
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
+; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
+; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v0, v0, v6
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, v1.l, 1.0
+; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.h, v0.h, v1.h, 1.0
+; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_rcp_v2f16_fabs:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v5, 1.0
+; GFX11-FLUSH-FAKE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v6, v5, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v6, v7, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v4, v4
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v5, v5, v4
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v8, -|v0|, v5, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v5, v8, v4
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v0, -|v0|, v5, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v3, v7, v3 :: v_dual_mul_f32 v0, v0, v4
+; GFX11-FLUSH-FAKE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_add_f32 v0, v0, v5 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v3, v3, v6
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v0, v2, 1.0
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0
+; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_rcp_v2f16_fabs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2665,6 +3684,133 @@ define <2 x half> @v_neg_rcp_v2f16_fabs(<2 x half> %x) {
; GFX10-FLUSH-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_neg_rcp_v2f16_fabs:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, -1.0
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v1.l
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.h
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.h
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v0, v0
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v0
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
+; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v0
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
+; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
+; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v0, v0, v6
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
+; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, v1.l, -1.0
+; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.h, v0.h, v1.h, -1.0
+; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_neg_rcp_v2f16_fabs:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v5, -1.0
+; GFX11-IEEE-FAKE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v6, v5, v3
+; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v6, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, v2
+; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v6, v7, v3
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v4, v4
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v6, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v5, v5, v4
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v8, -|v0|, v5, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v5, v8, v4
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v0, -|v0|, v5, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v3, v7, v3 :: v_dual_mul_f32 v0, v0, v4
+; GFX11-IEEE-FAKE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
+; GFX11-IEEE-FAKE16-NEXT: v_dual_add_f32 v0, v0, v5 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v3, v3, v6
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v0, v2, -1.0
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0
+; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_neg_rcp_v2f16_fabs:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, -1.0
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v1.l
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.h
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.h
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v0, v0
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v0
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
+; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v0
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
+; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
+; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v0, v0, v6
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, v1.l, -1.0
+; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.h, v0.h, v1.h, -1.0
+; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_neg_rcp_v2f16_fabs:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v5, -1.0
+; GFX11-FLUSH-FAKE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v6, v5, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v6, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v6, v7, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v4, v4
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v6, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v5, v5, v4
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v8, -|v0|, v5, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v5, v8, v4
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v0, -|v0|, v5, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v3, v7, v3 :: v_dual_mul_f32 v0, v0, v4
+; GFX11-FLUSH-FAKE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_add_f32 v0, v0, v5 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v3, v3, v6
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v0, v2, -1.0
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0
+; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_neg_rcp_v2f16_fabs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2795,6 +3941,43 @@ define <2 x half> @v_rcp_v2f16_arcp(<2 x half> %x) {
; GFX10-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_rcp_v2f16_arcp:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v0.h
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_rcp_v2f16_arcp:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_rcp_v2f16_arcp:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_rcp_v2f16_arcp:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_rcp_v2f16_arcp:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2847,6 +4030,43 @@ define <2 x half> @v_rcp_v2f16_arcp_afn(<2 x half> %x) {
; GFX10-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_rcp_v2f16_arcp_afn:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v0.h
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_rcp_v2f16_arcp_afn:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_rcp_v2f16_arcp_afn:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_rcp_v2f16_arcp_afn:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_rcp_v2f16_arcp_afn:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3131,6 +4351,123 @@ define <2 x half> @v_rcp_v2f16_ulp25(<2 x half> %x) {
; GFX10-FLUSH-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_rcp_v2f16_ulp25:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.h
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, 1.0
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v1
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v1
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_dual_mul_f32 v1, v4, v1 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-IEEE-TRUE16-NEXT: v_dual_add_f32 v2, v2, v3 :: v_dual_and_b32 v1, 0xff800000, v1
+; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v1, v1, v6
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v2
+; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, 1.0
+; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v0.h, 1.0
+; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_rcp_v2f16_ulp25:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, 1.0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-IEEE-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, 1.0
+; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_rcp_v2f16_ulp25:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, 1.0
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_dual_mul_f32 v1, v4, v1 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_dual_add_f32 v2, v2, v3 :: v_dual_and_b32 v1, 0xff800000, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v1, v1, v6
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, 1.0
+; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v0.h, 1.0
+; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_rcp_v2f16_ulp25:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, 1.0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, 1.0
+; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_rcp_v2f16_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3209,6 +4546,53 @@ define <2 x half> @v_fdiv_v2f16_afn_ulp25(<2 x half> %a, <2 x half> %b) {
; GFX10-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_fdiv_v2f16_afn_ulp25:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v1.l, v1.l
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v1.h, v1.h
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.h, v0.h, v1.h
+; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_fdiv_v2f16_afn_ulp25:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v2, v2
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v1, v3, v2
+; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_fdiv_v2f16_afn_ulp25:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v1.l, v1.l
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v1.h, v1.h
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.h, v0.h, v1.h
+; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_fdiv_v2f16_afn_ulp25:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v2, v2
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v1, v3, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_fdiv_v2f16_afn_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3327,6 +4711,53 @@ define <2 x half> @v_fdiv_v2f16_arcp_ulp25(<2 x half> %a, <2 x half> %b) {
; GFX10-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_fdiv_v2f16_arcp_ulp25:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v1.l, v1.l
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v1.h, v1.h
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.h, v0.h, v1.h
+; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_fdiv_v2f16_arcp_ulp25:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v2, v2
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v1, v3, v2
+; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_fdiv_v2f16_arcp_ulp25:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v1.l, v1.l
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v1.h, v1.h
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.h, v0.h, v1.h
+; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_fdiv_v2f16_arcp_ulp25:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v2, v2
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v1, v3, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_fdiv_v2f16_arcp_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3389,6 +4820,53 @@ define <2 x half> @v_fdiv_v2f16_arcp_afn_ulp25(<2 x half> %a, <2 x half> %b) {
; GFX10-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_fdiv_v2f16_arcp_afn_ulp25:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v1.l, v1.l
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v1.h, v1.h
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.h, v0.h, v1.h
+; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_fdiv_v2f16_arcp_afn_ulp25:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v2, v2
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v1, v3, v2
+; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_fdiv_v2f16_arcp_afn_ulp25:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v1.l, v1.l
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v1.h, v1.h
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.h, v0.h, v1.h
+; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_fdiv_v2f16_arcp_afn_ulp25:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v2, v2
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v1, v3, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_fdiv_v2f16_arcp_afn_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3562,6 +5040,77 @@ define amdgpu_ps i16 @s_fdiv_f16(i16 inreg %a.arg, i16 inreg %b.arg) {
; GFX10-FLUSH-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-FLUSH-NEXT: ; return to shader part epilog
;
+; GFX11-IEEE-TRUE16-LABEL: s_fdiv_f16:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, s1
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, s0
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v0, v0
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v1, v1, v0
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v1, v2, v0
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v0, v2, v0
+; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
+; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, s1, s0
+; GFX11-IEEE-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-IEEE-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-IEEE-FAKE16-LABEL: s_fdiv_f16:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, s1
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v1, s0
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v1, v1, v0
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v1, v2, v0
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v0, v2, v0
+; GFX11-IEEE-FAKE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
+; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v0, s1, s0
+; GFX11-IEEE-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-IEEE-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FLUSH-TRUE16-LABEL: s_fdiv_f16:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, s1
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, s0
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v0, v0
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v1, v1, v0
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v1, v2, v0
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v0, v2, v0
+; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
+; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, s1, s0
+; GFX11-FLUSH-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FLUSH-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FLUSH-FAKE16-LABEL: s_fdiv_f16:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, s1
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v1, s0
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v1, v1, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v1, v2, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v0, v2, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v0, s1, s0
+; GFX11-FLUSH-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FLUSH-FAKE16-NEXT: ; return to shader part epilog
; GFX11-LABEL: s_fdiv_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, s1
@@ -3642,6 +5191,37 @@ define amdgpu_ps i16 @s_fdiv_f16_arcp(i16 inreg %a.arg, i16 inreg %b.arg) {
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
+; GFX11-IEEE-TRUE16-LABEL: s_fdiv_f16_arcp:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.l, s0, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-IEEE-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-IEEE-FAKE16-LABEL: s_fdiv_f16_arcp:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v0, s1
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v0, s0, v0
+; GFX11-IEEE-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-IEEE-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FLUSH-TRUE16-LABEL: s_fdiv_f16_arcp:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, s0, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FLUSH-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FLUSH-FAKE16-LABEL: s_fdiv_f16_arcp:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v0, s1
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v0, s0, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FLUSH-FAKE16-NEXT: ; return to shader part epilog
; GFX11-LABEL: s_fdiv_f16_arcp:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_rcp_f16_e32 v0, s1
@@ -3681,6 +5261,37 @@ define amdgpu_ps i16 @s_fdiv_f16_afn(i16 inreg %a.arg, i16 inreg %b.arg) {
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
+; GFX11-IEEE-TRUE16-LABEL: s_fdiv_f16_afn:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.l, s0, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-IEEE-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-IEEE-FAKE16-LABEL: s_fdiv_f16_afn:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v0, s1
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v0, s0, v0
+; GFX11-IEEE-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-IEEE-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FLUSH-TRUE16-LABEL: s_fdiv_f16_afn:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, s0, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FLUSH-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FLUSH-FAKE16-LABEL: s_fdiv_f16_afn:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v0, s1
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v0, s0, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FLUSH-FAKE16-NEXT: ; return to shader part epilog
; GFX11-LABEL: s_fdiv_f16_afn:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_rcp_f16_e32 v0, s1
@@ -3995,6 +5606,101 @@ define amdgpu_ps i32 @s_fdiv_v2f16(i32 inreg %a.arg, i32 inreg %b.arg) {
; GFX10-FLUSH-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-FLUSH-NEXT: ; return to shader part epilog
;
+; GFX11-IEEE-TRUE16-LABEL: s_fdiv_v2f16:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, s1
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, s0
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v0, v0
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v1, v1, v0
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v1, v2, v0
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v0, v2, v0
+; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
+; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, s1, s0
+; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-IEEE-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-IEEE-FAKE16-LABEL: s_fdiv_v2f16:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_lshr_b32 s2, s1, 16
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, s1
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v1, s2
+; GFX11-IEEE-FAKE16-NEXT: s_lshr_b32 s3, s0, 16
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, s0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, s3
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v1, v1
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v2, v2, v0 :: v_dual_mul_f32 v3, v3, v1
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v4, -s1, v2, s0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v5, -s2, v3, s3 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_dual_fmac_f32 v2, v4, v0 :: v_dual_fmac_f32 v3, v5, v1
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v4, -s1, v2, s0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v5, -s2, v3, s3 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v0, v4, v0 :: v_dual_mul_f32 v1, v5, v1
+; GFX11-IEEE-FAKE16-NEXT: v_and_b32_e32 v1, 0xff800000, v1
+; GFX11-IEEE-FAKE16-NEXT: v_dual_add_f32 v1, v1, v3 :: v_dual_and_b32 v0, 0xff800000, v0
+; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v1, v1, s2, s3
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v0, s1, s0
+; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-IEEE-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-IEEE-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FLUSH-TRUE16-LABEL: s_fdiv_v2f16:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, s1
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, s0
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v0, v0
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v1, v1, v0
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v1, v2, v0
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v0, v2, v0
+; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
+; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, s1, s0
+; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FLUSH-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FLUSH-FAKE16-LABEL: s_fdiv_v2f16:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_lshr_b32 s2, s1, 16
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, s1
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v1, s2
+; GFX11-FLUSH-FAKE16-NEXT: s_lshr_b32 s3, s0, 16
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, s0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, s3
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v1, v1
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v2, v2, v0 :: v_dual_mul_f32 v3, v3, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v4, -s1, v2, s0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v5, -s2, v3, s3 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_fmac_f32 v2, v4, v0 :: v_dual_fmac_f32 v3, v5, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v4, -s1, v2, s0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v5, -s2, v3, s3 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v0, v4, v0 :: v_dual_mul_f32 v1, v5, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_and_b32_e32 v1, 0xff800000, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_add_f32 v1, v1, v3 :: v_dual_and_b32 v0, 0xff800000, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v1, v1, s2, s3
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v0, s1, s0
+; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FLUSH-FAKE16-NEXT: ; return to shader part epilog
; GFX11-LABEL: s_fdiv_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_lshr_b32 s2, s1, 16
@@ -4084,6 +5790,33 @@ define amdgpu_ps i16 @s_rcp_f16(i16 inreg %a.arg) {
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
+; GFX11-IEEE-TRUE16-LABEL: s_rcp_f16:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s0
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-IEEE-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-IEEE-FAKE16-LABEL: s_rcp_f16:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v0, s0
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-IEEE-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FLUSH-TRUE16-LABEL: s_rcp_f16:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s0
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FLUSH-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FLUSH-FAKE16-LABEL: s_rcp_f16:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v0, s0
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FLUSH-FAKE16-NEXT: ; return to shader part epilog
; GFX11-LABEL: s_rcp_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_rcp_f16_e32 v0, s0
@@ -4150,6 +5883,33 @@ define amdgpu_ps i16 @s_neg_rcp_f16(i16 inreg %a.arg) {
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
+; GFX11-IEEE-TRUE16-LABEL: s_neg_rcp_f16:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -s0
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-IEEE-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-IEEE-FAKE16-LABEL: s_neg_rcp_f16:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e64 v0, -s0
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-IEEE-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FLUSH-TRUE16-LABEL: s_neg_rcp_f16:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -s0
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FLUSH-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FLUSH-FAKE16-LABEL: s_neg_rcp_f16:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e64 v0, -s0
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FLUSH-FAKE16-NEXT: ; return to shader part epilog
; GFX11-LABEL: s_neg_rcp_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_rcp_f16_e64 v0, -s0
@@ -4222,6 +5982,33 @@ define amdgpu_ps i16 @s_rsq_f16(i16 inreg %a.arg) {
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
+; GFX11-IEEE-TRUE16-LABEL: s_rsq_f16:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: v_rsq_f16_e32 v0.l, s0
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-IEEE-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-IEEE-FAKE16-LABEL: s_rsq_f16:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: v_rsq_f16_e32 v0, s0
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-IEEE-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FLUSH-TRUE16-LABEL: s_rsq_f16:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: v_rsq_f16_e32 v0.l, s0
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FLUSH-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FLUSH-FAKE16-LABEL: s_rsq_f16:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: v_rsq_f16_e32 v0, s0
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FLUSH-FAKE16-NEXT: ; return to shader part epilog
; GFX11-LABEL: s_rsq_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_rsq_f16_e32 v0, s0
@@ -4525,6 +6312,131 @@ define amdgpu_ps i32 @s_rsq_v2f16(i32 inreg %a.arg) {
; GFX10-FLUSH-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-FLUSH-NEXT: ; return to shader part epilog
;
+; GFX11-IEEE-TRUE16-LABEL: s_rsq_v2f16:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, s0
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, -1.0
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v2, v1
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v5, -v3, v2, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fma_f32 v5, v5, v1, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v3, -v3, v5, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v1
+; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v3, 0xff800000, v3
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v6, -v4, v2, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v3, v3, v5
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v2, v6, v1
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v3
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v2, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.h, v0.h, v0.l, -1.0
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v1, v4, v1
+; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v1, 0xff800000, v1
+; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
+; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, -1.0
+; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-IEEE-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-IEEE-FAKE16-LABEL: s_rsq_v2f16:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_lshr_b32 s1, s0, 16
+; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v0, s0
+; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v1, s1
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, -1.0
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-IEEE-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, -1.0
+; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-IEEE-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-IEEE-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FLUSH-TRUE16-LABEL: s_rsq_v2f16:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, s0
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, -1.0
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v2, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v5, -v3, v2, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_f32 v5, v5, v1, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v3, -v3, v5, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v3, 0xff800000, v3
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v6, -v4, v2, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v3, v3, v5
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v2, v6, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v3
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v2, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.h, v0.h, v0.l, -1.0
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v1, v4, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v1, 0xff800000, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, -1.0
+; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FLUSH-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FLUSH-FAKE16-LABEL: s_rsq_v2f16:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_lshr_b32 s1, s0, 16
+; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v0, s0
+; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v1, s1
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, -1.0
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, -1.0
+; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FLUSH-FAKE16-NEXT: ; return to shader part epilog
; GFX11-LABEL: s_rsq_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_lshr_b32 s1, s0, 16
@@ -4617,11 +6529,35 @@ define half @v_rsq_f16(half %a) {
; GFX89-NEXT: v_rsq_f16_e32 v0, v0
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10PLUS-LABEL: v_rsq_f16:
-; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: v_rsq_f16_e32 v0, v0
-; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_rsq_f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rsq_f16_e32 v0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-TRUE16-LABEL: v_rsq_f16:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_rsq_f16_e32 v0.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_rsq_f16:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_rsq_f16_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_rsq_f16:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_rsq_f16_e32 v0.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_rsq_f16:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_rsq_f16_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
%sqrt = call contract half @llvm.sqrt.f16(half %a)
%fdiv = fdiv contract half 1.0, %sqrt
ret half %fdiv
@@ -4689,6 +6625,37 @@ define half @v_neg_rsq_f16(half %a) {
; GFX10-NEXT: v_rcp_f16_e64 v0, -v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_neg_rsq_f16:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_neg_rsq_f16:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_neg_rsq_f16:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_neg_rsq_f16:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_neg_rsq_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4765,6 +6732,41 @@ define { half, half } @v_rsq_f16_multi_use(half %a) {
; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_rsq_f16_multi_use:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v2.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_rsq_f16_e32 v1.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b32_e32 v0, v2
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_rsq_f16_multi_use:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v2, v0
+; GFX11-IEEE-FAKE16-NEXT: v_rsq_f16_e32 v1, v0
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_mov_b32_e32 v0, v2
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_rsq_f16_multi_use:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v2.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_rsq_f16_e32 v1.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b32_e32 v0, v2
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_rsq_f16_multi_use:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v2, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_rsq_f16_e32 v1, v0
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_mov_b32_e32 v0, v2
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_rsq_f16_multi_use:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4842,6 +6844,37 @@ define half @v_rsq_f16_missing_contract0(half %a) {
; GFX10-NEXT: v_rcp_f16_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_rsq_f16_missing_contract0:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_rsq_f16_missing_contract0:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_rsq_f16_missing_contract0:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_rsq_f16_missing_contract0:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_rsq_f16_missing_contract0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4916,6 +6949,37 @@ define half @v_rsq_f16_missing_contract1(half %a) {
; GFX10-NEXT: v_rcp_f16_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_rsq_f16_missing_contract1:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_rsq_f16_missing_contract1:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_rsq_f16_missing_contract1:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_rsq_f16_missing_contract1:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_rsq_f16_missing_contract1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4990,6 +7054,37 @@ define half @v_neg_rsq_f16_missing_contract0(half %a) {
; GFX10-NEXT: v_rcp_f16_e64 v0, -v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_neg_rsq_f16_missing_contract0:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_neg_rsq_f16_missing_contract0:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_neg_rsq_f16_missing_contract0:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_neg_rsq_f16_missing_contract0:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_neg_rsq_f16_missing_contract0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5064,6 +7159,37 @@ define half @v_neg_rsq_f16_missing_contract1(half %a) {
; GFX10-NEXT: v_rcp_f16_e64 v0, -v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_neg_rsq_f16_missing_contract1:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_neg_rsq_f16_missing_contract1:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_neg_rsq_f16_missing_contract1:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_neg_rsq_f16_missing_contract1:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_neg_rsq_f16_missing_contract1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5138,6 +7264,37 @@ define half @v_neg_rsq_f16_fabs(half %a) {
; GFX10-NEXT: v_rcp_f16_e64 v0, -v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_neg_rsq_f16_fabs:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e64 v0.l, |v0.l|
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_neg_rsq_f16_fabs:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e64 v0, |v0|
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_neg_rsq_f16_fabs:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e64 v0.l, |v0.l|
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_neg_rsq_f16_fabs:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e64 v0, |v0|
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_neg_rsq_f16_fabs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5205,11 +7362,35 @@ define half @v_rsq_f16_arcp(half %a) {
; GFX89-NEXT: v_rsq_f16_e32 v0, v0
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10PLUS-LABEL: v_rsq_f16_arcp:
-; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: v_rsq_f16_e32 v0, v0
-; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_rsq_f16_arcp:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rsq_f16_e32 v0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-TRUE16-LABEL: v_rsq_f16_arcp:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_rsq_f16_e32 v0.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_rsq_f16_arcp:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_rsq_f16_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_rsq_f16_arcp:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_rsq_f16_e32 v0.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_rsq_f16_arcp:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_rsq_f16_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
%sqrt = call contract half @llvm.sqrt.f16(half %a)
%fdiv = fdiv contract arcp half 1.0, %sqrt
ret half %fdiv
@@ -5277,6 +7458,37 @@ define half @v_neg_rsq_f16_arcp(half %a) {
; GFX10-NEXT: v_rcp_f16_e64 v0, -v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_neg_rsq_f16_arcp:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_neg_rsq_f16_arcp:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_neg_rsq_f16_arcp:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_neg_rsq_f16_arcp:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_neg_rsq_f16_arcp:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5309,11 +7521,35 @@ define half @v_rsq_f16_afn(half %a) {
; GFX89-NEXT: v_rsq_f16_e32 v0, v0
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10PLUS-LABEL: v_rsq_f16_afn:
-; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: v_rsq_f16_e32 v0, v0
-; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_rsq_f16_afn:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rsq_f16_e32 v0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-TRUE16-LABEL: v_rsq_f16_afn:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_rsq_f16_e32 v0.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_rsq_f16_afn:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_rsq_f16_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_rsq_f16_afn:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_rsq_f16_e32 v0.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_rsq_f16_afn:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_rsq_f16_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
%sqrt = call contract half @llvm.sqrt.f16(half %a)
%fdiv = fdiv afn contract half 1.0, %sqrt
ret half %fdiv
@@ -5347,6 +7583,37 @@ define half @v_rsq_f16_afn_nocontract(half %a) {
; GFX10-NEXT: v_rcp_f16_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_rsq_f16_afn_nocontract:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_rsq_f16_afn_nocontract:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_rsq_f16_afn_nocontract:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_rsq_f16_afn_nocontract:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_rsq_f16_afn_nocontract:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5637,6 +7904,135 @@ define <2 x half> @v_rsq_v2f16(<2 x half> %a) {
; GFX10-FLUSH-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_rsq_v2f16:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v0.h, v0.h
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, 1.0
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.h
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v1
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v1
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_dual_mul_f32 v1, v4, v1 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-IEEE-TRUE16-NEXT: v_dual_add_f32 v2, v2, v3 :: v_dual_and_b32 v1, 0xff800000, v1
+; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v1, v1, v6
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v2
+; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, 1.0
+; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v0.h, 1.0
+; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_rsq_v2f16:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, 1.0
+; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v1, v1
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-IEEE-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, 1.0
+; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_rsq_v2f16:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v0.h, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, 1.0
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_dual_mul_f32 v1, v4, v1 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_dual_add_f32 v2, v2, v3 :: v_dual_and_b32 v1, 0xff800000, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v1, v1, v6
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, 1.0
+; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v0.h, 1.0
+; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_rsq_v2f16:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, 1.0
+; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v1, v1
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, 1.0
+; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_rsq_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5951,6 +8347,135 @@ define <2 x half> @v_neg_rsq_v2f16(<2 x half> %a) {
; GFX10-FLUSH-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31]
;
+; GFX11-IEEE-TRUE16-LABEL: v_neg_rsq_v2f16:
+; GFX11-IEEE-TRUE16: ; %bb.0:
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v0.h, v0.h
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, -1.0
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.h
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
+; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v1
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v1
+; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_dual_mul_f32 v1, v4, v1 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-IEEE-TRUE16-NEXT: v_dual_add_f32 v2, v2, v3 :: v_dual_and_b32 v1, 0xff800000, v1
+; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v1, v1, v6
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
+; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v2
+; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, -1.0
+; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v0.h, -1.0
+; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-FAKE16-LABEL: v_neg_rsq_v2f16:
+; GFX11-IEEE-FAKE16: ; %bb.0:
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, -1.0
+; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v1, v1
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
+; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
+; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-IEEE-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0
+; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, -1.0
+; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-TRUE16-LABEL: v_neg_rsq_v2f16:
+; GFX11-FLUSH-TRUE16: ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v0.h, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, -1.0
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_dual_mul_f32 v1, v4, v1 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_dual_add_f32 v2, v2, v3 :: v_dual_and_b32 v1, 0xff800000, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v1, v1, v6
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
+; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, -1.0
+; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v0.h, -1.0
+; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-FAKE16-LABEL: v_neg_rsq_v2f16:
+; GFX11-FLUSH-FAKE16: ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, -1.0
+; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v1, v1
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0
+; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, -1.0
+; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_neg_rsq_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
>From b5140b465bf6bf9e3bb93b68e0b36bfb587791ee Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Thu, 27 Feb 2025 14:06:08 -0500
Subject: [PATCH 2/4] fix mad-mix pattern in sdag and gisel
---
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 4 +
.../AMDGPU/AMDGPUInstructionSelector.cpp | 8 +
.../CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll | 32 +--
llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll | 264 +++++++++---------
llvm/test/CodeGen/AMDGPU/mad-mix.ll | 165 +++--------
5 files changed, 194 insertions(+), 279 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 536bf0c208752..a4a017cbc46aa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3665,6 +3665,10 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
// TODO: Should we try to look for neg/abs here?
}
+ // Prevent unnecessary subreg COPY to VGPR_16
+ if (Subtarget->useRealTrue16Insts() && Src.getOpcode() == ISD::TRUNCATE) {
+ Src = Src.getOperand(0);
+ }
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 6ef7505ec6f62..fc2311401d794 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -5878,6 +5878,14 @@ AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
CheckAbsNeg();
}
+ // Since we looked through FPEXT and removed it, we must also remove
+ // G_TRUNC. A G_TRUNC to 16 bits would have a destination in register class
+ // VGPR_16, which is not compatible with MadMix instructions.
+ Register PeekSrc = Src;
+ if (Subtarget->useRealTrue16Insts() &&
+ mi_match(PeekSrc, *MRI, m_GTrunc(m_Reg(PeekSrc))))
+ Src = PeekSrc;
+
Matched = true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll
index ac5ea85d4f83f..1651a806f2f68 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll
@@ -175,14 +175,12 @@ define half @v_fdiv_f16(half %a, half %b) {
; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l
; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v6, -v4, v3, v5 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v6, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v3, v5 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v4, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v2
; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
@@ -213,14 +211,12 @@ define half @v_fdiv_f16(half %a, half %b) {
; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l
; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v6, -v4, v3, v5 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v6, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v3, v5 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v4, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v2
; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
@@ -491,14 +487,12 @@ define half @v_fdiv_f16_ulp25(half %a, half %b) {
; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l
; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v6, -v4, v3, v5 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v6, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v3, v5 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
+; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v4, v2
+; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v2
; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
@@ -529,14 +523,12 @@ define half @v_fdiv_f16_ulp25(half %a, half %b) {
; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l
; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v6, -v4, v3, v5 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v6, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v3, v5 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
+; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v4, v2
+; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v2
; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
index beac41e42e0c6..884ebd9c0ea08 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
@@ -269,11 +269,19 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32(half %src0, half %src1, float %src2
}
define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt(half %src0, half %src1, float %src2) #0 {
-; GFX1100-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX1100-TRUE16-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
+; SDAG-GFX1100-TRUE16: ; %bb.0:
+; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0]
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX1100-TRUE16-NEXT: v_max_f16_e64 v0.l, v0.l, v0.l clamp
+; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX1100-FAKE16-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
+; SDAG-GFX1100-FAKE16: ; %bb.0:
+; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
+; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
; GFX900: ; %bb.0:
@@ -304,6 +312,12 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt(half %src0, half %sr
; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX1100-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
+; GISEL-GFX1100: ; %bb.0:
+; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX1100-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
+; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
; GISEL-CI: ; %bb.0:
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -410,10 +424,11 @@ define <2 x half> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v0.h, v3.l
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v0, v4, v1, v2 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.h, v3.l
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v2f32:
@@ -532,11 +547,12 @@ define <3 x half> @v_mad_mix_v3f32(<3 x half> %src0, <3 x half> %src1, <3 x half
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v0.h, v6.l
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v0, v7, v2, v4 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.h, v6.l
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v6
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v3f32:
@@ -702,14 +718,14 @@ define <4 x half> @v_mad_mix_v4f32(<4 x half> %src0, <4 x half> %src1, <4 x half
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v8.l, v0.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v0.h, v6.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v1.h, v7.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.l, v1.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v0, v8, v2, v4 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v6, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.h, v6.l
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v7.h, v7.l
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX1100-TRUE16-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v4f32:
@@ -910,27 +926,14 @@ define <4 x half> @v_mad_mix_v4f32(<4 x half> %src0, <4 x half> %src1, <4 x half
; FIXME (DAG): Fold clamp
define <2 x half> @v_mad_mix_v2f32_clamp_postcvt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
-; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v2f32_clamp_postcvt:
-; SDAG-GFX1100-TRUE16: ; %bb.0:
-; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v3, v4, v5 op_sel_hi:[1,1,1] clamp
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v3
-; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v2f32_clamp_postcvt:
-; SDAG-GFX1100-FAKE16: ; %bb.0:
-; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
-; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; SDAG-GFX1100-FAKE16-NEXT: v_mov_b32_e32 v0, v3
-; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX1100-LABEL: v_mad_mix_v2f32_clamp_postcvt:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GFX1100-NEXT: v_mov_b32_e32 v0, v3
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt:
; GFX900: ; %bb.0:
@@ -987,15 +990,6 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt(<2 x half> %src0, <2 x half> %s
; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v1, v1 clamp
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
-; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_clamp_postcvt:
-; GISEL-GFX1100: ; %bb.0:
-; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX1100-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GISEL-GFX1100-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, v3
-; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
-;
; GISEL-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1059,17 +1053,14 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v2.h, v6.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v2, v3, v5, v4 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_pack_b32_f16 v1, v0.l, 0
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.h, v6.l
+; SDAG-GFX1100-TRUE16-NEXT: v_pack_b32_f16 v1, v1.l, 0
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v0, v2, v2 clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v0, v3, v3 clamp
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v3f32_clamp_postcvt:
@@ -1273,55 +1264,53 @@ define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %s
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v4f32_clamp_postcvt:
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.l, v0.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v7.l, v2.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v8.l, v4.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v9.l, v1.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v10.l, v3.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v11.l, v5.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v6, v7, v8 op_sel_hi:[1,1,1] clamp
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v7, v9, v10, v11 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; SDAG-GFX1100-TRUE16-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.h, v6.l
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v7.h, v7.l
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v0, v6, v6 clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v1, v7, v7 clamp
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v4f32_clamp_postcvt:
; SDAG-GFX1100-FAKE16: ; %bb.0:
; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; SDAG-GFX1100-FAKE16-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
+; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v0, v6, v6 clamp
+; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v1, v7, v7 clamp
; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt:
+; SDAG-GFX900: ; %bb.0:
+; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
+; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX900-NEXT: v_pk_max_f16 v0, v6, v6 clamp
+; SDAG-GFX900-NEXT: v_pk_max_f16 v1, v7, v7 clamp
+; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt:
-; GFX906: ; %bb.0:
-; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX906-NEXT: v_fma_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
-; GFX906-NEXT: v_fma_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX906-NEXT: v_mov_b32_e32 v0, v6
-; GFX906-NEXT: v_mov_b32_e32 v1, v2
-; GFX906-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt:
+; SDAG-GFX906: ; %bb.0:
+; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
+; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX906-NEXT: v_pk_max_f16 v0, v6, v6 clamp
+; SDAG-GFX906-NEXT: v_pk_max_f16 v1, v7, v7 clamp
+; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-VI-LABEL: v_mad_mix_v4f32_clamp_postcvt:
; SDAG-VI: ; %bb.0:
@@ -1412,6 +1401,28 @@ define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %s
; GISEL-GFX1100-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt:
+; GISEL-GFX900: ; %bb.0:
+; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v6
+; GISEL-GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt:
+; GISEL-GFX906: ; %bb.0:
+; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v6
+; GISEL-GFX906-NEXT: v_mov_b32_e32 v1, v2
+; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_v4f32_clamp_postcvt:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1506,11 +1517,9 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_lo(<2 x half> %src0, <2 x half>
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v3, v4, v5 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_max_f16_e64 v3.l, v3.l, v3.l clamp
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v3
@@ -1675,13 +1684,9 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_hi(<2 x half> %src0, <2 x half>
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v3, v4, v5 op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
@@ -1851,15 +1856,12 @@ define <2 x half> @v_mad_mix_v2f32_clamp_precvt(<2 x half> %src0, <2 x half> %sr
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v2f32_clamp_precvt:
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v1, v3, v4, v5 op_sel_hi:[1,1,1] clamp
-; SDAG-GFX1100-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX1100-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v3
; SDAG-GFX1100-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v1
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX1100-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
@@ -2012,18 +2014,13 @@ define <3 x half> @v_mad_mix_v3f32_clamp_precvt(<3 x half> %src0, <3 x half> %sr
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v3f32_clamp_precvt:
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.l, v0.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v7.l, v2.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v8.l, v4.l
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v4.l, v5.l
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v2, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v2, v6, v7, v8 op_sel_hi:[1,1,1] clamp
-; SDAG-GFX1100-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v1, v1, v3, v4 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v6
; SDAG-GFX1100-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; SDAG-GFX1100-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
; SDAG-GFX1100-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
@@ -2211,20 +2208,13 @@ define <4 x half> @v_mad_mix_v4f32_clamp_precvt(<4 x half> %src0, <4 x half> %sr
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v4f32_clamp_precvt:
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.l, v1.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v7.l, v3.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v8.l, v0.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v9.l, v2.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v10.l, v4.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v11.l, v5.l
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v2, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v3, v1, v3, v5 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v2, v8, v9, v10 op_sel_hi:[1,1,1] clamp
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v3, v6, v7, v11 op_sel_hi:[1,1,1] clamp
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; SDAG-GFX1100-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
-; SDAG-GFX1100-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; SDAG-GFX1100-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v6
+; SDAG-GFX1100-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v7
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; SDAG-GFX1100-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
; SDAG-GFX1100-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v3
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix.ll b/llvm/test/CodeGen/AMDGPU/mad-mix.ll
index e1e356a92f28e..99e9e292c062d 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix.ll
@@ -198,26 +198,14 @@ define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %
}
define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
-; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v2f32:
-; SDAG-GFX1100-TRUE16: ; %bb.0:
-; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v3, v3, v4, v5 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v3
-; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v2f32:
-; SDAG-GFX1100-FAKE16: ; %bb.0:
-; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; SDAG-GFX1100-FAKE16-NEXT: v_mov_b32_e32 v0, v3
-; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX1100-LABEL: v_mad_mix_v2f32:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1100-NEXT: v_mov_b32_e32 v0, v3
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX900-LABEL: v_mad_mix_v2f32:
; SDAG-GFX900: ; %bb.0:
@@ -281,15 +269,6 @@ define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x hal
; SDAG-CI-NEXT: v_mac_f32_e32 v0, v4, v2
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
-; GISEL-GFX1100-LABEL: v_mad_mix_v2f32:
-; GISEL-GFX1100: ; %bb.0:
-; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX1100-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
-; GISEL-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, v3
-; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
-;
; GISEL-GFX900-LABEL: v_mad_mix_v2f32:
; GISEL-GFX900: ; %bb.0:
; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -352,24 +331,14 @@ define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x hal
}
define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
-; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v2f32_shuffle:
-; SDAG-GFX1100-TRUE16: ; %bb.0:
-; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v0, v0, v3, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v1, v4, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v2f32_shuffle:
-; SDAG-GFX1100-FAKE16: ; %bb.0:
-; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; SDAG-GFX1100-FAKE16-NEXT: v_mov_b32_e32 v0, v3
-; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX1100-LABEL: v_mad_mix_v2f32_shuffle:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
+; GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1100-NEXT: v_mov_b32_e32 v0, v3
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_mad_mix_v2f32_shuffle:
; GFX900: ; %bb.0:
@@ -428,15 +397,6 @@ define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1,
; SDAG-CI-NEXT: v_mad_f32 v1, v4, v3, v5
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
-; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_shuffle:
-; GISEL-GFX1100: ; %bb.0:
-; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX1100-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
-; GISEL-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
-; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, v3
-; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
-;
; GISEL-CI-LABEL: v_mad_mix_v2f32_shuffle:
; GISEL-CI: ; %bb.0:
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1247,28 +1207,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 {
}
define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1) #0 {
-; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v2f32_f32imm1:
-; SDAG-GFX1100-TRUE16: ; %bb.0:
-; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
-; SDAG-GFX1100-TRUE16-NEXT: s_mov_b32 s0, 1.0
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0]
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v2, v2, v3, s0 op_sel_hi:[1,1,0]
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v2
-; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v2f32_f32imm1:
-; SDAG-GFX1100-FAKE16: ; %bb.0:
-; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-FAKE16-NEXT: s_mov_b32 s0, 1.0
-; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0]
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0]
-; SDAG-GFX1100-FAKE16-NEXT: v_mov_b32_e32 v0, v2
-; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_f32imm1:
+; SDAG-GFX1100: ; %bb.0:
+; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX1100-NEXT: s_mov_b32 s0, 1.0
+; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; SDAG-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0]
+; SDAG-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; SDAG-GFX1100-NEXT: v_mov_b32_e32 v0, v2
+; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX900-LABEL: v_mad_mix_v2f32_f32imm1:
; SDAG-GFX900: ; %bb.0:
@@ -1392,28 +1339,15 @@ define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1)
}
define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
-; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
-; SDAG-GFX1100-TRUE16: ; %bb.0:
-; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
-; SDAG-GFX1100-TRUE16-NEXT: s_mov_b32 s0, 0x3e230000
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0]
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v2, v2, v3, s0 op_sel_hi:[1,1,0]
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v2
-; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
-; SDAG-GFX1100-FAKE16: ; %bb.0:
-; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-FAKE16-NEXT: s_mov_b32 s0, 0x3e230000
-; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0]
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0]
-; SDAG-GFX1100-FAKE16-NEXT: v_mov_b32_e32 v0, v2
-; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
+; SDAG-GFX1100: ; %bb.0:
+; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0x3e230000
+; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; SDAG-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0]
+; SDAG-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; SDAG-GFX1100-NEXT: v_mov_b32_e32 v0, v2
+; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX900-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
; SDAG-GFX900: ; %bb.0:
@@ -1544,28 +1478,15 @@ define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half>
}
define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
-; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v2f32_f32imminv2pi:
-; SDAG-GFX1100-TRUE16: ; %bb.0:
-; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
-; SDAG-GFX1100-TRUE16-NEXT: s_mov_b32 s0, 0.15915494
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0]
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mix_f32 v2, v2, v3, s0 op_sel_hi:[1,1,0]
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v2
-; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v2f32_f32imminv2pi:
-; SDAG-GFX1100-FAKE16: ; %bb.0:
-; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-FAKE16-NEXT: s_mov_b32 s0, 0.15915494
-; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0]
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0]
-; SDAG-GFX1100-FAKE16-NEXT: v_mov_b32_e32 v0, v2
-; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_f32imminv2pi:
+; SDAG-GFX1100: ; %bb.0:
+; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0.15915494
+; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; SDAG-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0]
+; SDAG-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; SDAG-GFX1100-NEXT: v_mov_b32_e32 v0, v2
+; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX900-LABEL: v_mad_mix_v2f32_f32imminv2pi:
; SDAG-GFX900: ; %bb.0:
>From 36b4eddf3e52d0b0699f5b99586b9195d99286bf Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Mon, 3 Mar 2025 16:59:14 -0500
Subject: [PATCH 3/4] address comment
---
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 3 +-
llvm/test/CodeGen/AMDGPU/mad-mix.ll | 80 ++++++++-----------
2 files changed, 34 insertions(+), 49 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index a4a017cbc46aa..30fe35891b45e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3666,7 +3666,8 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
}
// Prevent unnecessary subreg COPY to VGPR_16
- if (Subtarget->useRealTrue16Insts() && Src.getOpcode() == ISD::TRUNCATE) {
+ if (Src.getOpcode() == ISD::TRUNCATE &&
+ Src.getOperand(0).getValueType().getSizeInBits() == 32) {
Src = Src.getOperand(0);
}
return true;
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix.ll b/llvm/test/CodeGen/AMDGPU/mad-mix.ll
index 99e9e292c062d..e4ad51452ef2b 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix.ll
@@ -207,21 +207,21 @@ define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x hal
; GFX1100-NEXT: v_mov_b32_e32 v0, v3
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
-; SDAG-GFX900-LABEL: v_mad_mix_v2f32:
-; SDAG-GFX900: ; %bb.0:
-; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
-; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v3
-; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_mad_mix_v2f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GFX900-NEXT: v_mov_b32_e32 v0, v3
+; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; SDAG-GFX906-LABEL: v_mad_mix_v2f32:
-; SDAG-GFX906: ; %bb.0:
-; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
-; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v3
-; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
+; GFX906-LABEL: v_mad_mix_v2f32:
+; GFX906: ; %bb.0:
+; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GFX906-NEXT: v_mov_b32_e32 v0, v3
+; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32:
; SDAG-GFX9GEN: ; %bb.0:
@@ -269,22 +269,6 @@ define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x hal
; SDAG-CI-NEXT: v_mac_f32_e32 v0, v4, v2
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
-; GISEL-GFX900-LABEL: v_mad_mix_v2f32:
-; GISEL-GFX900: ; %bb.0:
-; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
-; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GISEL-GFX906-LABEL: v_mad_mix_v2f32:
-; GISEL-GFX906: ; %bb.0:
-; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
-; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v3
-; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
-;
; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32:
; GISEL-GFX9GEN: ; %bb.0:
; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1221,18 +1205,18 @@ define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1)
; SDAG-GFX900: ; %bb.0:
; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX900-NEXT: s_mov_b32 s4, 1.0
-; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
-; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
-; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0]
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v2
; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX906-LABEL: v_mad_mix_v2f32_f32imm1:
; SDAG-GFX906: ; %bb.0:
; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX906-NEXT: s_mov_b32 s4, 1.0
-; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
-; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
-; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0]
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v2
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imm1:
@@ -1353,18 +1337,18 @@ define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half>
; SDAG-GFX900: ; %bb.0:
; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX900-NEXT: s_mov_b32 s4, 0x3e230000
-; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
-; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
-; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0]
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v2
; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX906-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
; SDAG-GFX906: ; %bb.0:
; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX906-NEXT: s_mov_b32 s4, 0x3e230000
-; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
-; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
-; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0]
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v2
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
@@ -1492,18 +1476,18 @@ define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %s
; SDAG-GFX900: ; %bb.0:
; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX900-NEXT: s_mov_b32 s4, 0.15915494
-; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
-; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
-; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0]
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v2
; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX906-LABEL: v_mad_mix_v2f32_f32imminv2pi:
; SDAG-GFX906: ; %bb.0:
; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX906-NEXT: s_mov_b32 s4, 0.15915494
-; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
-; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
-; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0]
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v2
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imminv2pi:
>From 0204402304c324c7b7e75ef81ddeb196c56f053b Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Thu, 17 Apr 2025 14:17:05 -0400
Subject: [PATCH 4/4] address PR
---
.../AMDGPU/AMDGPUInstructionSelector.cpp | 4 +-
.../CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll | 4305 +++++------------
llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll | 141 +-
3 files changed, 1278 insertions(+), 3172 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index fc2311401d794..1c96384901b78 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -5882,8 +5882,8 @@ AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
// G_TRUNC. G_TRUNC to 16-bits would have a destination in RC VGPR_16, which
// is not compatible with MadMix instructions
Register PeekSrc = Src;
- if (Subtarget->useRealTrue16Insts() &&
- mi_match(PeekSrc, *MRI, m_GTrunc(m_Reg(PeekSrc))))
+ if (mi_match(PeekSrc, *MRI, m_GTrunc(m_Reg(PeekSrc))) &&
+ MRI->getType(PeekSrc).getSizeInBits() == 32)
Src = PeekSrc;
Matched = true;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll
index 1651a806f2f68..162ab4ebc43c2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll
@@ -12,10 +12,10 @@
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10,GFX10-IEEE %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10,GFX10-FLUSH %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+real-true16 -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-IEEE,GFX11-IEEE-TRUE16 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=-real-true16 -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-IEEE,GFX11-IEEE-FAKE16 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+real-true16 -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-FLUSH,GFX11-FLUSH-TRUE16 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=-real-true16 -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-FLUSH,GFX11-FLUSH-FAKE16 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+real-true16 -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-IEEE,GFX11-TRUE16,GFX11-IEEE-TRUE16 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=-real-true16 -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-IEEE,GFX11-FAKE16,GFX11-IEEE-FAKE16 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+real-true16 -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-FLUSH,GFX11-TRUE16,GFX11-FLUSH-TRUE16 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=-real-true16 -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-FLUSH,GFX11-FAKE16,GFX11-FLUSH-FAKE16 %s
define half @v_fdiv_f16(half %a, half %b) {
; GFX6-IEEE-LABEL: v_fdiv_f16:
@@ -170,94 +170,41 @@ define half @v_fdiv_f16(half %a, half %b) {
; GFX10-FLUSH-NEXT: v_div_fixup_f16 v0, v2, v1, v0
; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_fdiv_f16:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v4, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v2
-; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
-; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
-; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.h, v1.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_fdiv_f16:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v1
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v0
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v3, v4, v2
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v2, v4, v2
-; GFX11-IEEE-FAKE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
-; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v2, v2, v3
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v1, v0
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_fdiv_f16:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v4, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.h, v1.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_fdiv_f16:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v3, v4, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v2, v4, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v2, v2, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v1, v0
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_fdiv_f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v1
-; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v0
-; GFX11-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v2
-; GFX11-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_mul_f32_e32 v2, v4, v2
-; GFX11-NEXT: v_and_b32_e32 v2, 0xff800000, v2
-; GFX11-NEXT: v_add_f32_e32 v2, v2, v3
-; GFX11-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX11-NEXT: v_div_fixup_f16 v0, v2, v1, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_fdiv_f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l
+; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v3, v4, v2
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v2
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.h, v1.l, v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_fdiv_f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v1
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fmac_f32_e32 v3, v4, v2
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v2, v4, v2
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
+; GFX11-FAKE16-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v1, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv half %a, %b
ret half %fdiv
}
@@ -287,44 +234,21 @@ define half @v_fdiv_f16_afn(half %a, half %b) {
; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_fdiv_f16_afn:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v1.l
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_fdiv_f16_afn:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_fdiv_f16_afn:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v1.l
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_fdiv_f16_afn:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_fdiv_f16_afn:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_fdiv_f16_afn:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_fdiv_f16_afn:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv afn half %a, %b
ret half %fdiv
}
@@ -482,94 +406,41 @@ define half @v_fdiv_f16_ulp25(half %a, half %b) {
; GFX10-FLUSH-NEXT: v_div_fixup_f16 v0, v2, v1, v0
; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_fdiv_f16_ulp25:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v4, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v2
-; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
-; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
-; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.h, v1.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_fdiv_f16_ulp25:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v1
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v0
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v3, v4, v2
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v2, v4, v2
-; GFX11-IEEE-FAKE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
-; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v2, v2, v3
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v1, v0
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_fdiv_f16_ulp25:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v4, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.h, v1.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_fdiv_f16_ulp25:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v3, v4, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v2, v4, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v2, v2, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v1, v0
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_fdiv_f16_ulp25:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v1
-; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v0
-; GFX11-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v2
-; GFX11-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_mul_f32_e32 v2, v4, v2
-; GFX11-NEXT: v_and_b32_e32 v2, 0xff800000, v2
-; GFX11-NEXT: v_add_f32_e32 v2, v2, v3
-; GFX11-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX11-NEXT: v_div_fixup_f16 v0, v2, v1, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_fdiv_f16_ulp25:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l
+; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v3, v4, v2
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v2
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.h, v1.l, v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_fdiv_f16_ulp25:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v1
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fmac_f32_e32 v3, v4, v2
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v4, -v1, v3, v0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v2, v4, v2
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
+; GFX11-FAKE16-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v1, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv half %a, %b
ret half %fdiv
}
@@ -628,29 +499,17 @@ define half @v_neg_rcp_f16(half %x) {
; GFX10-NEXT: v_rcp_f16_e64 v0, -v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_neg_rcp_f16:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_neg_rcp_f16:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_neg_rcp_f16:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_neg_rcp_f16:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_neg_rcp_f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_neg_rcp_f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv half -1.0, %x
ret half %fdiv
}
@@ -709,29 +568,17 @@ define half @v_rcp_f16(half %x) {
; GFX10-NEXT: v_rcp_f16_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_rcp_f16:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_rcp_f16:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_rcp_f16:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_rcp_f16:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_rcp_f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_rcp_f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv half 1.0, %x
ret half %fdiv
}
@@ -790,29 +637,17 @@ define half @v_rcp_f16_arcp(half %x) {
; GFX10-NEXT: v_rcp_f16_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_rcp_f16_arcp:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_rcp_f16_arcp:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_rcp_f16_arcp:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_rcp_f16_arcp:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_rcp_f16_arcp:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_rcp_f16_arcp:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv arcp half 1.0, %x
ret half %fdiv
}
@@ -840,29 +675,17 @@ define half @v_rcp_f16_arcp_afn(half %x) {
; GFX10-NEXT: v_rcp_f16_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_rcp_f16_arcp_afn:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_rcp_f16_arcp_afn:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_rcp_f16_arcp_afn:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_rcp_f16_arcp_afn:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_rcp_f16_arcp_afn:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_rcp_f16_arcp_afn:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv arcp afn half 1.0, %x
ret half %fdiv
}
@@ -921,29 +744,17 @@ define half @v_rcp_f16_ulp25(half %x) {
; GFX10-NEXT: v_rcp_f16_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_rcp_f16_ulp25:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_rcp_f16_ulp25:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_rcp_f16_ulp25:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_rcp_f16_ulp25:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_rcp_f16_ulp25:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_rcp_f16_ulp25:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv half 1.0, %x
ret half %fdiv
}
@@ -973,44 +784,21 @@ define half @v_fdiv_f16_afn_ulp25(half %a, half %b) {
; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_fdiv_f16_afn_ulp25:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v1.l
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_fdiv_f16_afn_ulp25:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_fdiv_f16_afn_ulp25:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v1.l
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_fdiv_f16_afn_ulp25:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_fdiv_f16_afn_ulp25:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_fdiv_f16_afn_ulp25:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_fdiv_f16_afn_ulp25:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv afn half %a, %b
ret half %fdiv
}
@@ -1071,44 +859,21 @@ define half @v_fdiv_f16_arcp_ulp25(half %a, half %b) {
; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_fdiv_f16_arcp_ulp25:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v1.l
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_fdiv_f16_arcp_ulp25:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_fdiv_f16_arcp_ulp25:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v1.l
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_fdiv_f16_arcp_ulp25:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_fdiv_f16_arcp_ulp25:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_fdiv_f16_arcp_ulp25:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_fdiv_f16_arcp_ulp25:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv arcp half %a, %b
ret half %fdiv
}
@@ -1397,157 +1162,67 @@ define <2 x half> @v_fdiv_v2f16(<2 x half> %a, <2 x half> %b) {
; GFX10-FLUSH-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_fdiv_v2f16:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v1.h
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v4, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v6, v0.h
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.l
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v8.l, v1.h
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v9.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v4, v4, v2
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v6, v6, v3
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v10, -v5, v4, v7 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v11, -v8, v6, v9 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v4, v10, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v6, v11, v3
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v4, v7 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v7, -v8, v6, v9 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_dual_mul_f32 v2, v5, v2 :: v_dual_mul_f32 v3, v7, v3
-; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
-; GFX11-IEEE-TRUE16-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v3, v3, v6
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.l, v2
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.h, v3
-; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v2.l, v1.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.h, v2.h, v1.h, v0.h
-; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_fdiv_v2f16:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v6, v0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, v2
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v7, v5
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v4, v4
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v6, v6, v3 :: v_dual_mul_f32 v7, v7, v4
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v8, -v1, v6, v0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v9, -v1, v7, v0 op_sel:[1,0,1] op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_dual_fmac_f32 v6, v8, v3 :: v_dual_fmac_f32 v7, v9, v4
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v8, -v1, v6, v0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v9, -v1, v7, v0 op_sel:[1,0,1] op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v3, v8, v3 :: v_dual_mul_f32 v4, v9, v4
-; GFX11-IEEE-FAKE16-NEXT: v_and_b32_e32 v3, 0xff800000, v3
-; GFX11-IEEE-FAKE16-NEXT: v_dual_add_f32 v3, v3, v6 :: v_dual_and_b32 v4, 0xff800000, v4
-; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v4, v4, v7
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v3, v1, v0
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v1, v4, v2, v5
-; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_fdiv_v2f16:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v1.h
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v4, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v6, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.l
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v8.l, v1.h
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v9.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v4, v4, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v6, v6, v3
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v10, -v5, v4, v7 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v11, -v8, v6, v9 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v4, v10, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v6, v11, v3
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v4, v7 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v7, -v8, v6, v9 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_dual_mul_f32 v2, v5, v2 :: v_dual_mul_f32 v3, v7, v3
-; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v3, v3, v6
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.l, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.h, v3
-; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v2.l, v1.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.h, v2.h, v1.h, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_fdiv_v2f16:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v6, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v7, v5
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v4, v4
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v6, v6, v3 :: v_dual_mul_f32 v7, v7, v4
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v8, -v1, v6, v0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v9, -v1, v7, v0 op_sel:[1,0,1] op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_fmac_f32 v6, v8, v3 :: v_dual_fmac_f32 v7, v9, v4
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v8, -v1, v6, v0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v9, -v1, v7, v0 op_sel:[1,0,1] op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v3, v8, v3 :: v_dual_mul_f32 v4, v9, v4
-; GFX11-FLUSH-FAKE16-NEXT: v_and_b32_e32 v3, 0xff800000, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_add_f32 v3, v3, v6 :: v_dual_and_b32 v4, 0xff800000, v4
-; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v4, v4, v7
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v3, v1, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v1, v4, v2, v5
-; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_fdiv_v2f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v0
-; GFX11-NEXT: v_cvt_f32_f16_e32 v6, v0
-; GFX11-NEXT: v_cvt_f32_f16_e32 v4, v2
-; GFX11-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-NEXT: v_cvt_f32_f16_e32 v7, v5
-; GFX11-NEXT: v_rcp_f32_e32 v4, v4
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_dual_mul_f32 v6, v6, v3 :: v_dual_mul_f32 v7, v7, v4
-; GFX11-NEXT: v_fma_mix_f32 v8, -v1, v6, v0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fma_mix_f32 v9, -v1, v7, v0 op_sel:[1,0,1] op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_dual_fmac_f32 v6, v8, v3 :: v_dual_fmac_f32 v7, v9, v4
-; GFX11-NEXT: v_fma_mix_f32 v8, -v1, v6, v0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fma_mix_f32 v9, -v1, v7, v0 op_sel:[1,0,1] op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_dual_mul_f32 v3, v8, v3 :: v_dual_mul_f32 v4, v9, v4
-; GFX11-NEXT: v_and_b32_e32 v3, 0xff800000, v3
-; GFX11-NEXT: v_dual_add_f32 v3, v3, v6 :: v_dual_and_b32 v4, 0xff800000, v4
-; GFX11-NEXT: v_add_f32_e32 v4, v4, v7
-; GFX11-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX11-NEXT: v_div_fixup_f16 v0, v3, v1, v0
-; GFX11-NEXT: v_div_fixup_f16 v1, v4, v2, v5
-; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_fdiv_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v1.h
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v4, v0.l
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v6, v0.h
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.l
+; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v1.h
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v0.h
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v4, v4, v2
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v6, v6, v3
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v10, -v5, v4, v7 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v11, -v8, v6, v9 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v4, v10, v2
+; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v6, v11, v3
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v4, v7 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v7, -v8, v6, v9 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_dual_mul_f32 v2, v5, v2 :: v_dual_mul_f32 v3, v7, v3
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v3, v3, v6
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.l, v2
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.h, v3
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.l, v2.l, v1.l, v0.l
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.h, v2.h, v1.h, v0.h
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_fdiv_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v6, v0
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, v2
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v7, v5
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v4, v4
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_dual_mul_f32 v6, v6, v3 :: v_dual_mul_f32 v7, v7, v4
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v8, -v1, v6, v0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v9, -v1, v7, v0 op_sel:[1,0,1] op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_dual_fmac_f32 v6, v8, v3 :: v_dual_fmac_f32 v7, v9, v4
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v8, -v1, v6, v0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v9, -v1, v7, v0 op_sel:[1,0,1] op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_dual_mul_f32 v3, v8, v3 :: v_dual_mul_f32 v4, v9, v4
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff800000, v3
+; GFX11-FAKE16-NEXT: v_dual_add_f32 v3, v3, v6 :: v_dual_and_b32 v4, 0xff800000, v4
+; GFX11-FAKE16-NEXT: v_add_f32_e32 v4, v4, v7
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v0, v3, v1, v0
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v1, v4, v2, v5
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv <2 x half> %a, %b
ret <2 x half> %fdiv
}
@@ -1598,65 +1273,29 @@ define <2 x half> @v_fdiv_v2f16_afn(<2 x half> %a, <2 x half> %b) {
; GFX10-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_fdiv_v2f16_afn:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v1.l, v1.l
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v1.h, v1.h
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.h, v0.h, v1.h
-; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_fdiv_v2f16_afn:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v2, v2
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v1, v3, v2
-; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_fdiv_v2f16_afn:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v1.l, v1.l
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v1.h, v1.h
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.h, v0.h, v1.h
-; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_fdiv_v2f16_afn:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v2, v2
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v1, v3, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_fdiv_v2f16_afn:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-NEXT: v_rcp_f16_e32 v2, v2
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-NEXT: v_mul_f16_e32 v1, v3, v2
-; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_fdiv_v2f16_afn:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v1.l, v1.l
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v1.h, v1.h
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.h, v0.h, v1.h
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_fdiv_v2f16_afn:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v2, v2
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT: v_mul_f16_e32 v1, v3, v2
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv afn <2 x half> %a, %b
ret <2 x half> %fdiv
}
@@ -1945,157 +1584,67 @@ define <2 x half> @v_fdiv_v2f16_ulp25(<2 x half> %a, <2 x half> %b) {
; GFX10-FLUSH-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_fdiv_v2f16_ulp25:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v1.h
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v4, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v6, v0.h
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.l
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v8.l, v1.h
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v9.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v4, v4, v2
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v6, v6, v3
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v10, -v5, v4, v7 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v11, -v8, v6, v9 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v4, v10, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v6, v11, v3
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v4, v7 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v7, -v8, v6, v9 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_dual_mul_f32 v2, v5, v2 :: v_dual_mul_f32 v3, v7, v3
-; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
-; GFX11-IEEE-TRUE16-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v3, v3, v6
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.l, v2
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.h, v3
-; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v2.l, v1.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.h, v2.h, v1.h, v0.h
-; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_fdiv_v2f16_ulp25:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v6, v0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, v2
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v7, v5
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v4, v4
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v6, v6, v3 :: v_dual_mul_f32 v7, v7, v4
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v8, -v1, v6, v0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v9, -v1, v7, v0 op_sel:[1,0,1] op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_dual_fmac_f32 v6, v8, v3 :: v_dual_fmac_f32 v7, v9, v4
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v8, -v1, v6, v0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v9, -v1, v7, v0 op_sel:[1,0,1] op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v3, v8, v3 :: v_dual_mul_f32 v4, v9, v4
-; GFX11-IEEE-FAKE16-NEXT: v_and_b32_e32 v3, 0xff800000, v3
-; GFX11-IEEE-FAKE16-NEXT: v_dual_add_f32 v3, v3, v6 :: v_dual_and_b32 v4, 0xff800000, v4
-; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v4, v4, v7
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v3, v1, v0
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v1, v4, v2, v5
-; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_fdiv_v2f16_ulp25:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v1.h
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v4, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v6, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.l
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v8.l, v1.h
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v9.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v4, v4, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v6, v6, v3
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v10, -v5, v4, v7 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v11, -v8, v6, v9 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v4, v10, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v6, v11, v3
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v4, v7 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v7, -v8, v6, v9 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_dual_mul_f32 v2, v5, v2 :: v_dual_mul_f32 v3, v7, v3
-; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v3, v3, v6
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.l, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.h, v3
-; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v2.l, v1.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.h, v2.h, v1.h, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_fdiv_v2f16_ulp25:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v6, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v7, v5
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v4, v4
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v6, v6, v3 :: v_dual_mul_f32 v7, v7, v4
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v8, -v1, v6, v0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v9, -v1, v7, v0 op_sel:[1,0,1] op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_fmac_f32 v6, v8, v3 :: v_dual_fmac_f32 v7, v9, v4
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v8, -v1, v6, v0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v9, -v1, v7, v0 op_sel:[1,0,1] op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v3, v8, v3 :: v_dual_mul_f32 v4, v9, v4
-; GFX11-FLUSH-FAKE16-NEXT: v_and_b32_e32 v3, 0xff800000, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_add_f32 v3, v3, v6 :: v_dual_and_b32 v4, 0xff800000, v4
-; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v4, v4, v7
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v3, v1, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v1, v4, v2, v5
-; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_fdiv_v2f16_ulp25:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v0
-; GFX11-NEXT: v_cvt_f32_f16_e32 v6, v0
-; GFX11-NEXT: v_cvt_f32_f16_e32 v4, v2
-; GFX11-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-NEXT: v_cvt_f32_f16_e32 v7, v5
-; GFX11-NEXT: v_rcp_f32_e32 v4, v4
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_dual_mul_f32 v6, v6, v3 :: v_dual_mul_f32 v7, v7, v4
-; GFX11-NEXT: v_fma_mix_f32 v8, -v1, v6, v0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fma_mix_f32 v9, -v1, v7, v0 op_sel:[1,0,1] op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_dual_fmac_f32 v6, v8, v3 :: v_dual_fmac_f32 v7, v9, v4
-; GFX11-NEXT: v_fma_mix_f32 v8, -v1, v6, v0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fma_mix_f32 v9, -v1, v7, v0 op_sel:[1,0,1] op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_dual_mul_f32 v3, v8, v3 :: v_dual_mul_f32 v4, v9, v4
-; GFX11-NEXT: v_and_b32_e32 v3, 0xff800000, v3
-; GFX11-NEXT: v_dual_add_f32 v3, v3, v6 :: v_dual_and_b32 v4, 0xff800000, v4
-; GFX11-NEXT: v_add_f32_e32 v4, v4, v7
-; GFX11-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX11-NEXT: v_div_fixup_f16 v0, v3, v1, v0
-; GFX11-NEXT: v_div_fixup_f16 v1, v4, v2, v5
-; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_fdiv_v2f16_ulp25:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v1.h
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v4, v0.l
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v6, v0.h
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.l
+; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v1.h
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v0.h
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v4, v4, v2
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v6, v6, v3
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v10, -v5, v4, v7 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v11, -v8, v6, v9 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v4, v10, v2
+; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v6, v11, v3
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v4, v7 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v7, -v8, v6, v9 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_dual_mul_f32 v2, v5, v2 :: v_dual_mul_f32 v3, v7, v3
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v3, v3, v6
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.l, v2
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.h, v3
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.l, v2.l, v1.l, v0.l
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.h, v2.h, v1.h, v0.h
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_fdiv_v2f16_ulp25:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v6, v0
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, v2
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v7, v5
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v4, v4
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_dual_mul_f32 v6, v6, v3 :: v_dual_mul_f32 v7, v7, v4
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v8, -v1, v6, v0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v9, -v1, v7, v0 op_sel:[1,0,1] op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_dual_fmac_f32 v6, v8, v3 :: v_dual_fmac_f32 v7, v9, v4
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v8, -v1, v6, v0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v9, -v1, v7, v0 op_sel:[1,0,1] op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_dual_mul_f32 v3, v8, v3 :: v_dual_mul_f32 v4, v9, v4
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff800000, v3
+; GFX11-FAKE16-NEXT: v_dual_add_f32 v3, v3, v6 :: v_dual_and_b32 v4, 0xff800000, v4
+; GFX11-FAKE16-NEXT: v_add_f32_e32 v4, v4, v7
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v0, v3, v1, v0
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v1, v4, v2, v5
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv <2 x half> %a, %b
ret <2 x half> %fdiv
}
@@ -2371,151 +1920,64 @@ define <2 x half> @v_rcp_v2f16(<2 x half> %x) {
; GFX10-FLUSH-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_rcp_v2f16:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.h
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, 1.0
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v1
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v1
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_dual_mul_f32 v1, v4, v1 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-IEEE-TRUE16-NEXT: v_dual_add_f32 v2, v2, v3 :: v_dual_and_b32 v1, 0xff800000, v1
-; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v1, v1, v6
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v2
-; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, 1.0
-; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v0.h, 1.0
-; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_rcp_v2f16:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, 1.0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-IEEE-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, 1.0
-; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_rcp_v2f16:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, 1.0
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_dual_mul_f32 v1, v4, v1 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_dual_add_f32 v2, v2, v3 :: v_dual_and_b32 v1, 0xff800000, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v1, v1, v6
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, 1.0
-; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v0.h, 1.0
-; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_rcp_v2f16:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, 1.0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, 1.0
-; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_rcp_v2f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX11-NEXT: v_cvt_f32_f16_e32 v4, 1.0
-; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v5, v4, v2
-; GFX11-NEXT: v_mul_f32_e32 v4, v4, v3
-; GFX11-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fmac_f32_e32 v4, v7, v3
-; GFX11-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v2
-; GFX11-NEXT: v_mul_f32_e32 v3, v7, v3
-; GFX11-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-NEXT: v_add_f32_e32 v2, v2, v5
-; GFX11-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0
-; GFX11-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX11-NEXT: v_div_fixup_f16 v0, v2, v0, 1.0
-; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_rcp_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.h
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, 1.0
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
+; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v1
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, 1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, 1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v1
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_dual_mul_f32 v1, v4, v1 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v2, v2, v3 :: v_dual_and_b32 v1, 0xff800000, v1
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, v1, v6
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v2
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, 1.0
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v0.h, 1.0
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_rcp_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, 1.0
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, 1.0
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv <2 x half> <half 1.0, half 1.0>, %x
ret <2 x half> %fdiv
}
@@ -2791,151 +2253,64 @@ define <2 x half> @v_neg_rcp_v2f16(<2 x half> %x) {
; GFX10-FLUSH-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_neg_rcp_v2f16:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.h
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, -1.0
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v1
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v1
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_dual_mul_f32 v1, v4, v1 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-IEEE-TRUE16-NEXT: v_dual_add_f32 v2, v2, v3 :: v_dual_and_b32 v1, 0xff800000, v1
-; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v1, v1, v6
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v2
-; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, -1.0
-; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v0.h, -1.0
-; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_neg_rcp_v2f16:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, -1.0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-IEEE-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, -1.0
-; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_neg_rcp_v2f16:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, -1.0
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_dual_mul_f32 v1, v4, v1 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_dual_add_f32 v2, v2, v3 :: v_dual_and_b32 v1, 0xff800000, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v1, v1, v6
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, -1.0
-; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v0.h, -1.0
-; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_neg_rcp_v2f16:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, -1.0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, -1.0
-; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_neg_rcp_v2f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX11-NEXT: v_cvt_f32_f16_e32 v4, -1.0
-; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v5, v4, v2
-; GFX11-NEXT: v_mul_f32_e32 v4, v4, v3
-; GFX11-NEXT: v_fma_mix_f32 v7, -v0, v4, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fmac_f32_e32 v4, v7, v3
-; GFX11-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fma_mix_f32 v7, -v0, v4, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v2
-; GFX11-NEXT: v_mul_f32_e32 v3, v7, v3
-; GFX11-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-NEXT: v_add_f32_e32 v2, v2, v5
-; GFX11-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0
-; GFX11-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX11-NEXT: v_div_fixup_f16 v0, v2, v0, -1.0
-; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_neg_rcp_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.h
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, -1.0
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
+; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v1
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, -1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, -1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, -1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v1
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, -1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_dual_mul_f32 v1, v4, v1 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v2, v2, v3 :: v_dual_and_b32 v1, 0xff800000, v1
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, v1, v6
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v2
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, -1.0
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v0.h, -1.0
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_neg_rcp_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, -1.0
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, -1.0
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv <2 x half> <half -1.0, half -1.0>, %x
ret <2 x half> %fdiv
}
@@ -3227,163 +2602,69 @@ define <2 x half> @v_rcp_v2f16_fabs(<2 x half> %x) {
; GFX10-FLUSH-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_rcp_v2f16_fabs:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, 1.0
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v1.l
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.h
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.h
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v0, v0
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v0
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
-; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v0
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
-; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
-; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v0, v0, v6
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
-; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, v1.l, 1.0
-; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.h, v0.h, v1.h, 1.0
-; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_rcp_v2f16_fabs:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v5, 1.0
-; GFX11-IEEE-FAKE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v6, v5, v3
-; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v6, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, v2
-; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v6, v7, v3
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v4, v4
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v6, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v5, v5, v4
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v8, -|v0|, v5, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v5, v8, v4
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v0, -|v0|, v5, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v3, v7, v3 :: v_dual_mul_f32 v0, v0, v4
-; GFX11-IEEE-FAKE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
-; GFX11-IEEE-FAKE16-NEXT: v_dual_add_f32 v0, v0, v5 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v3, v3, v6
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v0, v2, 1.0
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0
-; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v1, v0
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_rcp_v2f16_fabs:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, 1.0
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v1.l
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.h
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.h
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v0, v0
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v0
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
-; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v0
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
-; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
-; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v0, v0, v6
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, v1.l, 1.0
-; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.h, v0.h, v1.h, 1.0
-; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_rcp_v2f16_fabs:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v5, 1.0
-; GFX11-FLUSH-FAKE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v6, v5, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v6, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v6, v7, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v4, v4
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v6, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v5, v5, v4
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v8, -|v0|, v5, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v5, v8, v4
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v0, -|v0|, v5, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v3, v7, v3 :: v_dual_mul_f32 v0, v0, v4
-; GFX11-FLUSH-FAKE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_add_f32 v0, v0, v5 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v3, v3, v6
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v0, v2, 1.0
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0
-; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v1, v0
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_rcp_v2f16_fabs:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cvt_f32_f16_e32 v5, 1.0
-; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
-; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v6, v5, v3
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-NEXT: v_fma_mix_f32 v7, -v1, v6, 1.0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_cvt_f32_f16_e32 v4, v2
-; GFX11-NEXT: v_fmac_f32_e32 v6, v7, v3
-; GFX11-NEXT: v_rcp_f32_e32 v4, v4
-; GFX11-NEXT: v_fma_mix_f32 v7, -v1, v6, 1.0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v5, v5, v4
-; GFX11-NEXT: v_fma_mix_f32 v8, -|v0|, v5, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fmac_f32_e32 v5, v8, v4
-; GFX11-NEXT: v_fma_mix_f32 v0, -|v0|, v5, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_dual_mul_f32 v3, v7, v3 :: v_dual_mul_f32 v0, v0, v4
-; GFX11-NEXT: v_and_b32_e32 v0, 0xff800000, v0
-; GFX11-NEXT: v_dual_add_f32 v0, v0, v5 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-NEXT: v_add_f32_e32 v3, v3, v6
-; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-NEXT: v_div_fixup_f16 v0, v0, v2, 1.0
-; GFX11-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0
-; GFX11-NEXT: v_pack_b32_f16 v0, v1, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_rcp_v2f16_fabs:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, 1.0
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v1.l
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.h
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.h
+; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v0, v0
+; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v0
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, 1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, 1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v0
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, v0, v6
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, v1.l, 1.0
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.h, v0.h, v1.h, 1.0
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_rcp_v2f16_fabs:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v5, 1.0
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v6, v5, v3
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, v2
+; GFX11-FAKE16-NEXT: v_fmac_f32_e32 v6, v7, v3
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v4, v4
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v5, v5, v4
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v8, -|v0|, v5, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fmac_f32_e32 v5, v8, v4
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v0, -|v0|, v5, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_dual_mul_f32 v3, v7, v3 :: v_dual_mul_f32 v0, v0, v4
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
+; GFX11-FAKE16-NEXT: v_dual_add_f32 v0, v0, v5 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-FAKE16-NEXT: v_add_f32_e32 v3, v3, v6
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v0, v0, v2, 1.0
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%x.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fdiv = fdiv <2 x half> <half 1.0, half 1.0>, %x.fabs
ret <2 x half> %fdiv
@@ -3676,163 +2957,69 @@ define <2 x half> @v_neg_rcp_v2f16_fabs(<2 x half> %x) {
; GFX10-FLUSH-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_neg_rcp_v2f16_fabs:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, -1.0
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v1.l
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.h
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.h
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v0, v0
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v0
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
-; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v0
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
-; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
-; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v0, v0, v6
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
-; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, v1.l, -1.0
-; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.h, v0.h, v1.h, -1.0
-; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_neg_rcp_v2f16_fabs:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v5, -1.0
-; GFX11-IEEE-FAKE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v6, v5, v3
-; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v6, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, v2
-; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v6, v7, v3
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v4, v4
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v6, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v5, v5, v4
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v8, -|v0|, v5, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v5, v8, v4
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v0, -|v0|, v5, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v3, v7, v3 :: v_dual_mul_f32 v0, v0, v4
-; GFX11-IEEE-FAKE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
-; GFX11-IEEE-FAKE16-NEXT: v_dual_add_f32 v0, v0, v5 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v3, v3, v6
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v0, v2, -1.0
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0
-; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v1, v0
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_neg_rcp_v2f16_fabs:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, -1.0
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v1.l
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.h
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.h
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v0, v0
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v0
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
-; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v0
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
-; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
-; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v0, v0, v6
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, v1.l, -1.0
-; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.h, v0.h, v1.h, -1.0
-; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_neg_rcp_v2f16_fabs:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v5, -1.0
-; GFX11-FLUSH-FAKE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v6, v5, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v6, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v6, v7, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v4, v4
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v6, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v5, v5, v4
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v8, -|v0|, v5, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v5, v8, v4
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v0, -|v0|, v5, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v3, v7, v3 :: v_dual_mul_f32 v0, v0, v4
-; GFX11-FLUSH-FAKE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_add_f32 v0, v0, v5 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v3, v3, v6
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v0, v2, -1.0
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0
-; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v1, v0
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_neg_rcp_v2f16_fabs:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cvt_f32_f16_e32 v5, -1.0
-; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
-; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v6, v5, v3
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-NEXT: v_fma_mix_f32 v7, -v1, v6, -1.0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_cvt_f32_f16_e32 v4, v2
-; GFX11-NEXT: v_fmac_f32_e32 v6, v7, v3
-; GFX11-NEXT: v_rcp_f32_e32 v4, v4
-; GFX11-NEXT: v_fma_mix_f32 v7, -v1, v6, -1.0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v5, v5, v4
-; GFX11-NEXT: v_fma_mix_f32 v8, -|v0|, v5, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fmac_f32_e32 v5, v8, v4
-; GFX11-NEXT: v_fma_mix_f32 v0, -|v0|, v5, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_dual_mul_f32 v3, v7, v3 :: v_dual_mul_f32 v0, v0, v4
-; GFX11-NEXT: v_and_b32_e32 v0, 0xff800000, v0
-; GFX11-NEXT: v_dual_add_f32 v0, v0, v5 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-NEXT: v_add_f32_e32 v3, v3, v6
-; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-NEXT: v_div_fixup_f16 v0, v0, v2, -1.0
-; GFX11-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0
-; GFX11-NEXT: v_pack_b32_f16 v0, v1, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_neg_rcp_v2f16_fabs:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, -1.0
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v1.l
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.h
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.h
+; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v0, v0
+; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v0
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, -1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, -1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, -1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v0
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, -1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, v0, v6
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, v1.l, -1.0
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.h, v0.h, v1.h, -1.0
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_neg_rcp_v2f16_fabs:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v5, -1.0
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v6, v5, v3
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v6, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, v2
+; GFX11-FAKE16-NEXT: v_fmac_f32_e32 v6, v7, v3
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v4, v4
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v6, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v5, v5, v4
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v8, -|v0|, v5, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fmac_f32_e32 v5, v8, v4
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v0, -|v0|, v5, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_dual_mul_f32 v3, v7, v3 :: v_dual_mul_f32 v0, v0, v4
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
+; GFX11-FAKE16-NEXT: v_dual_add_f32 v0, v0, v5 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-FAKE16-NEXT: v_add_f32_e32 v3, v3, v6
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v0, v0, v2, -1.0
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%x.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fdiv = fdiv <2 x half> <half -1.0, half -1.0>, %x.fabs
ret <2 x half> %fdiv
@@ -3933,52 +3120,24 @@ define <2 x half> @v_rcp_v2f16_arcp(<2 x half> %x) {
; GFX10-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_rcp_v2f16_arcp:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v0.h
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_rcp_v2f16_arcp:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_rcp_v2f16_arcp:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_rcp_v2f16_arcp:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_rcp_v2f16_arcp:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-NEXT: v_rcp_f16_e32 v0, v0
-; GFX11-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_rcp_v2f16_arcp:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v0.h
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_rcp_v2f16_arcp:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv arcp <2 x half> <half 1.0, half 1.0>, %x
ret <2 x half> %fdiv
}
@@ -4022,52 +3181,24 @@ define <2 x half> @v_rcp_v2f16_arcp_afn(<2 x half> %x) {
; GFX10-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_rcp_v2f16_arcp_afn:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v0.h
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_rcp_v2f16_arcp_afn:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_rcp_v2f16_arcp_afn:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_rcp_v2f16_arcp_afn:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_rcp_v2f16_arcp_afn:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-NEXT: v_rcp_f16_e32 v0, v0
-; GFX11-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_rcp_v2f16_arcp_afn:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v0.h
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_rcp_v2f16_arcp_afn:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv arcp afn <2 x half> <half 1.0, half 1.0>, %x
ret <2 x half> %fdiv
}
@@ -4343,151 +3474,64 @@ define <2 x half> @v_rcp_v2f16_ulp25(<2 x half> %x) {
; GFX10-FLUSH-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_rcp_v2f16_ulp25:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.h
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, 1.0
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v1
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v1
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_dual_mul_f32 v1, v4, v1 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-IEEE-TRUE16-NEXT: v_dual_add_f32 v2, v2, v3 :: v_dual_and_b32 v1, 0xff800000, v1
-; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v1, v1, v6
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v2
-; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, 1.0
-; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v0.h, 1.0
-; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_rcp_v2f16_ulp25:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, 1.0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-IEEE-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, 1.0
-; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_rcp_v2f16_ulp25:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, 1.0
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_dual_mul_f32 v1, v4, v1 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_dual_add_f32 v2, v2, v3 :: v_dual_and_b32 v1, 0xff800000, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v1, v1, v6
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, 1.0
-; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v0.h, 1.0
-; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_rcp_v2f16_ulp25:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, 1.0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, 1.0
-; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_rcp_v2f16_ulp25:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX11-NEXT: v_cvt_f32_f16_e32 v4, 1.0
-; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v5, v4, v2
-; GFX11-NEXT: v_mul_f32_e32 v4, v4, v3
-; GFX11-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fmac_f32_e32 v4, v7, v3
-; GFX11-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v2
-; GFX11-NEXT: v_mul_f32_e32 v3, v7, v3
-; GFX11-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-NEXT: v_add_f32_e32 v2, v2, v5
-; GFX11-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0
-; GFX11-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX11-NEXT: v_div_fixup_f16 v0, v2, v0, 1.0
-; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_rcp_v2f16_ulp25:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.h
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, 1.0
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
+; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v1
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, 1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, 1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v1
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, 1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_dual_mul_f32 v1, v4, v1 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v2, v2, v3 :: v_dual_and_b32 v1, 0xff800000, v1
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, v1, v6
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v2
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, 1.0
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v0.h, 1.0
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_rcp_v2f16_ulp25:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, 1.0
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, 1.0
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv <2 x half> <half 1.0, half 1.0>, %x
ret <2 x half> %fdiv
}
@@ -4538,65 +3582,29 @@ define <2 x half> @v_fdiv_v2f16_afn_ulp25(<2 x half> %a, <2 x half> %b) {
; GFX10-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_fdiv_v2f16_afn_ulp25:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v1.l, v1.l
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v1.h, v1.h
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.h, v0.h, v1.h
-; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_fdiv_v2f16_afn_ulp25:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v2, v2
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v1, v3, v2
-; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_fdiv_v2f16_afn_ulp25:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v1.l, v1.l
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v1.h, v1.h
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.h, v0.h, v1.h
-; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_fdiv_v2f16_afn_ulp25:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v2, v2
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v1, v3, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_fdiv_v2f16_afn_ulp25:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-NEXT: v_rcp_f16_e32 v2, v2
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-NEXT: v_mul_f16_e32 v1, v3, v2
-; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_fdiv_v2f16_afn_ulp25:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v1.l, v1.l
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v1.h, v1.h
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.h, v0.h, v1.h
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_fdiv_v2f16_afn_ulp25:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v2, v2
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT: v_mul_f16_e32 v1, v3, v2
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv afn <2 x half> %a, %b
ret <2 x half> %fdiv
}
@@ -4703,65 +3711,29 @@ define <2 x half> @v_fdiv_v2f16_arcp_ulp25(<2 x half> %a, <2 x half> %b) {
; GFX10-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_fdiv_v2f16_arcp_ulp25:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v1.l, v1.l
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v1.h, v1.h
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.h, v0.h, v1.h
-; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_fdiv_v2f16_arcp_ulp25:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v2, v2
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v1, v3, v2
-; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_fdiv_v2f16_arcp_ulp25:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v1.l, v1.l
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v1.h, v1.h
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.h, v0.h, v1.h
-; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_fdiv_v2f16_arcp_ulp25:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v2, v2
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v1, v3, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_fdiv_v2f16_arcp_ulp25:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-NEXT: v_rcp_f16_e32 v2, v2
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-NEXT: v_mul_f16_e32 v1, v3, v2
-; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_fdiv_v2f16_arcp_ulp25:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v1.l, v1.l
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v1.h, v1.h
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.h, v0.h, v1.h
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_fdiv_v2f16_arcp_ulp25:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v2, v2
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT: v_mul_f16_e32 v1, v3, v2
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv arcp <2 x half> %a, %b
ret <2 x half> %fdiv
}
@@ -4812,65 +3784,29 @@ define <2 x half> @v_fdiv_v2f16_arcp_afn_ulp25(<2 x half> %a, <2 x half> %b) {
; GFX10-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_fdiv_v2f16_arcp_afn_ulp25:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v1.l, v1.l
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v1.h, v1.h
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.h, v0.h, v1.h
-; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_fdiv_v2f16_arcp_afn_ulp25:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v2, v2
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v1, v3, v2
-; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_fdiv_v2f16_arcp_afn_ulp25:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v1.l, v1.l
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v1.h, v1.h
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.h, v0.h, v1.h
-; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_fdiv_v2f16_arcp_afn_ulp25:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v2, v2
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v1, v3, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_fdiv_v2f16_arcp_afn_ulp25:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-NEXT: v_rcp_f16_e32 v1, v1
-; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-NEXT: v_rcp_f16_e32 v2, v2
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX11-NEXT: v_mul_f16_e32 v1, v3, v2
-; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_fdiv_v2f16_arcp_afn_ulp25:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v1.l, v1.l
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v1.h, v1.h
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.h, v0.h, v1.h
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_fdiv_v2f16_arcp_afn_ulp25:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v1, v1
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v2, v2
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT: v_mul_f16_e32 v1, v3, v2
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv afn arcp <2 x half> %a, %b
ret <2 x half> %fdiv
}
@@ -5032,94 +3968,41 @@ define amdgpu_ps i16 @s_fdiv_f16(i16 inreg %a.arg, i16 inreg %b.arg) {
; GFX10-FLUSH-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-FLUSH-NEXT: ; return to shader part epilog
;
-; GFX11-IEEE-TRUE16-LABEL: s_fdiv_f16:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, s1
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, s0
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v0, v0
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v1, v1, v0
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v1, v2, v0
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v0, v2, v0
-; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
-; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
-; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, s1, s0
-; GFX11-IEEE-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-IEEE-TRUE16-NEXT: ; return to shader part epilog
-;
-; GFX11-IEEE-FAKE16-LABEL: s_fdiv_f16:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, s1
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v1, s0
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v1, v1, v0
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v1, v2, v0
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v0, v2, v0
-; GFX11-IEEE-FAKE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
-; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v0, s1, s0
-; GFX11-IEEE-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-IEEE-FAKE16-NEXT: ; return to shader part epilog
-;
-; GFX11-FLUSH-TRUE16-LABEL: s_fdiv_f16:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, s1
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, s0
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v0, v0
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v1, v1, v0
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v1, v2, v0
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v0, v2, v0
-; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
-; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
-; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, s1, s0
-; GFX11-FLUSH-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-FLUSH-TRUE16-NEXT: ; return to shader part epilog
-;
-; GFX11-FLUSH-FAKE16-LABEL: s_fdiv_f16:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, s1
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v1, s0
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v1, v1, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v1, v2, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v0, v2, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v0, s1, s0
-; GFX11-FLUSH-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-FLUSH-FAKE16-NEXT: ; return to shader part epilog
-; GFX11-LABEL: s_fdiv_f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f32_f16_e32 v0, s1
-; GFX11-NEXT: v_cvt_f32_f16_e32 v1, s0
-; GFX11-NEXT: v_rcp_f32_e32 v0, v0
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v1, v1, v0
-; GFX11-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fmac_f32_e32 v1, v2, v0
-; GFX11-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_mul_f32_e32 v0, v2, v0
-; GFX11-NEXT: v_and_b32_e32 v0, 0xff800000, v0
-; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-NEXT: v_div_fixup_f16 v0, v0, s1, s0
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-NEXT: ; return to shader part epilog
+; GFX11-TRUE16-LABEL: s_fdiv_f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, s1
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, s0
+; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v0, v0
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v1, v1, v0
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v1, v2, v0
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v0, v2, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, s1, s0
+; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: s_fdiv_f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, s1
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v1, s0
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v0, v0
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v1, v1, v0
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fmac_f32_e32 v1, v2, v0
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v0, v2, v0
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
+; GFX11-FAKE16-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v0, v0, s1, s0
+; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
%a = bitcast i16 %a.arg to half
%b = bitcast i16 %b.arg to half
%fdiv = fdiv half %a, %b
@@ -5183,44 +4066,21 @@ define amdgpu_ps i16 @s_fdiv_f16_arcp(i16 inreg %a.arg, i16 inreg %b.arg) {
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
-; GFX11-IEEE-TRUE16-LABEL: s_fdiv_f16_arcp:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.l, s0, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-IEEE-TRUE16-NEXT: ; return to shader part epilog
-;
-; GFX11-IEEE-FAKE16-LABEL: s_fdiv_f16_arcp:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v0, s1
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v0, s0, v0
-; GFX11-IEEE-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-IEEE-FAKE16-NEXT: ; return to shader part epilog
-;
-; GFX11-FLUSH-TRUE16-LABEL: s_fdiv_f16_arcp:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, s0, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-FLUSH-TRUE16-NEXT: ; return to shader part epilog
-;
-; GFX11-FLUSH-FAKE16-LABEL: s_fdiv_f16_arcp:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v0, s1
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v0, s0, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-FLUSH-FAKE16-NEXT: ; return to shader part epilog
-; GFX11-LABEL: s_fdiv_f16_arcp:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_rcp_f16_e32 v0, s1
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f16_e32 v0, s0, v0
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-NEXT: ; return to shader part epilog
+; GFX11-TRUE16-LABEL: s_fdiv_f16_arcp:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.l, s0, v0.l
+; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: s_fdiv_f16_arcp:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v0, s1
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_mul_f16_e32 v0, s0, v0
+; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
%a = bitcast i16 %a.arg to half
%b = bitcast i16 %b.arg to half
%fdiv = fdiv arcp half %a, %b
@@ -5253,44 +4113,21 @@ define amdgpu_ps i16 @s_fdiv_f16_afn(i16 inreg %a.arg, i16 inreg %b.arg) {
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
-; GFX11-IEEE-TRUE16-LABEL: s_fdiv_f16_afn:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f16_e32 v0.l, s0, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-IEEE-TRUE16-NEXT: ; return to shader part epilog
-;
-; GFX11-IEEE-FAKE16-LABEL: s_fdiv_f16_afn:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v0, s1
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f16_e32 v0, s0, v0
-; GFX11-IEEE-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-IEEE-FAKE16-NEXT: ; return to shader part epilog
-;
-; GFX11-FLUSH-TRUE16-LABEL: s_fdiv_f16_afn:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, s0, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-FLUSH-TRUE16-NEXT: ; return to shader part epilog
-;
-; GFX11-FLUSH-FAKE16-LABEL: s_fdiv_f16_afn:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v0, s1
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v0, s0, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-FLUSH-FAKE16-NEXT: ; return to shader part epilog
-; GFX11-LABEL: s_fdiv_f16_afn:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_rcp_f16_e32 v0, s1
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f16_e32 v0, s0, v0
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-NEXT: ; return to shader part epilog
+; GFX11-TRUE16-LABEL: s_fdiv_f16_afn:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.l, s0, v0.l
+; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: s_fdiv_f16_afn:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v0, s1
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_mul_f16_e32 v0, s0, v0
+; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
%a = bitcast i16 %a.arg to half
%b = bitcast i16 %b.arg to half
%fdiv = fdiv afn half %a, %b
@@ -5598,129 +4435,53 @@ define amdgpu_ps i32 @s_fdiv_v2f16(i32 inreg %a.arg, i32 inreg %b.arg) {
; GFX10-FLUSH-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-FLUSH-NEXT: ; return to shader part epilog
;
-; GFX11-IEEE-TRUE16-LABEL: s_fdiv_v2f16:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, s1
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, s0
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v0, v0
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v1, v1, v0
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v1, v2, v0
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v0, v2, v0
-; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
-; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
-; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, s1, s0
-; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-IEEE-TRUE16-NEXT: ; return to shader part epilog
-;
-; GFX11-IEEE-FAKE16-LABEL: s_fdiv_v2f16:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_lshr_b32 s2, s1, 16
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, s1
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v1, s2
-; GFX11-IEEE-FAKE16-NEXT: s_lshr_b32 s3, s0, 16
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, s0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, s3
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v1, v1
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v2, v2, v0 :: v_dual_mul_f32 v3, v3, v1
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v4, -s1, v2, s0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v5, -s2, v3, s3 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_dual_fmac_f32 v2, v4, v0 :: v_dual_fmac_f32 v3, v5, v1
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v4, -s1, v2, s0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v5, -s2, v3, s3 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v0, v4, v0 :: v_dual_mul_f32 v1, v5, v1
-; GFX11-IEEE-FAKE16-NEXT: v_and_b32_e32 v1, 0xff800000, v1
-; GFX11-IEEE-FAKE16-NEXT: v_dual_add_f32 v1, v1, v3 :: v_dual_and_b32 v0, 0xff800000, v0
-; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v0, v0, v2
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v1, v1, s2, s3
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v0, s1, s0
-; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-IEEE-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-IEEE-FAKE16-NEXT: ; return to shader part epilog
-;
-; GFX11-FLUSH-TRUE16-LABEL: s_fdiv_v2f16:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, s1
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, s0
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v0, v0
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v1, v1, v0
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v1, v2, v0
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v0, v2, v0
-; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
-; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
-; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, s1, s0
-; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-FLUSH-TRUE16-NEXT: ; return to shader part epilog
-;
-; GFX11-FLUSH-FAKE16-LABEL: s_fdiv_v2f16:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_lshr_b32 s2, s1, 16
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, s1
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v1, s2
-; GFX11-FLUSH-FAKE16-NEXT: s_lshr_b32 s3, s0, 16
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, s0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, s3
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v1, v1
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v2, v2, v0 :: v_dual_mul_f32 v3, v3, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v4, -s1, v2, s0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v5, -s2, v3, s3 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_fmac_f32 v2, v4, v0 :: v_dual_fmac_f32 v3, v5, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v4, -s1, v2, s0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v5, -s2, v3, s3 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v0, v4, v0 :: v_dual_mul_f32 v1, v5, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_and_b32_e32 v1, 0xff800000, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_add_f32 v1, v1, v3 :: v_dual_and_b32 v0, 0xff800000, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v0, v0, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v1, v1, s2, s3
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v0, s1, s0
-; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-FLUSH-FAKE16-NEXT: ; return to shader part epilog
-; GFX11-LABEL: s_fdiv_v2f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_lshr_b32 s2, s1, 16
-; GFX11-NEXT: v_cvt_f32_f16_e32 v0, s1
-; GFX11-NEXT: v_cvt_f32_f16_e32 v1, s2
-; GFX11-NEXT: s_lshr_b32 s3, s0, 16
-; GFX11-NEXT: v_cvt_f32_f16_e32 v2, s0
-; GFX11-NEXT: v_cvt_f32_f16_e32 v3, s3
-; GFX11-NEXT: v_rcp_f32_e32 v0, v0
-; GFX11-NEXT: v_rcp_f32_e32 v1, v1
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_dual_mul_f32 v2, v2, v0 :: v_dual_mul_f32 v3, v3, v1
-; GFX11-NEXT: v_fma_mix_f32 v4, -s1, v2, s0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fma_mix_f32 v5, -s2, v3, s3 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_dual_fmac_f32 v2, v4, v0 :: v_dual_fmac_f32 v3, v5, v1
-; GFX11-NEXT: v_fma_mix_f32 v4, -s1, v2, s0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fma_mix_f32 v5, -s2, v3, s3 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_dual_mul_f32 v0, v4, v0 :: v_dual_mul_f32 v1, v5, v1
-; GFX11-NEXT: v_and_b32_e32 v1, 0xff800000, v1
-; GFX11-NEXT: v_dual_add_f32 v1, v1, v3 :: v_dual_and_b32 v0, 0xff800000, v0
-; GFX11-NEXT: v_add_f32_e32 v0, v0, v2
-; GFX11-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-NEXT: v_div_fixup_f16 v1, v1, s2, s3
-; GFX11-NEXT: v_div_fixup_f16 v0, v0, s1, s0
-; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-NEXT: ; return to shader part epilog
+; GFX11-TRUE16-LABEL: s_fdiv_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, s1
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, s0
+; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v0, v0
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v1, v1, v0
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v1, v2, v0
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v2, -s1, v1, s0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v0, v2, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, s1, s0
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.l
+; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: s_fdiv_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_lshr_b32 s2, s1, 16
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, s1
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v1, s2
+; GFX11-FAKE16-NEXT: s_lshr_b32 s3, s0, 16
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, s0
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, s3
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v0, v0
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v1, v1
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_dual_mul_f32 v2, v2, v0 :: v_dual_mul_f32 v3, v3, v1
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v4, -s1, v2, s0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v5, -s2, v3, s3 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_dual_fmac_f32 v2, v4, v0 :: v_dual_fmac_f32 v3, v5, v1
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v4, -s1, v2, s0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v5, -s2, v3, s3 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_dual_mul_f32 v0, v4, v0 :: v_dual_mul_f32 v1, v5, v1
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff800000, v1
+; GFX11-FAKE16-NEXT: v_dual_add_f32 v1, v1, v3 :: v_dual_and_b32 v0, 0xff800000, v0
+; GFX11-FAKE16-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v1, v1, s2, s3
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v0, v0, s1, s0
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
%a = bitcast i32 %a.arg to <2 x half>
%b = bitcast i32 %b.arg to <2 x half>
%fdiv = fdiv <2 x half> %a, %b
@@ -5782,39 +4543,19 @@ define amdgpu_ps i16 @s_rcp_f16(i16 inreg %a.arg) {
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
-; GFX11-IEEE-TRUE16-LABEL: s_rcp_f16:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s0
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-IEEE-TRUE16-NEXT: ; return to shader part epilog
-;
-; GFX11-IEEE-FAKE16-LABEL: s_rcp_f16:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v0, s0
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-IEEE-FAKE16-NEXT: ; return to shader part epilog
-;
-; GFX11-FLUSH-TRUE16-LABEL: s_rcp_f16:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s0
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-FLUSH-TRUE16-NEXT: ; return to shader part epilog
-;
-; GFX11-FLUSH-FAKE16-LABEL: s_rcp_f16:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v0, s0
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-FLUSH-FAKE16-NEXT: ; return to shader part epilog
-; GFX11-LABEL: s_rcp_f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_rcp_f16_e32 v0, s0
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-NEXT: ; return to shader part epilog
+; GFX11-TRUE16-LABEL: s_rcp_f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s0
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: s_rcp_f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v0, s0
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
%a = bitcast i16 %a.arg to half
%fdiv = fdiv half 1.0, %a
%result = bitcast half %fdiv to i16
@@ -5875,39 +4616,19 @@ define amdgpu_ps i16 @s_neg_rcp_f16(i16 inreg %a.arg) {
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
-; GFX11-IEEE-TRUE16-LABEL: s_neg_rcp_f16:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -s0
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-IEEE-TRUE16-NEXT: ; return to shader part epilog
-;
-; GFX11-IEEE-FAKE16-LABEL: s_neg_rcp_f16:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e64 v0, -s0
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-IEEE-FAKE16-NEXT: ; return to shader part epilog
-;
-; GFX11-FLUSH-TRUE16-LABEL: s_neg_rcp_f16:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -s0
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-FLUSH-TRUE16-NEXT: ; return to shader part epilog
-;
-; GFX11-FLUSH-FAKE16-LABEL: s_neg_rcp_f16:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e64 v0, -s0
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-FLUSH-FAKE16-NEXT: ; return to shader part epilog
-; GFX11-LABEL: s_neg_rcp_f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_rcp_f16_e64 v0, -s0
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-NEXT: ; return to shader part epilog
+; GFX11-TRUE16-LABEL: s_neg_rcp_f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -s0
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: s_neg_rcp_f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: v_rcp_f16_e64 v0, -s0
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
%a = bitcast i16 %a.arg to half
%fdiv = fdiv half -1.0, %a
%result = bitcast half %fdiv to i16
@@ -5974,39 +4695,19 @@ define amdgpu_ps i16 @s_rsq_f16(i16 inreg %a.arg) {
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
-; GFX11-IEEE-TRUE16-LABEL: s_rsq_f16:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: v_rsq_f16_e32 v0.l, s0
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-IEEE-TRUE16-NEXT: ; return to shader part epilog
-;
-; GFX11-IEEE-FAKE16-LABEL: s_rsq_f16:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: v_rsq_f16_e32 v0, s0
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-IEEE-FAKE16-NEXT: ; return to shader part epilog
-;
-; GFX11-FLUSH-TRUE16-LABEL: s_rsq_f16:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: v_rsq_f16_e32 v0.l, s0
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-FLUSH-TRUE16-NEXT: ; return to shader part epilog
-;
-; GFX11-FLUSH-FAKE16-LABEL: s_rsq_f16:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: v_rsq_f16_e32 v0, s0
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-FLUSH-FAKE16-NEXT: ; return to shader part epilog
-; GFX11-LABEL: s_rsq_f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_rsq_f16_e32 v0, s0
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-NEXT: ; return to shader part epilog
+; GFX11-TRUE16-LABEL: s_rsq_f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: v_rsq_f16_e32 v0.l, s0
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: s_rsq_f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: v_rsq_f16_e32 v0, s0
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
%a = bitcast i16 %a.arg to half
%sqrt = call contract half @llvm.sqrt.f16(half %a)
%fdiv = fdiv contract half 1.0, %sqrt
@@ -6304,162 +5005,68 @@ define amdgpu_ps i32 @s_rsq_v2f16(i32 inreg %a.arg) {
; GFX10-FLUSH-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-FLUSH-NEXT: ; return to shader part epilog
;
-; GFX11-IEEE-TRUE16-LABEL: s_rsq_v2f16:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, s0
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, -1.0
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v2, v1
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v5, -v3, v2, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fma_f32 v5, v5, v1, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v3, -v3, v5, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v1
-; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v3, 0xff800000, v3
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v6, -v4, v2, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v3, v3, v5
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v2, v6, v1
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v3
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v2, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.h, v0.h, v0.l, -1.0
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v1, v4, v1
-; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v1, 0xff800000, v1
-; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
-; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, -1.0
-; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-IEEE-TRUE16-NEXT: ; return to shader part epilog
-;
-; GFX11-IEEE-FAKE16-LABEL: s_rsq_v2f16:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_lshr_b32 s1, s0, 16
-; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v0, s0
-; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v1, s1
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, -1.0
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-IEEE-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, -1.0
-; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-IEEE-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-IEEE-FAKE16-NEXT: ; return to shader part epilog
-;
-; GFX11-FLUSH-TRUE16-LABEL: s_rsq_v2f16:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, s0
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, -1.0
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v2, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v5, -v3, v2, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_f32 v5, v5, v1, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v3, -v3, v5, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v3, 0xff800000, v3
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v6, -v4, v2, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v3, v3, v5
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v2, v6, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v3
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v2, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.h, v0.h, v0.l, -1.0
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v1, v4, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v1, 0xff800000, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, -1.0
-; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-FLUSH-TRUE16-NEXT: ; return to shader part epilog
-;
-; GFX11-FLUSH-FAKE16-LABEL: s_rsq_v2f16:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_lshr_b32 s1, s0, 16
-; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v0, s0
-; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v1, s1
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, -1.0
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, -1.0
-; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-FLUSH-FAKE16-NEXT: ; return to shader part epilog
-; GFX11-LABEL: s_rsq_v2f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_lshr_b32 s1, s0, 16
-; GFX11-NEXT: v_sqrt_f16_e32 v0, s0
-; GFX11-NEXT: v_sqrt_f16_e32 v1, s1
-; GFX11-NEXT: v_cvt_f32_f16_e32 v4, -1.0
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v5, v4, v2
-; GFX11-NEXT: v_mul_f32_e32 v4, v4, v3
-; GFX11-NEXT: v_fma_mix_f32 v7, -v1, v4, -1.0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fmac_f32_e32 v4, v7, v3
-; GFX11-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fma_mix_f32 v7, -v1, v4, -1.0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v2
-; GFX11-NEXT: v_mul_f32_e32 v3, v7, v3
-; GFX11-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-NEXT: v_add_f32_e32 v2, v2, v5
-; GFX11-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0
-; GFX11-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX11-NEXT: v_div_fixup_f16 v0, v2, v0, -1.0
-; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-NEXT: ; return to shader part epilog
+; GFX11-TRUE16-LABEL: s_rsq_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, s0
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, -1.0
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v2, v2, v1
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v5, -v3, v2, -1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fma_f32 v5, v5, v1, v2
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v3, -v3, v5, -1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v1
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff800000, v3
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v6, -v4, v2, -1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v3, v3, v5
+; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v2, v6, v1
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v3
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v2, -1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.h, v0.h, v0.l, -1.0
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v1, v4, v1
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff800000, v1
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, -1.0
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: s_rsq_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_lshr_b32 s1, s0, 16
+; GFX11-FAKE16-NEXT: v_sqrt_f16_e32 v0, s0
+; GFX11-FAKE16-NEXT: v_sqrt_f16_e32 v1, s1
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, -1.0
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, -1.0
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
%a = bitcast i32 %a.arg to <2 x half>
%sqrt = call contract <2 x half> @llvm.sqrt.v2f16(<2 x half> %a)
%fdiv = fdiv contract <2 x half> <half -1.0, half -1.0>, %sqrt
@@ -6527,29 +5134,17 @@ define half @v_rsq_f16(half %a) {
; GFX10-NEXT: v_rsq_f16_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_rsq_f16:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_rsq_f16_e32 v0.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_rsq_f16:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_rsq_f16_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_rsq_f16:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_rsq_f16_e32 v0.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_rsq_f16:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_rsq_f16_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_rsq_f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_rsq_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_rsq_f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_rsq_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%sqrt = call contract half @llvm.sqrt.f16(half %a)
%fdiv = fdiv contract half 1.0, %sqrt
ret half %fdiv
@@ -6617,44 +5212,21 @@ define half @v_neg_rsq_f16(half %a) {
; GFX10-NEXT: v_rcp_f16_e64 v0, -v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_neg_rsq_f16:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_neg_rsq_f16:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_neg_rsq_f16:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_neg_rsq_f16:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_neg_rsq_f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_rcp_f16_e64 v0, -v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_neg_rsq_f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_neg_rsq_f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%sqrt = call contract half @llvm.sqrt.f16(half %a)
%fdiv = fdiv contract half -1.0, %sqrt
ret half %fdiv
@@ -6724,49 +5296,23 @@ define { half, half } @v_rsq_f16_multi_use(half %a) {
; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_rsq_f16_multi_use:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v2.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_rsq_f16_e32 v1.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b32_e32 v0, v2
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_rsq_f16_multi_use:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v2, v0
-; GFX11-IEEE-FAKE16-NEXT: v_rsq_f16_e32 v1, v0
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_mov_b32_e32 v0, v2
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_rsq_f16_multi_use:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v2.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_rsq_f16_e32 v1.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b32_e32 v0, v2
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_rsq_f16_multi_use:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v2, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_rsq_f16_e32 v1, v0
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_mov_b32_e32 v0, v2
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_rsq_f16_multi_use:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_sqrt_f16_e32 v2, v0
-; GFX11-NEXT: v_rsq_f16_e32 v1, v0
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mov_b32_e32 v0, v2
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_rsq_f16_multi_use:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_sqrt_f16_e32 v2.l, v0.l
+; GFX11-TRUE16-NEXT: v_rsq_f16_e32 v1.l, v0.l
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, v2
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_rsq_f16_multi_use:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_sqrt_f16_e32 v2, v0
+; GFX11-FAKE16-NEXT: v_rsq_f16_e32 v1, v0
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, v2
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%sqrt = call contract half @llvm.sqrt.f16(half %a)
%insert.0 = insertvalue { half, half } poison, half %sqrt, 0
%fdiv = fdiv contract half 1.0, %sqrt
@@ -6836,44 +5382,21 @@ define half @v_rsq_f16_missing_contract0(half %a) {
; GFX10-NEXT: v_rcp_f16_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_rsq_f16_missing_contract0:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_rsq_f16_missing_contract0:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_rsq_f16_missing_contract0:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_rsq_f16_missing_contract0:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_rsq_f16_missing_contract0:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_rcp_f16_e32 v0, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_rsq_f16_missing_contract0:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_rsq_f16_missing_contract0:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%sqrt = call half @llvm.sqrt.f16(half %a)
%fdiv = fdiv contract half 1.0, %sqrt
ret half %fdiv
@@ -6941,44 +5464,21 @@ define half @v_rsq_f16_missing_contract1(half %a) {
; GFX10-NEXT: v_rcp_f16_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_rsq_f16_missing_contract1:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_rsq_f16_missing_contract1:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_rsq_f16_missing_contract1:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_rsq_f16_missing_contract1:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_rsq_f16_missing_contract1:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_rcp_f16_e32 v0, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_rsq_f16_missing_contract1:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_rsq_f16_missing_contract1:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%sqrt = call contract half @llvm.sqrt.f16(half %a)
%fdiv = fdiv half 1.0, %sqrt
ret half %fdiv
@@ -7046,44 +5546,21 @@ define half @v_neg_rsq_f16_missing_contract0(half %a) {
; GFX10-NEXT: v_rcp_f16_e64 v0, -v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_neg_rsq_f16_missing_contract0:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_neg_rsq_f16_missing_contract0:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_neg_rsq_f16_missing_contract0:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_neg_rsq_f16_missing_contract0:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_neg_rsq_f16_missing_contract0:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_rcp_f16_e64 v0, -v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_neg_rsq_f16_missing_contract0:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_neg_rsq_f16_missing_contract0:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%sqrt = call half @llvm.sqrt.f16(half %a)
%fdiv = fdiv contract half -1.0, %sqrt
ret half %fdiv
@@ -7151,44 +5628,21 @@ define half @v_neg_rsq_f16_missing_contract1(half %a) {
; GFX10-NEXT: v_rcp_f16_e64 v0, -v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_neg_rsq_f16_missing_contract1:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_neg_rsq_f16_missing_contract1:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_neg_rsq_f16_missing_contract1:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_neg_rsq_f16_missing_contract1:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_neg_rsq_f16_missing_contract1:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_rcp_f16_e64 v0, -v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_neg_rsq_f16_missing_contract1:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_neg_rsq_f16_missing_contract1:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%sqrt = call contract half @llvm.sqrt.f16(half %a)
%fdiv = fdiv half -1.0, %sqrt
ret half %fdiv
@@ -7256,44 +5710,21 @@ define half @v_neg_rsq_f16_fabs(half %a) {
; GFX10-NEXT: v_rcp_f16_e64 v0, -v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_neg_rsq_f16_fabs:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e64 v0.l, |v0.l|
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_neg_rsq_f16_fabs:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e64 v0, |v0|
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_neg_rsq_f16_fabs:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e64 v0.l, |v0.l|
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_neg_rsq_f16_fabs:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e64 v0, |v0|
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_neg_rsq_f16_fabs:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_sqrt_f16_e64 v0, |v0|
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_rcp_f16_e64 v0, -v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_neg_rsq_f16_fabs:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_sqrt_f16_e64 v0.l, |v0.l|
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_neg_rsq_f16_fabs:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_sqrt_f16_e64 v0, |v0|
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call half @llvm.fabs.f16(half %a)
%sqrt = call contract half @llvm.sqrt.f16(half %a.fabs)
%fdiv = fdiv contract half -1.0, %sqrt
@@ -7360,29 +5791,17 @@ define half @v_rsq_f16_arcp(half %a) {
; GFX10-NEXT: v_rsq_f16_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_rsq_f16_arcp:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_rsq_f16_e32 v0.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_rsq_f16_arcp:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_rsq_f16_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_rsq_f16_arcp:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_rsq_f16_e32 v0.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_rsq_f16_arcp:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_rsq_f16_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_rsq_f16_arcp:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_rsq_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_rsq_f16_arcp:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_rsq_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%sqrt = call contract half @llvm.sqrt.f16(half %a)
%fdiv = fdiv contract arcp half 1.0, %sqrt
ret half %fdiv
@@ -7450,44 +5869,21 @@ define half @v_neg_rsq_f16_arcp(half %a) {
; GFX10-NEXT: v_rcp_f16_e64 v0, -v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_neg_rsq_f16_arcp:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_neg_rsq_f16_arcp:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_neg_rsq_f16_arcp:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_neg_rsq_f16_arcp:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_neg_rsq_f16_arcp:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_rcp_f16_e64 v0, -v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_neg_rsq_f16_arcp:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_neg_rsq_f16_arcp:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_rcp_f16_e64 v0, -v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%sqrt = call contract half @llvm.sqrt.f16(half %a)
%fdiv = fdiv contract arcp half -1.0, %sqrt
ret half %fdiv
@@ -7519,29 +5915,17 @@ define half @v_rsq_f16_afn(half %a) {
; GFX10-NEXT: v_rsq_f16_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_rsq_f16_afn:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_rsq_f16_e32 v0.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_rsq_f16_afn:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_rsq_f16_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_rsq_f16_afn:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_rsq_f16_e32 v0.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_rsq_f16_afn:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_rsq_f16_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_rsq_f16_afn:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_rsq_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_rsq_f16_afn:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_rsq_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%sqrt = call contract half @llvm.sqrt.f16(half %a)
%fdiv = fdiv afn contract half 1.0, %sqrt
ret half %fdiv
@@ -7575,44 +5959,21 @@ define half @v_rsq_f16_afn_nocontract(half %a) {
; GFX10-NEXT: v_rcp_f16_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_rsq_f16_afn_nocontract:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_rsq_f16_afn_nocontract:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_rsq_f16_afn_nocontract:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_rsq_f16_afn_nocontract:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_rsq_f16_afn_nocontract:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_rcp_f16_e32 v0, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_rsq_f16_afn_nocontract:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_rsq_f16_afn_nocontract:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_rcp_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%sqrt = call half @llvm.sqrt.f16(half %a)
%fdiv = fdiv afn half 1.0, %sqrt
ret half %fdiv
@@ -7896,166 +6257,70 @@ define <2 x half> @v_rsq_v2f16(<2 x half> %a) {
; GFX10-FLUSH-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_rsq_v2f16:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v0.h, v0.h
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, 1.0
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.h
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v1
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v1
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_dual_mul_f32 v1, v4, v1 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-IEEE-TRUE16-NEXT: v_dual_add_f32 v2, v2, v3 :: v_dual_and_b32 v1, 0xff800000, v1
-; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v1, v1, v6
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v2
-; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, 1.0
-; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v0.h, 1.0
-; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_rsq_v2f16:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, 1.0
-; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v1, v1
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-IEEE-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, 1.0
-; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_rsq_v2f16:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v0.h, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, 1.0
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_dual_mul_f32 v1, v4, v1 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_dual_add_f32 v2, v2, v3 :: v_dual_and_b32 v1, 0xff800000, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v1, v1, v6
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, 1.0
-; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v0.h, 1.0
-; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_rsq_v2f16:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, 1.0
-; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v1, v1
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, 1.0
-; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_rsq_v2f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-NEXT: v_cvt_f32_f16_e32 v4, 1.0
-; GFX11-NEXT: v_sqrt_f16_e32 v1, v1
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v5, v4, v2
-; GFX11-NEXT: v_mul_f32_e32 v4, v4, v3
-; GFX11-NEXT: v_fma_mix_f32 v7, -v1, v4, 1.0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fmac_f32_e32 v4, v7, v3
-; GFX11-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fma_mix_f32 v7, -v1, v4, 1.0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v2
-; GFX11-NEXT: v_mul_f32_e32 v3, v7, v3
-; GFX11-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-NEXT: v_add_f32_e32 v2, v2, v5
-; GFX11-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0
-; GFX11-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX11-NEXT: v_div_fixup_f16 v0, v2, v0, 1.0
-; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_rsq_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT: v_sqrt_f16_e32 v1.l, v0.h
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v4, 1.0
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.l
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v1.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v1.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l
+; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v7, v4, v2
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v4, v4, v3
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v9, -v6, v4, 1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v4, v9, v3
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v7, 1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v6, -v6, v4, 1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v7, v8, v2
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v3, v6, v3
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v7, 1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_dual_mul_f32 v2, v5, v2 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v3
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v2, v2, v7
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.h, v0.l, 1.0
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v1.l, 1.0
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_rsq_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, 1.0
+; GFX11-FAKE16-NEXT: v_sqrt_f16_e32 v1, v1
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, 1.0
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%sqrt = call contract <2 x half> @llvm.sqrt.v2f16(<2 x half> %a)
%fdiv = fdiv contract <2 x half> <half 1.0, half 1.0>, %sqrt
ret <2 x half> %fdiv
@@ -8339,166 +6604,70 @@ define <2 x half> @v_neg_rsq_v2f16(<2 x half> %a) {
; GFX10-FLUSH-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-IEEE-TRUE16-LABEL: v_neg_rsq_v2f16:
-; GFX11-IEEE-TRUE16: ; %bb.0:
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_sqrt_f16_e32 v0.h, v0.h
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, -1.0
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.h
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
-; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v1
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v1
-; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
-; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-TRUE16-NEXT: v_dual_mul_f32 v1, v4, v1 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-IEEE-TRUE16-NEXT: v_dual_add_f32 v2, v2, v3 :: v_dual_and_b32 v1, 0xff800000, v1
-; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v1, v1, v6
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
-; GFX11-IEEE-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v2
-; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, -1.0
-; GFX11-IEEE-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v0.h, -1.0
-; GFX11-IEEE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-IEEE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-IEEE-FAKE16-LABEL: v_neg_rsq_v2f16:
-; GFX11-IEEE-FAKE16: ; %bb.0:
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, -1.0
-; GFX11-IEEE-FAKE16-NEXT: v_sqrt_f16_e32 v1, v1
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-IEEE-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-IEEE-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
-; GFX11-IEEE-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
-; GFX11-IEEE-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
-; GFX11-IEEE-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-IEEE-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-IEEE-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0
-; GFX11-IEEE-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX11-IEEE-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, -1.0
-; GFX11-IEEE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-IEEE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-TRUE16-LABEL: v_neg_rsq_v2f16:
-; GFX11-FLUSH-TRUE16: ; %bb.0:
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_sqrt_f16_e32 v0.h, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, -1.0
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v1, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v6, v3, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v3, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v8, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v6, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v4, v6, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-TRUE16-NEXT: v_dual_mul_f32 v1, v4, v1 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_dual_add_f32 v2, v2, v3 :: v_dual_and_b32 v1, 0xff800000, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v1, v1, v6
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
-; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v2
-; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.l, v1.l, v0.l, -1.0
-; GFX11-FLUSH-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v0.h, -1.0
-; GFX11-FLUSH-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
-; GFX11-FLUSH-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FLUSH-FAKE16-LABEL: v_neg_rsq_v2f16:
-; GFX11-FLUSH-FAKE16: ; %bb.0:
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, -1.0
-; GFX11-FLUSH-FAKE16-NEXT: v_sqrt_f16_e32 v1, v1
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-FLUSH-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0
-; GFX11-FLUSH-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX11-FLUSH-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, -1.0
-; GFX11-FLUSH-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-FLUSH-FAKE16-NEXT: s_setpc_b64 s[30:31]
-; GFX11-LABEL: v_neg_rsq_v2f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-NEXT: v_sqrt_f16_e32 v0, v0
-; GFX11-NEXT: v_cvt_f32_f16_e32 v4, -1.0
-; GFX11-NEXT: v_sqrt_f16_e32 v1, v1
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX11-NEXT: v_rcp_f32_e32 v2, v2
-; GFX11-NEXT: v_rcp_f32_e32 v3, v3
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v5, v4, v2
-; GFX11-NEXT: v_mul_f32_e32 v4, v4, v3
-; GFX11-NEXT: v_fma_mix_f32 v7, -v1, v4, -1.0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fmac_f32_e32 v4, v7, v3
-; GFX11-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fma_mix_f32 v7, -v1, v4, -1.0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v2
-; GFX11-NEXT: v_mul_f32_e32 v3, v7, v3
-; GFX11-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
-; GFX11-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
-; GFX11-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
-; GFX11-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX11-NEXT: v_add_f32_e32 v2, v2, v5
-; GFX11-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0
-; GFX11-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX11-NEXT: v_div_fixup_f16 v0, v2, v0, -1.0
-; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_neg_rsq_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT: v_sqrt_f16_e32 v1.l, v0.h
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v4, -1.0
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v0.l
+; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v1.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v1.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l
+; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v7, v4, v2
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v4, v4, v3
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v9, -v6, v4, -1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v4, v9, v3
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v8, -v5, v7, -1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v6, -v6, v4, -1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v7, v8, v2
+; GFX11-TRUE16-NEXT: v_mul_f32_e32 v3, v6, v3
+; GFX11-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v7, -1.0 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_dual_mul_f32 v2, v5, v2 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.h, v3
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v2, v2, v7
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.h, v0.l, -1.0
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.h, v1.h, v1.l, -1.0
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_neg_rsq_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-FAKE16-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v4, -1.0
+; GFX11-FAKE16-NEXT: v_sqrt_f16_e32 v1, v1
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v2, v2
+; GFX11-FAKE16-NEXT: v_rcp_f32_e32 v3, v3
+; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v2
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v4, v4, v3
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fmac_f32_e32 v4, v7, v3
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v7, -v1, v4, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v2
+; GFX11-FAKE16-NEXT: v_mul_f32_e32 v3, v7, v3
+; GFX11-FAKE16-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_and_b32 v3, 0xff800000, v3
+; GFX11-FAKE16-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX11-FAKE16-NEXT: v_add_f32_e32 v2, v2, v5
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v0, v2, v0, -1.0
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%sqrt = call contract <2 x half> @llvm.sqrt.v2f16(<2 x half> %a)
%fdiv = fdiv contract <2 x half> <half -1.0, half -1.0>, %sqrt
ret <2 x half> %fdiv
@@ -8511,4 +6680,8 @@ declare <2 x half> @llvm.sqrt.v2f16(<2 x half>)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX11-FLUSH: {{.*}}
+; GFX11-FLUSH-FAKE16: {{.*}}
+; GFX11-FLUSH-TRUE16: {{.*}}
; GFX11-IEEE: {{.*}}
+; GFX11-IEEE-FAKE16: {{.*}}
+; GFX11-IEEE-TRUE16: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
index 884ebd9c0ea08..4280fe14d35e3 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
@@ -269,19 +269,11 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32(half %src0, half %src1, float %src2
}
define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt(half %src0, half %src1, float %src2) #0 {
-; SDAG-GFX1100-TRUE16-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
-; SDAG-GFX1100-TRUE16: ; %bb.0:
-; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0]
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; SDAG-GFX1100-TRUE16-NEXT: v_max_f16_e64 v0.l, v0.l, v0.l clamp
-; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; SDAG-GFX1100-FAKE16-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
-; SDAG-GFX1100-FAKE16: ; %bb.0:
-; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
-; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX1100-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
; GFX900: ; %bb.0:
@@ -312,12 +304,6 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt(half %src0, half %sr
; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
-; GISEL-GFX1100-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
-; GISEL-GFX1100: ; %bb.0:
-; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX1100-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
-; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
-;
; GISEL-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
; GISEL-CI: ; %bb.0:
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1261,56 +1247,39 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
}
define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 {
-; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v4f32_clamp_postcvt:
-; SDAG-GFX1100-TRUE16: ; %bb.0:
-; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.h, v6.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v7.h, v7.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v0, v6, v6 clamp
-; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v1, v7, v7 clamp
-; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v4f32_clamp_postcvt:
-; SDAG-GFX1100-FAKE16: ; %bb.0:
-; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v0, v6, v6 clamp
-; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v1, v7, v7 clamp
-; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX1100-LABEL: v_mad_mix_v4f32_clamp_postcvt:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; GFX1100-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GFX1100-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
-; SDAG-GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt:
-; SDAG-GFX900: ; %bb.0:
-; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
-; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
-; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX900-NEXT: v_pk_max_f16 v0, v6, v6 clamp
-; SDAG-GFX900-NEXT: v_pk_max_f16 v1, v7, v7 clamp
-; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GFX900-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; GFX900-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GFX900-NEXT: v_mov_b32_e32 v0, v6
+; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; SDAG-GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt:
-; SDAG-GFX906: ; %bb.0:
-; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
-; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
-; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX906-NEXT: v_pk_max_f16 v0, v6, v6 clamp
-; SDAG-GFX906-NEXT: v_pk_max_f16 v1, v7, v7 clamp
-; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
+; GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt:
+; GFX906: ; %bb.0:
+; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GFX906-NEXT: v_fma_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; GFX906-NEXT: v_fma_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GFX906-NEXT: v_mov_b32_e32 v0, v6
+; GFX906-NEXT: v_mov_b32_e32 v1, v2
+; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-VI-LABEL: v_mad_mix_v4f32_clamp_postcvt:
; SDAG-VI: ; %bb.0:
@@ -1389,40 +1358,6 @@ define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %s
; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v3, v3 clamp
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
-; GISEL-GFX1100-LABEL: v_mad_mix_v4f32_clamp_postcvt:
-; GISEL-GFX1100: ; %bb.0:
-; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX1100-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX1100-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GISEL-GFX1100-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX1100-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GISEL-GFX1100-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
-; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
-;
-; GISEL-GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt:
-; GISEL-GFX900: ; %bb.0:
-; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GISEL-GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GISEL-GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt:
-; GISEL-GFX906: ; %bb.0:
-; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v6
-; GISEL-GFX906-NEXT: v_mov_b32_e32 v1, v2
-; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
-;
; GISEL-VI-LABEL: v_mad_mix_v4f32_clamp_postcvt:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1517,11 +1452,9 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_lo(<2 x half> %src0, <2 x half>
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; SDAG-GFX1100-TRUE16-NEXT: v_max_f16_e64 v3.l, v3.l, v3.l clamp
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
More information about the llvm-commits mailing list